From 19179ad11b894960b076f6a89a34618287858f69 Mon Sep 17 00:00:00 2001
From: "Marcel W. Wysocki" <maci.stgn@gmail.com>
Date: Wed, 20 May 2026 11:55:13 +0800
Subject: [PATCH 1/5] win32: add PE32 / PE32+ parser and Capstone disassembler
 facade
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New `src/splat/platforms/win32.py` (~1600 LOC, self-contained):

- Walks DOS stub, COFF file header, optional header (PE32 and PE32+),
  section table, and every populated data directory:
    0 Export, 1 Import, 2 Resource, 3 Exception (incl. PE32+ unwind
    info opcode decode), 5 Base Relocation, 6 Debug (CodeView PDB
    GUID / age extraction), 9 TLS callbacks, 10 Load Config
    (/GS SecurityCookie, /SAFESEH handlers, /guard:cf table),
    11 Bound Import, 13 Delay Import, 14 CLR Runtime Header.
- Also parses the deprecated COFF symbol table when the optional
  header points at one (vintage MSVC 4-6 binaries).
- Defensive fuzz-cap on every iteration loop so a malformed PE
  can't make the parser scan billions of records.
- Public helpers: sanitize_label, compute_iat_labels,
  compute_export_labels (used by both create_config and the text
  segtype to keep symbol_addrs.txt and disasm references in sync);
  ptr_layout / resolve_exact_encoding shared across segtypes.

New `src/splat/disassembler/capstone_disassembler.py` (~110 LOC):
thin facade that picks CS_MODE_32 / CS_MODE_64 from
pe.is_pe32_plus, leaves engine creation lazy (target may not be
parsed yet when configure() runs), and exposes the same
known_types() primitive vocabulary spimdisasm uses so
symbol_addrs entries can use the conventional type:u32 /
type:asciz tokens.

Both modules are stand-alone — nothing else in this commit
imports them yet.
---
 .../disassembler/capstone_disassembler.py     |  109 ++
 src/splat/platforms/win32.py                  | 1642 +++++++++++++++++
 2 files changed, 1751 insertions(+)
 create mode 100644 src/splat/disassembler/capstone_disassembler.py
 create mode 100644 src/splat/platforms/win32.py

diff --git a/src/splat/disassembler/capstone_disassembler.py b/src/splat/disassembler/capstone_disassembler.py
new file mode 100644
index 00000000..42043bae
--- /dev/null
+++ b/src/splat/disassembler/capstone_disassembler.py
@@ -0,0 +1,109 @@
+"""Capstone-backed disassembler used by the win32 platform.
+
+The MIPS disassembler stack (spimdisasm/rabbitizer) is incompatible with x86,
+so win32 segments do not flow through `CommonSegCodeSubsegment`. This module
+exposes a tiny façade: configure a Capstone engine once, hand it out to
+segtypes for them to decode byte ranges, and surface known section names.
+"""
+
+from typing import Optional, Set
+
+from . import disassembler
+from ..util import log
+
+
+class CapstoneDisassembler(disassembler.Disassembler):
+    CAPSTONE_MIN = (5, 0, 0)
+
+    def __init__(self):
+        self._md = None
+
+    def configure(self):
+        # Defer engine creation to `get_engine()` — at this point in startup
+        # the target hasn't been parsed yet, so we don't yet know whether
+        # it's PE32 (CS_MODE_32) or PE32+ (CS_MODE_64).
+        try:
+            import capstone  # noqa: F401 — just verify availability
+        except ImportError:
+            log.error(
+                "The win32 platform requires the optional 'capstone' dependency. "
+                "Install it with: pip install 'splat64[win32]'"
+            )
+
+    def check_version(self, skip_version_check: bool, splat_version: str):
+        try:
+            import capstone
+        except ImportError:
+            log.error(
+                "The win32 platform requires the optional 'capstone' dependency. "
+                "Install it with: pip install 'splat64[win32]'"
+            )
+
+        if not skip_version_check:
+            cs_version = getattr(capstone, "__version__", None)
+            if cs_version is not None:
+                parts = []
+                for chunk in cs_version.split("."):
+                    digits = "".join(c for c in chunk if c.isdigit())
+                    parts.append(int(digits) if digits else 0)
+                while len(parts) < 3:
+                    parts.append(0)
+                if tuple(parts[:3]) < self.CAPSTONE_MIN:
+                    log.error(
+                        f"splat {splat_version} requires at least capstone "
+                        f"{self.CAPSTONE_MIN}, but {cs_version} is installed"
+                    )
+            log.write(
+                f"splat {splat_version} (powered by capstone {cs_version or '?'})"
+            )
+
+    def get_engine(self):
+        if self._md is not None:
+            return self._md
+
+        import capstone
+        from ..platforms import win32 as win32_platform
+
+        arch = capstone.CS_ARCH_X86
+        # Honour the parsed PE's bitness when the platform module has been
+        # initialized; otherwise default to PE32 (32-bit).
+        if win32_platform.info.is_pe32_plus:
+            mode = capstone.CS_MODE_64
+        else:
+            mode = capstone.CS_MODE_32
+
+        md = capstone.Cs(arch, mode)
+        md.detail = True
+        md.syntax = capstone.CS_OPT_SYNTAX_INTEL
+        self._md = md
+        return md
+
+    def known_types(self) -> Set[str]:
+        # Mirror the standard primitive type names that the spimdisasm
+        # backend exposes so symbol_addrs files written for win32 binaries
+        # can use the same `type:u32` / `type:asciz` vocabulary.
+        return {
+            "u8",
+            "u16",
+            "u32",
+            "u64",
+            "s8",
+            "s16",
+            "s32",
+            "s64",
+            "f32",
+            "f64",
+            "char",
+            "char*",
+            "asciz",
+        }
+
+
+def get_capstone_disassembler() -> Optional["CapstoneDisassembler"]:
+    """Return the active CapstoneDisassembler if one is wired up, else None."""
+    from . import disassembler_instance
+
+    inst = disassembler_instance.get_instance()
+    if isinstance(inst, CapstoneDisassembler):
+        return inst
+    return None
diff --git a/src/splat/platforms/win32.py b/src/splat/platforms/win32.py
new file mode 100644
index 00000000..15d00b63
--- /dev/null
+++ b/src/splat/platforms/win32.py
@@ -0,0 +1,1642 @@
+"""Win32 PE platform support.
+
+Parses the PE/COFF header of the target binary at `init()` time and exposes
+the result via module-level globals that win32 segtypes can consult.
+
+The parser intentionally implements only what splat needs (DOS stub, file
+header, optional header, section table). It accepts both PE32 (i386, what
+MSVC6 emits) and PE32+ (x86_64) optional headers but the rest of the win32
+support is geared at PE32 / i386.
+"""
+
+from dataclasses import dataclass, field
+import struct
+from typing import Dict, List, Optional, Set, Tuple
+
+from ..util import log
+
+
+DOS_MAGIC = b"MZ"
+PE_MAGIC = b"PE\x00\x00"
+
+MACHINE_I386 = 0x014C
+MACHINE_AMD64 = 0x8664
+MACHINE_ARM32 = 0x01C4  # ARMv7 Thumb-2 (Windows on ARM 32-bit)
+MACHINE_ARM64 = 0xAA64  # AArch64 (Windows on ARM 64-bit)
+
+# IMAGE_OPTIONAL_HEADER.Magic — identifies which optional-header
+# layout follows (PE32 has 32-bit fields for ImageBase etc., PE32+
+# has 64-bit equivalents).
+OPT_MAGIC_PE32 = 0x10B
+OPT_MAGIC_PE32_PLUS = 0x20B
+
+# IMAGE_DIRECTORY_ENTRY_* indices into pe.data_directories.
+DIR_EXPORT = 0
+DIR_IMPORT = 1
+DIR_RESOURCE = 2
+DIR_EXCEPTION = 3
+DIR_CERTIFICATE = 4
+DIR_BASERELOC = 5
+DIR_DEBUG = 6
+DIR_ARCHITECTURE = 7
+DIR_GLOBALPTR = 8
+DIR_TLS = 9
+DIR_LOAD_CONFIG = 10
+DIR_BOUND_IMPORT = 11
+DIR_IAT = 12
+DIR_DELAY_IMPORT = 13
+DIR_COM_DESCRIPTOR = 14
+
+# Section header flags (IMAGE_SCN_*)
+SCN_CNT_CODE = 0x00000020
+SCN_CNT_INITIALIZED_DATA = 0x00000040
+SCN_CNT_UNINITIALIZED_DATA = 0x00000080
+SCN_MEM_READ = 0x40000000
+SCN_MEM_WRITE = 0x80000000
+SCN_MEM_EXECUTE = 0x20000000
+
+
+@dataclass
+class PESection:
+    """One IMAGE_SECTION_HEADER entry from the PE section table.
+
+    `virtual_address` is the section's RVA — its load-time location
+    relative to ImageBase. `raw_pointer` is the on-disk offset. The
+    section spans [virtual_address, virtual_address + virtual_size)
+    in memory and [raw_pointer, raw_pointer + raw_size) in the file;
+    when virtual_size > raw_size the loader zero-fills the tail."""
+
+    name: str
+    virtual_size: int
+    virtual_address: int  # RVA
+    raw_size: int
+    raw_pointer: int  # file offset
+    characteristics: int
+
+    @property
+    def is_code(self) -> bool:
+        return bool(self.characteristics & (SCN_CNT_CODE | SCN_MEM_EXECUTE))
+
+    @property
+    def is_bss(self) -> bool:
+        return bool(self.characteristics & SCN_CNT_UNINITIALIZED_DATA)
+
+    @property
+    def is_writable(self) -> bool:
+        return bool(self.characteristics & SCN_MEM_WRITE)
+
+    @property
+    def is_readonly_data(self) -> bool:
+        return (
+            bool(self.characteristics & SCN_CNT_INITIALIZED_DATA)
+            and not self.is_writable
+            and not self.is_code
+        )
+
+
+@dataclass
+class PEExport:
+    """One entry in the export table (data dir 0).
+
+    `name` is None for ordinal-only exports (the DLL exposes the
+    function by number rather than symbolic name). `rva` is the
+    in-image RVA the export points at — UNLESS `forwarder` is set,
+    in which case the export forwards to another DLL and rva is
+    interpreted by the loader as a pointer to the forwarder string."""
+
+    name: Optional[str]
+    ordinal: int
+    rva: int
+    forwarder: Optional[str] = None
+
+
+@dataclass
+class PEImport:
+    """One imported symbol — either eager (data dir 1, IMAGE_IMPORT_*)
+    or delay-loaded (data dir 13, IMAGE_DELAYLOAD_*).
+
+    `dll` is the import source's DLL filename. Exactly one of `name`
+    or `ordinal` is set: `name` for the typical hint/name import,
+    `ordinal` (`name` is None) when the import is by ordinal index.
+    `iat_rva` is the in-image RVA of the IAT slot the loader writes
+    the resolved function pointer into — call sites read through it
+    via `call qword ptr [<iat_va>]` style indirect calls."""
+
+    dll: str
+    name: Optional[str]
+    ordinal: Optional[int]
+    iat_rva: int
+
+
+@dataclass
+class PEBoundImport:
+    """IMAGE_BOUND_IMPORT_DESCRIPTOR entry.
+
+    `timestamp` is the DLL build timestamp the binary was bound against;
+    `forwarder_refs` are 0-N additional DLLs the bound-import chain
+    transitively pre-resolved through."""
+
+    dll: str
+    timestamp: int
+    forwarder_refs: List[str] = field(default_factory=list)
+
+
+@dataclass
+class CLRHeader:
+    """Decoded IMAGE_COR20_HEADER (data dir 14 — `.NET CLR Runtime
+    Header`). Identifies the binary as a .NET assembly and points at
+    the CLR metadata + entry-point token.
+
+    Splat doesn't decode the metadata tables themselves (would need
+    the full ECMA-335 reader); this just surfaces the header fields
+    so the analyst sees the assembly is .NET-native and can fetch
+    the metadata blob from its RVA."""
+
+    cb_size: int  # always 72
+    runtime_major: int
+    runtime_minor: int
+    metadata_rva: int
+    metadata_size: int
+    flags: int
+    entry_point_token_or_rva: int
+    resources_rva: int
+    resources_size: int
+    strong_name_signature_rva: int
+    strong_name_signature_size: int
+
+
+@dataclass
+class UnwindInfo:
+    """Decoded x64 SEH IMAGE_UNWIND_INFO record (PE32+ only).
+
+    `prolog_size` is the number of bytes the prologue occupies; codes
+    are a flattened list of `(offset_in_prolog, op_name, info_nibble)`
+    triples describing the prolog ops the unwinder will replay. The
+    `chained_function_rva` is set when the chain-info flag is on and
+    a subsequent RUNTIME_FUNCTION's begin/end/unwind triple follows
+    the codes."""
+
+    version: int
+    flags: int
+    prolog_size: int
+    frame_register: int  # 0 = none; otherwise the x86_64 register index
+    frame_register_offset: int  # nibble × 16
+    codes: List[Tuple[int, str, int]] = field(default_factory=list)
+    chained_function_rva: Optional[int] = None
+
+
+@dataclass
+class COFFSymbol:
+    """One IMAGE_SYMBOL record (18 bytes) from the deprecated COFF
+    symbol table that vintage MSVC linkers wrote past the last raw
+    section. Modern toolchains rely on .pdb instead and leave the
+    optional header's PointerToSymbolTable zero, so this list is
+    typically empty on Windows 7+ era binaries."""
+
+    name: str
+    value: int  # VA or RVA depending on storage class
+    section_number: int  # 1-based, 0 = undefined, -1 = absolute, -2 = debug
+    sym_type: int  # combined base + complex type
+    storage_class: int
+    aux_records: int  # number of trailing IMAGE_AUX_SYMBOL entries
+
+
+@dataclass
+class PEResource:
+    """One leaf in the .rsrc tree.
+
+    `rtype` is the resource-type ID (or a UTF-16 name for custom types).
+    `rid` is the per-type identifier or name. `language` is the locale id.
+    `rva` / `size` point at the resource's raw bytes inside the image.
+    """
+
+    rtype: object  # int (standard) or str (custom-named type)
+    rid: object  # int or str
+    language: int
+    rva: int
+    size: int
+
+
+@dataclass
+class PEInfo:
+    """Result of parsing a Win32 PE32 / PE32+ image.
+
+    Carries every field splat needs from the DOS stub, COFF file
+    header, optional header, section table, and all 16 data
+    directories. Populated by `parse_pe(target_bytes)` and exposed
+    via the module-level `info` global so segtypes and the
+    disassembler can consult it without threading it through every
+    call. Empty / zero defaults mean 'directory not present' — there
+    is no separate Optional[List] for directory-derived fields."""
+
+    machine: int = 0
+    num_sections: int = 0
+    timestamp: int = 0
+    characteristics: int = 0
+
+    is_pe32_plus: bool = False
+    image_base: int = 0
+    entry_point_rva: int = 0
+    section_alignment: int = 0
+    file_alignment: int = 0
+    size_of_image: int = 0
+    size_of_headers: int = 0
+    subsystem: int = 0
+    dll_characteristics: int = 0
+    size_of_stack_reserve: int = 0
+    size_of_stack_commit: int = 0
+    size_of_heap_reserve: int = 0
+    size_of_heap_commit: int = 0
+    linker_major: int = 0
+    linker_minor: int = 0
+
+    pe_header_offset: int = 0
+    sections: List[PESection] = field(default_factory=list)
+
+    # 16 (rva, size) entries from the optional header. Populated only when
+    # NumberOfRvaAndSizes is large enough.
+    data_directories: List[Tuple[int, int]] = field(default_factory=list)
+    exports: List[PEExport] = field(default_factory=list)
+    export_dll_name: Optional[str] = None
+    imports: List[PEImport] = field(default_factory=list)
+    bound_imports: List[PEBoundImport] = field(default_factory=list)
+    delay_imports: List[PEImport] = field(default_factory=list)
+    # PE32+ / ARM: array of (begin_rva, end_rva, unwind_rva) describing
+    # function bounds for SEH unwinding. Empty for PE32.
+    runtime_functions: List[Tuple[int, int, int]] = field(default_factory=list)
+    # /GS security cookie VA (data dir 10 → IMAGE_LOAD_CONFIG_DIRECTORY).
+    security_cookie_va: int = 0
+    # /SAFESEH handler RVAs (PE32 only).
+    safe_seh_handlers: List[int] = field(default_factory=list)
+    # /guard:cf — array of valid indirect-call target RVAs.
+    cfg_function_rvas: List[int] = field(default_factory=list)
+    cfg_flags: int = 0
+    # RVAs the loader's base-relocation logic identifies as absolute
+    # pointers (HIGHLOW for PE32, DIR64 for PE32+). Useful for emitting
+    # data-section .long/.quad entries with symbolic targets.
+    pointer_rvas: Set[int] = field(default_factory=set)
+    # Deprecated COFF symbol table at the end of the file (PE binaries
+    # essentially never have these populated — debug info lives in the
+    # external .pdb instead — but a non-zero value is informative.)
+    coff_symtab_ptr: int = 0
+    coff_num_symbols: int = 0
+    # Parsed IMAGE_SYMBOL records when coff_symtab_ptr/coff_num_symbols
+    # are non-zero. Empty on modern PEs.
+    coff_symbols: List["COFFSymbol"] = field(default_factory=list)
+    # PDB filename embedded in the Debug directory's CodeView record, when
+    # present.
+    pdb_path: Optional[str] = None
+    # GUID (RSDS) or 32-bit signature (NB10) identifying the matching PDB.
+    pdb_guid: Optional[str] = None
+    # Build age — incremented every time the PDB is updated.
+    pdb_age: Optional[int] = None
+    # TLS callback VAs gathered from data directory 9.
+    tls_callback_vas: List[int] = field(default_factory=list)
+    # Decoded IMAGE_UNWIND_INFO records, keyed by unwind RVA.
+    # PE32+ only; remains empty for PE32 binaries.
+    unwind_info: Dict[int, UnwindInfo] = field(default_factory=dict)
+    # Decoded IMAGE_COR20_HEADER when data dir 14 is populated (.NET).
+    clr_header: Optional[CLRHeader] = None
+    # Resources enumerated from data directory 2 (.rsrc).
+    resources: List[PEResource] = field(default_factory=list)
+    # Decoded VS_VERSIONINFO key/value pairs (CompanyName, FileVersion,
+    # ProductName, OriginalFilename, etc.).
+    version_info: dict = field(default_factory=dict)
+
+    @property
+    def entry_point_va(self) -> int:
+        return self.image_base + self.entry_point_rva
+
+    def section_by_name(self, name: str) -> Optional[PESection]:
+        for s in self.sections:
+            if s.name == name:
+                return s
+        return None
+
+    def rva_to_file_offset(self, rva: int) -> Optional[int]:
+        """Translate an RVA to its on-disk file offset, or None when
+        the RVA isn't backed by file bytes — either because it falls
+        outside every section's virtual range or because it sits in
+        the virtual-only tail of a section whose VirtualSize exceeds
+        SizeOfRawData (loader zero-fills that range; no file bytes
+        back it). Callers must handle None to avoid reading into a
+        neighbouring section's data."""
+        for s in self.sections:
+            sec_end = s.virtual_address + max(s.virtual_size, s.raw_size)
+            if not (s.virtual_address <= rva < sec_end):
+                continue
+            # In a section whose VirtualSize > SizeOfRawData (MSVC zero-init
+            # tail or read-only constants past the file boundary), RVAs
+            # within the trailing virtual-only range have NO backing
+            # bytes — the loader zero-fills them at map time. Returning
+            # raw_pointer + offset for those RVAs would land in the
+            # next section's raw bytes. Reject instead.
+            offset_in_section = rva - s.virtual_address
+            if offset_in_section >= s.raw_size:
+                return None
+            return s.raw_pointer + offset_in_section
+        return None
+
+    def va_to_file_offset(self, va: int) -> Optional[int]:
+        """Translate an image-base-relative virtual address to a file
+        offset. Convenience wrapper that subtracts `image_base` and
+        delegates to `rva_to_file_offset`; same None semantics."""
+        return self.rva_to_file_offset(va - self.image_base)
+
+
+# Populated by `init`, consulted by segtypes/disassembler.
+info: PEInfo = PEInfo()
+# Full file bytes — kept on the side so heuristics in segtypes can peek
+# at arbitrary section content (e.g. validate a candidate function start
+# byte) without threading rom_bytes through every call.
+raw_image: bytes = b""
+
+
+def resolve_exact_encoding(
+    yaml: object, parent: "Optional[object]", default: bool = False
+) -> bool:
+    """Shared `exact_encoding` flag resolution used by Win32SegText /
+    Win32SegData / Win32SegPdata. Priority order matches what users
+    expect: per-subsegment YAML setting wins; if absent, fall back to
+    the parent code-group YAML; finally fall back to `default`. The
+    flag toggles label-substitution off so emitted bytes match the
+    original file verbatim (needed for byte-identical reassembly)."""
+    if isinstance(yaml, dict):
+        v = yaml.get("exact_encoding")
+        if v is not None:
+            return bool(v)
+    if parent is not None:
+        parent_yaml = getattr(parent, "yaml", None)
+        if isinstance(parent_yaml, dict):
+            v = parent_yaml.get("exact_encoding")
+            if v is not None:
+                return bool(v)
+    return default
+
+
+def sanitize_label(s: str) -> str:
+    """Canonical GAS-label sanitization shared by every site that emits
+    labels derived from PE strings. Non-alphanumeric chars become '_';
+    leading-digit identifiers (GAS-invalid) are prefixed with '_'."""
+    out = "".join(c if c.isalnum() or c == "_" else "_" for c in s)
+    if out and out[0].isdigit():
+        out = "_" + out
+    return out
+
+
+def compute_iat_labels(pe: PEInfo) -> Dict[int, str]:
+    """Return a {slot_va: label} mapping for every IAT (eager + delay)
+    slot. Labels match what `create_win32_config` writes to
+    symbol_addrs.txt — including dedup-on-collision behaviour — so
+    disassembly references resolve to the same identifiers."""
+    out: Dict[int, str] = {}
+
+    def populate(items: List[PEImport], prefix: str) -> None:
+        seen: Set[str] = set()
+        for imp in items:
+            slot_va = pe.image_base + imp.iat_rva
+            nm = imp.name or f"ordinal_{imp.ordinal}"
+            safe = sanitize_label(nm)
+            # Empty DLL stem after sanitisation (corrupted descriptor
+            # with missing name) — substitute a recognisable
+            # placeholder so the label doesn't collapse to `imp__foo`
+            # for every unknown-DLL import.
+            dll_safe = sanitize_label(imp.dll) or "unknown"
+            full = f"{prefix}_{dll_safe}_{safe}"
+            if full in seen:
+                full = f"{full}__rva{imp.iat_rva:X}"
+            seen.add(full)
+            out[slot_va] = full
+
+    populate(pe.imports, "imp")
+    populate(pe.delay_imports, "dimp")
+    return out
+
+
+def compute_export_labels(
+    pe: PEInfo, reserved: Optional[Set[str]] = None
+) -> Dict[int, str]:
+    """Return a {export_va: label} mapping for every non-forwarder
+    export. `reserved` is a pre-seeded set of labels already in use
+    (e.g. {'entrypoint'}); colliding exports get an ordinal suffix.
+    Matches create_win32_config's symbol_addrs emission."""
+    out: Dict[int, str] = {}
+    seen: Set[str] = set(reserved or set())
+    for exp in pe.exports:
+        if exp.forwarder is not None:
+            continue
+        nm = exp.name or f"export_{exp.ordinal}"
+        safe = sanitize_label(nm)
+        if safe in seen:
+            safe = f"{safe}__ord{exp.ordinal}"
+        seen.add(safe)
+        out[pe.image_base + exp.rva] = safe
+    return out
+
+
+def parse_pe(data: bytes) -> PEInfo:
+    """Parse `data` as a Win32 PE32 or PE32+ image and return a populated
+    `PEInfo`. Walks the DOS stub, COFF file header, optional header, and
+    every populated data directory:
+
+      0  Export Table          → `exports`, `export_dll_name`
+      1  Import Table          → `imports` (+ IAT slot RVAs)
+      2  Resource Table        → `resources`, `version_info`
+      3  Exception Table       → `runtime_functions` + `unwind_info`
+      5  Base Relocation Table → `pointer_rvas`
+      6  Debug                 → `pdb_path`, `pdb_guid`, `pdb_age`
+      9  TLS Table             → `tls_callback_vas`
+      10 Load Config           → `security_cookie_va`, `safe_seh_handlers`,
+                                 `cfg_function_rvas`, `cfg_flags`
+      11 Bound Import          → `bound_imports`
+      13 Delay Import          → `delay_imports`
+      14 CLR Runtime Header    → `clr_header` (.NET assemblies)
+
+    Plus the deprecated COFF symbol table when the optional header
+    points at one (`coff_symbols`).
+
+    Logs a fatal error and exits on structural malformations: missing
+    MZ/PE magics, mismatched machine/magic combinations, optional
+    header below the per-format minimum size, or sections that run
+    past EOF. Every iteration loop has a defensive cap so a fuzzed PE
+    can't make the parser scan past realistic bounds. Safe to call on
+    hand-crafted byte buffers."""
+    if len(data) < 0x40 or data[:2] != DOS_MAGIC:
+        log.error("win32 target does not start with an MZ DOS header")
+
+    pe_off = struct.unpack_from("<I", data, 0x3C)[0]
+    if pe_off + 24 > len(data) or data[pe_off : pe_off + 4] != PE_MAGIC:
+        log.error(f"win32 target does not contain a PE header at 0x{pe_off:X}")
+
+    # COFF file header (20 bytes) follows the 4-byte PE signature.
+    coff_off = pe_off + 4
+    (
+        machine,
+        num_sections,
+        timestamp,
+        coff_symtab_ptr,
+        coff_num_symbols,
+        size_of_optional_header,
+        characteristics,
+    ) = struct.unpack_from("<HHIIIHH", data, coff_off)
+
+    opt_off = coff_off + 20
+    if size_of_optional_header < 2:
+        log.error("win32 target has no PE optional header")
+    if opt_off + size_of_optional_header > len(data):
+        log.error("win32 optional header runs past end of file")
+
+    magic = struct.unpack_from("<H", data, opt_off)[0]
+    is_pe32_plus = magic == OPT_MAGIC_PE32_PLUS
+    if magic not in (OPT_MAGIC_PE32, OPT_MAGIC_PE32_PLUS):
+        log.error(f"win32 target has unknown optional header magic 0x{magic:04X}")
+
+    # Cross-validate Machine vs Optional Header magic: i386 / ARM32 are
+    # always PE32 (magic 0x10B); AMD64 / ARM64 are always PE32+ (magic
+    # 0x20B). A mismatch indicates a corrupt or fabricated PE.
+    _M32 = {MACHINE_I386, MACHINE_ARM32}
+    _M64 = {MACHINE_AMD64, MACHINE_ARM64}
+    if machine in _M64 and not is_pe32_plus:
+        log.error(
+            f"win32 target's Machine (0x{machine:04X}) is 64-bit but the "
+            f"optional header magic (0x{magic:04X}) says PE32 — corrupt PE"
+        )
+    if machine in _M32 and is_pe32_plus:
+        log.error(
+            f"win32 target's Machine (0x{machine:04X}) is 32-bit but the "
+            f"optional header magic (0x{magic:04X}) says PE32+ — corrupt PE"
+        )
+
+    # Reject truncated optional headers: PE32 needs 96 bytes (standard +
+    # windows-specific); PE32+ needs 112. Anything smaller can't carry the
+    # data-directory offsets we expect.
+    min_opt = 112 if is_pe32_plus else 96
+    if size_of_optional_header < min_opt:
+        log.error(
+            f"win32 optional header is {size_of_optional_header} bytes; "
+            f"need at least {min_opt} for the data directories"
+        )
+
+    pe = PEInfo(
+        machine=machine,
+        num_sections=num_sections,
+        timestamp=timestamp,
+        characteristics=characteristics,
+        is_pe32_plus=is_pe32_plus,
+        pe_header_offset=pe_off,
+        coff_symtab_ptr=coff_symtab_ptr,
+        coff_num_symbols=coff_num_symbols,
+    )
+
+    # MajorLinkerVersion + MinorLinkerVersion (BYTE BYTE) at opt_off + 2.
+    pe.linker_major = data[opt_off + 2] if opt_off + 2 < len(data) else 0
+    pe.linker_minor = data[opt_off + 3] if opt_off + 3 < len(data) else 0
+
+    if not is_pe32_plus:
+        # PE32 optional header layout (offsets relative to opt_off).
+        pe.entry_point_rva = struct.unpack_from("<I", data, opt_off + 16)[0]
+        pe.image_base = struct.unpack_from("<I", data, opt_off + 28)[0]
+        pe.section_alignment = struct.unpack_from("<I", data, opt_off + 32)[0]
+        pe.file_alignment = struct.unpack_from("<I", data, opt_off + 36)[0]
+        pe.size_of_image = struct.unpack_from("<I", data, opt_off + 56)[0]
+        pe.size_of_headers = struct.unpack_from("<I", data, opt_off + 60)[0]
+        pe.subsystem = struct.unpack_from("<H", data, opt_off + 68)[0]
+        pe.dll_characteristics = struct.unpack_from("<H", data, opt_off + 70)[0]
+        pe.size_of_stack_reserve = struct.unpack_from("<I", data, opt_off + 72)[0]
+        pe.size_of_stack_commit = struct.unpack_from("<I", data, opt_off + 76)[0]
+        pe.size_of_heap_reserve = struct.unpack_from("<I", data, opt_off + 80)[0]
+        pe.size_of_heap_commit = struct.unpack_from("<I", data, opt_off + 84)[0]
+        num_rva = struct.unpack_from("<I", data, opt_off + 92)[0]
+        dd_off = opt_off + 96
+    else:
+        pe.entry_point_rva = struct.unpack_from("<I", data, opt_off + 16)[0]
+        pe.image_base = struct.unpack_from("<Q", data, opt_off + 24)[0]
+        pe.section_alignment = struct.unpack_from("<I", data, opt_off + 32)[0]
+        pe.file_alignment = struct.unpack_from("<I", data, opt_off + 36)[0]
+        pe.size_of_image = struct.unpack_from("<I", data, opt_off + 56)[0]
+        pe.size_of_headers = struct.unpack_from("<I", data, opt_off + 60)[0]
+        pe.subsystem = struct.unpack_from("<H", data, opt_off + 68)[0]
+        pe.dll_characteristics = struct.unpack_from("<H", data, opt_off + 70)[0]
+        pe.size_of_stack_reserve = struct.unpack_from("<Q", data, opt_off + 72)[0]
+        pe.size_of_stack_commit = struct.unpack_from("<Q", data, opt_off + 80)[0]
+        pe.size_of_heap_reserve = struct.unpack_from("<Q", data, opt_off + 88)[0]
+        pe.size_of_heap_commit = struct.unpack_from("<Q", data, opt_off + 96)[0]
+        num_rva = struct.unpack_from("<I", data, opt_off + 108)[0]
+        dd_off = opt_off + 112
+
+    for i in range(min(num_rva, 16)):
+        if dd_off + 8 > opt_off + size_of_optional_header:
+            break
+        rva = struct.unpack_from("<I", data, dd_off + i * 8)[0]
+        size = struct.unpack_from("<I", data, dd_off + i * 8 + 4)[0]
+        pe.data_directories.append((rva, size))
+
+    section_off = opt_off + size_of_optional_header
+    # PE spec caps NumberOfSections at 96. Anything higher is malformed —
+    # cap defensively so we don't iterate a fuzzed huge value.
+    capped_num_sections = min(num_sections, 96)
+    for i in range(capped_num_sections):
+        sh = section_off + i * 40
+        if sh + 40 > len(data):
+            log.error(f"win32 section header {i} runs past end of file")
+        raw_name = data[sh : sh + 8]
+        name = raw_name.split(b"\x00", 1)[0].decode("ascii", errors="replace")
+        (
+            virt_size,
+            virt_addr,
+            raw_size,
+            raw_ptr,
+            _ptr_relocs,
+            _ptr_linenums,
+            _num_relocs,
+            _num_linenums,
+            scn_chars,
+        ) = struct.unpack_from("<IIIIIIHHI", data, sh + 8)
+        pe.sections.append(
+            PESection(
+                name=name,
+                virtual_size=virt_size,
+                virtual_address=virt_addr,
+                raw_size=raw_size,
+                raw_pointer=raw_ptr,
+                characteristics=scn_chars,
+            )
+        )
+
+    parse_exports(data, pe)
+    parse_imports(data, pe)
+    parse_bound_imports(data, pe)
+    parse_delay_imports(data, pe)
+    parse_relocations(data, pe)
+    parse_exception_table(data, pe)
+    parse_load_config(data, pe)
+    parse_debug(data, pe)
+    parse_tls(data, pe)
+    parse_resources(data, pe)
+    parse_version_info(data, pe)
+    parse_coff_symtab(data, pe)
+    parse_unwind_info(data, pe)
+    parse_clr_header(data, pe)
+    return pe
+
+
+_MAX_CSTR_LEN = 4096
+
+
+def _read_cstr(data: bytes, off: int) -> str:
+    """Read a NUL-terminated ASCII string. Truncates at `_MAX_CSTR_LEN`
+    to bound parse-time cost on malformed or non-NUL-terminated input."""
+    if off < 0 or off >= len(data):
+        return ""
+    cap = min(off + _MAX_CSTR_LEN, len(data))
+    end = data.find(b"\x00", off, cap)
+    if end < 0:
+        end = cap
+    return data[off:end].decode("ascii", errors="replace")
+
+
+def parse_exports(data: bytes, pe: PEInfo) -> None:
+    """Populate `pe.exports` and `pe.export_dll_name` from data directory 0."""
+    if not pe.data_directories:
+        return
+    exp_rva, exp_size = pe.data_directories[DIR_EXPORT]
+    if exp_rva == 0 or exp_size == 0:
+        return
+    exp_off = pe.rva_to_file_offset(exp_rva)
+    if exp_off is None or exp_off + 40 > len(data):
+        return
+
+    (
+        _flags,
+        _ts,
+        _vmaj,
+        _vmin,
+        name_rva,
+        ord_base,
+        num_funcs,
+        num_names,
+        funcs_rva,
+        names_rva,
+        ordinals_rva,
+    ) = struct.unpack_from("<IIHHIIIIIII", data, exp_off)
+
+    if name_rva:
+        name_off = pe.rva_to_file_offset(name_rva)
+        if name_off is not None:
+            pe.export_dll_name = _read_cstr(data, name_off)
+
+    funcs_off = pe.rva_to_file_offset(funcs_rva) if funcs_rva else None
+    names_off = pe.rva_to_file_offset(names_rva) if names_rva else None
+    ordinals_off = pe.rva_to_file_offset(ordinals_rva) if ordinals_rva else None
+
+    if funcs_off is None:
+        return
+
+    # Defensive caps. Real-world DLLs export a few thousand symbols at most
+    # (e.g. ntdll.dll ≈ 2000). A fuzzed export directory could declare
+    # millions of entries, causing the parser to scan gigabytes of data.
+    EXPORT_LIMIT = 65536
+    num_funcs = min(num_funcs, EXPORT_LIMIT)
+    num_names = min(num_names, EXPORT_LIMIT)
+
+    # Map ordinal-index → name (some functions are export-by-ordinal only).
+    name_for_ordinal: Dict[int, str] = {}
+    if names_off is not None and ordinals_off is not None:
+        for i in range(num_names):
+            if names_off + i * 4 + 4 > len(data):
+                break
+            if ordinals_off + i * 2 + 2 > len(data):
+                break
+            name_ptr_rva = struct.unpack_from("<I", data, names_off + i * 4)[0]
+            ordinal_idx = struct.unpack_from("<H", data, ordinals_off + i * 2)[0]
+            name_off = pe.rva_to_file_offset(name_ptr_rva)
+            if name_off is None:
+                continue
+            name_for_ordinal[ordinal_idx] = _read_cstr(data, name_off)
+
+    for i in range(num_funcs):
+        if funcs_off + i * 4 + 4 > len(data):
+            break
+        func_rva = struct.unpack_from("<I", data, funcs_off + i * 4)[0]
+        if func_rva == 0:
+            continue
+        forwarder: Optional[str] = None
+        # When the function RVA falls inside the export directory itself,
+        # it's a forwarder string (DLL.Func or DLL.#ord) rather than code.
+        if exp_rva <= func_rva < exp_rva + exp_size:
+            f_off = pe.rva_to_file_offset(func_rva)
+            if f_off is not None:
+                forwarder = _read_cstr(data, f_off)
+        pe.exports.append(
+            PEExport(
+                name=name_for_ordinal.get(i),
+                ordinal=i + ord_base,
+                rva=func_rva,
+                forwarder=forwarder,
+            )
+        )
+
+
+def ptr_layout(is_pe32_plus: bool) -> Tuple[int, str, str, int]:
+    """Per-bitness pointer-slot constants used by data + text segments:
+    (size_bytes, struct_fmt, asm_directive, hex_print_width).
+
+    PE32 uses 4-byte slots emitted as `.long 0xXXXXXXXX`; PE32+ uses
+    8-byte slots emitted as `.quad 0xXXXXXXXXXXXXXXXX`."""
+    if is_pe32_plus:
+        return 8, "<Q", ".quad", 16
+    return 4, "<I", ".long", 8
+
+
+def _thunk_layout(is_pe32_plus: bool) -> Tuple[int, int, str]:
+    """Per-bitness thunk-array constants: (size_bytes, ordinal_flag, struct_fmt).
+    PE32 thunks are 32-bit DWORDs with the ordinal flag at bit 31;
+    PE32+ thunks are 64-bit QWORDs with the ordinal flag at bit 63.
+    Signature matches `ptr_layout` for symmetry — both take a bool
+    rather than a full PEInfo object."""
+    if is_pe32_plus:
+        return 8, 1 << 63, "<Q"
+    return 4, 1 << 31, "<I"
+
+
+def _walk_thunk_array(
+    data: bytes,
+    pe: PEInfo,
+    thunk_off: int,
+    slot_rva: int,
+    thunk_size: int,
+    thunk_fmt: str,
+    ordinal_flag: int,
+    dll_name: str,
+    dest: List[PEImport],
+) -> None:
+    """Walk a NULL-terminated array of import thunks (used by both
+    eager and delay-load import descriptors). Each thunk is either an
+    ordinal value (high-bit set) or an RVA pointing at a hint/name
+    record. Resolved entries are appended to `dest` as PEImport
+    records keyed by the corresponding IAT slot RVA."""
+    for _ in range(65536):
+        if thunk_off + thunk_size > len(data):
+            break
+        thunk = struct.unpack_from(thunk_fmt, data, thunk_off)[0]
+        if thunk == 0:
+            break
+        name: Optional[str] = None
+        ordinal: Optional[int] = None
+        if thunk & ordinal_flag:
+            ordinal = thunk & 0xFFFF
+        else:
+            hint_off = pe.rva_to_file_offset(thunk & 0x7FFFFFFF)
+            if hint_off is not None and hint_off + 2 < len(data):
+                # Capture the 16-bit hint as a fallback ordinal when
+                # the name string at hint+2 is empty (stripped binary
+                # or hand-crafted IAT).
+                hint = struct.unpack_from("<H", data, hint_off)[0]
+                raw = _read_cstr(data, hint_off + 2)
+                if raw:
+                    name = raw
+                elif hint:
+                    ordinal = hint
+        dest.append(
+            PEImport(dll=dll_name, name=name, ordinal=ordinal, iat_rva=slot_rva)
+        )
+        thunk_off += thunk_size
+        slot_rva += thunk_size
+
+
+def parse_imports(data: bytes, pe: PEInfo) -> None:
+    """Populate `pe.imports` from data directory 1 (the Import Table).
+
+    Reads IMAGE_IMPORT_DESCRIPTOR entries until the null terminator. For
+    each DLL walks the Import Lookup Table (OriginalFirstThunk) — falling
+    back to the IAT (FirstThunk) when the ILT is absent — and records the
+    DLL name, imported symbol, and the IAT slot RVA so call sites that
+    reach the IAT can be tagged with the imported name.
+    """
+    if len(pe.data_directories) <= DIR_IMPORT:
+        return
+    imp_rva, imp_size = pe.data_directories[DIR_IMPORT]
+    if imp_rva == 0 or imp_size == 0:
+        return
+    imp_off = pe.rva_to_file_offset(imp_rva)
+    if imp_off is None:
+        return
+
+    thunk_size, ordinal_flag, thunk_fmt = _thunk_layout(pe.is_pe32_plus)
+
+    desc_off = imp_off
+    # Cap descriptor count at a sane maximum to bound parse cost on
+    # malformed binaries.
+    for _desc_i in range(4096):
+        if desc_off + 20 > len(data):
+            break
+        ilt_rva, _ts, _fwd, dll_name_rva, iat_rva = struct.unpack_from(
+            "<IIIII", data, desc_off
+        )
+        if ilt_rva == 0 and dll_name_rva == 0 and iat_rva == 0:
+            break
+        desc_off += 20
+
+        dll_off = pe.rva_to_file_offset(dll_name_rva)
+        dll_name = _read_cstr(data, dll_off) if dll_off is not None else "?"
+
+        # Prefer the Import Lookup Table; fall back to the IAT if absent.
+        thunk_array_rva = ilt_rva if ilt_rva else iat_rva
+        thunk_off = pe.rva_to_file_offset(thunk_array_rva)
+        if thunk_off is None or iat_rva == 0:
+            # Without an IAT RVA we can't compute meaningful slot VAs for
+            # the entries we discover; skip the descriptor.
+            continue
+
+        _walk_thunk_array(
+            data,
+            pe,
+            thunk_off,
+            iat_rva,
+            thunk_size,
+            thunk_fmt,
+            ordinal_flag,
+            dll_name,
+            pe.imports,
+        )
+
+
+def parse_delay_imports(data: bytes, pe: PEInfo) -> None:
+    """Parse data directory 13 (Delay Import). MSVC `__declspec(dllimport)`
+    with `/DELAYLOAD` produces a separate import table for lazy resolution.
+
+    Each IMAGE_DELAYLOAD_DESCRIPTOR is 32 bytes; entries are NULL-terminated.
+    `Attributes` flag bit 0 indicates RVA-based fields (v2); else they're
+    VAs that need ImageBase subtraction (v1, legacy)."""
+    if len(pe.data_directories) <= DIR_DELAY_IMPORT:
+        return
+    di_rva, di_size = pe.data_directories[DIR_DELAY_IMPORT]
+    if di_rva == 0 or di_size == 0:
+        return
+    di_off = pe.rva_to_file_offset(di_rva)
+    if di_off is None:
+        return
+
+    thunk_size, ordinal_flag, thunk_fmt = _thunk_layout(pe.is_pe32_plus)
+
+    desc_off = di_off
+    end = di_off + di_size
+    for _desc_i in range(4096):
+        if desc_off + 32 > end or desc_off + 32 > len(data):
+            break
+        (
+            attrs,
+            dll_name_field,
+            _module_handle,
+            iat_field,
+            int_field,
+            _bound_iat,
+            _unload_iat,
+            _ts,
+        ) = struct.unpack_from("<IIIIIIII", data, desc_off)
+        if attrs == 0 and dll_name_field == 0 and iat_field == 0 and int_field == 0:
+            break
+        desc_off += 32
+
+        # Translate fields (RVAs for v2, VAs for v1).
+        def to_rva(field: int) -> int:
+            if attrs & 1:
+                return field
+            return field - pe.image_base if field >= pe.image_base else field
+
+        dll_name_rva = to_rva(dll_name_field)
+        iat_rva = to_rva(iat_field)
+        int_rva = to_rva(int_field) if int_field else iat_rva
+
+        dll_off = pe.rva_to_file_offset(dll_name_rva)
+        dll_name = _read_cstr(data, dll_off) if dll_off is not None else "?"
+
+        int_off = pe.rva_to_file_offset(int_rva)
+        if int_off is None or iat_rva == 0:
+            continue
+
+        _walk_thunk_array(
+            data,
+            pe,
+            int_off,
+            iat_rva,
+            thunk_size,
+            thunk_fmt,
+            ordinal_flag,
+            dll_name,
+            pe.delay_imports,
+        )
+
+
+def parse_bound_imports(data: bytes, pe: PEInfo) -> None:
+    """Parse data directory 11 (Bound Import Table).
+
+    Unlike the regular import table, bound-import entries reference DLL
+    names by an offset relative to the start of the bound-import directory
+    itself (NOT an RVA). Entries are 8-byte IMAGE_BOUND_IMPORT_DESCRIPTOR
+    structs terminated by an all-zero entry, optionally followed by
+    forwarder-ref descriptors."""
+    if len(pe.data_directories) <= DIR_BOUND_IMPORT:
+        return
+    bi_rva, bi_size = pe.data_directories[DIR_BOUND_IMPORT]
+    if bi_rva == 0 or bi_size == 0:
+        return
+    bi_off = pe.rva_to_file_offset(bi_rva)
+    if bi_off is None:
+        return
+    end = bi_off + bi_size
+
+    cur = bi_off
+    for _bi_i in range(4096):
+        if cur + 8 > end or cur + 8 > len(data):
+            break
+        ts, name_off, n_fwd = struct.unpack_from("<IHH", data, cur)
+        if ts == 0 and name_off == 0 and n_fwd == 0:
+            break
+        name_abs = bi_off + name_off
+        dll_name = _read_cstr(data, name_abs) if name_abs < len(data) else "?"
+        cur += 8
+        fwds: List[str] = []
+        # Per spec, NumberOfModuleForwarderRefs fits in a WORD — bound by
+        # 0xFFFF entries.
+        for _ in range(min(n_fwd, 0xFFFF)):
+            if cur + 8 > len(data) or cur + 8 > end:
+                break
+            _ts, fname_off, _rsv = struct.unpack_from("<IHH", data, cur)
+            fname_abs = bi_off + fname_off
+            if fname_abs < len(data):
+                fwds.append(_read_cstr(data, fname_abs))
+            cur += 8
+        pe.bound_imports.append(
+            PEBoundImport(dll=dll_name, timestamp=ts, forwarder_refs=fwds)
+        )
+
+
+def parse_load_config(data: bytes, pe: PEInfo) -> None:
+    """Parse data directory 10 (Load Config). Extracts:
+
+    - SecurityCookie VA (`/GS` cookie used to detect stack-buffer overruns)
+    - SEHandlerTable RVA + count: array of valid SEH handler RVAs the
+      MSVC `/SAFESEH` linker switch produced for PE32 binaries.
+    """
+    if len(pe.data_directories) <= DIR_LOAD_CONFIG:
+        return
+    lc_rva, lc_size = pe.data_directories[DIR_LOAD_CONFIG]
+    if lc_rva == 0 or lc_size == 0:
+        return
+    lc_off = pe.rva_to_file_offset(lc_rva)
+    if lc_off is None:
+        return
+
+    if pe.is_pe32_plus:
+        # PE32+ layout: SecurityCookie at +0x58 (QWORD); CFG fields at
+        # +0x70/+0x78/+0x80/+0x88/+0x90.
+        if lc_off + 0x60 > len(data):
+            return
+        pe.security_cookie_va = struct.unpack_from("<Q", data, lc_off + 0x58)[0]
+        if lc_off + 0x98 <= len(data):
+            cfg_table_va = struct.unpack_from("<Q", data, lc_off + 0x80)[0]
+            cfg_count = struct.unpack_from("<Q", data, lc_off + 0x88)[0]
+            pe.cfg_flags = struct.unpack_from("<I", data, lc_off + 0x90)[0]
+            _read_cfg_table(data, pe, cfg_table_va, cfg_count)
+        return
+
+    # PE32 layout: SecurityCookie at +0x3C, SEHandlerTable at +0x40,
+    # SEHandlerCount at +0x44, GuardCFFunctionTable at +0x54,
+    # GuardCFFunctionCount at +0x58, GuardFlags at +0x5C.
+    if lc_off + 0x48 > len(data):
+        return
+    pe.security_cookie_va = struct.unpack_from("<I", data, lc_off + 0x3C)[0]
+    seh_table_va = struct.unpack_from("<I", data, lc_off + 0x40)[0]
+    seh_count = struct.unpack_from("<I", data, lc_off + 0x44)[0]
+    if seh_table_va and seh_count:
+        # SEHandlerTable is a VA (already absolute), array of DWORD RVAs.
+        table_off = pe.va_to_file_offset(seh_table_va)
+        if table_off is not None:
+            # Real binaries declare hundreds of SEH handlers at most;
+            # cap at 1M for fuzz safety.
+            for i in range(min(seh_count, 1_000_000)):
+                entry_off = table_off + i * 4
+                if entry_off + 4 > len(data):
+                    break
+                pe.safe_seh_handlers.append(
+                    struct.unpack_from("<I", data, entry_off)[0]
+                )
+
+    if lc_off + 0x60 <= len(data):
+        cfg_table_va = struct.unpack_from("<I", data, lc_off + 0x54)[0]
+        cfg_count = struct.unpack_from("<I", data, lc_off + 0x58)[0]
+        pe.cfg_flags = struct.unpack_from("<I", data, lc_off + 0x5C)[0]
+        _read_cfg_table(data, pe, cfg_table_va, cfg_count)
+
+
+def _read_cfg_table(data: bytes, pe: PEInfo, table_va: int, count: int) -> None:
+    """Read a GuardCFFunctionTable. Each entry is at least 4 bytes (RVA);
+    the high bits of `GuardFlags` indicate optional metadata bytes that
+    follow each RVA. We compute the per-entry stride and harvest only the
+    RVA from each slot."""
+    if not table_va or not count:
+        return
+    table_off = pe.va_to_file_offset(table_va)
+    if table_off is None:
+        return
+    # GuardFlags bits 28..31 hold the count of extra metadata bytes per
+    # entry, capped to 7. Stride is 4 + extra_bytes.
+    extra = (pe.cfg_flags >> 28) & 0x0F
+    stride = 4 + min(extra, 7)
+    # /guard:cf tables can be large (Windows 10 ntdll has ~40k entries)
+    # but a megabyte of guarded functions is well beyond realistic.
+    for i in range(min(count, 1_000_000)):
+        entry_off = table_off + i * stride
+        if entry_off + 4 > len(data):
+            break
+        rva = struct.unpack_from("<I", data, entry_off)[0]
+        pe.cfg_function_rvas.append(rva)
+
+
+def parse_exception_table(data: bytes, pe: PEInfo) -> None:
+    """Parse data directory 3 (Exception Table). For PE32+ this is an
+    array of RUNTIME_FUNCTION entries (12 bytes each) giving definitive
+    function boundaries — useful both for surfacing real function starts
+    and for navigating SEH unwind data."""
+    if len(pe.data_directories) <= DIR_EXCEPTION:
+        return
+    et_rva, et_size = pe.data_directories[DIR_EXCEPTION]
+    if et_rva == 0 or et_size == 0:
+        return
+    et_off = pe.rva_to_file_offset(et_rva)
+    if et_off is None:
+        return
+    end = min(et_off + et_size, len(data))
+    # Bound at ~1M RUNTIME_FUNCTION entries: more than any realistic PE.
+    max_entries = min((end - et_off) // 12, 1_000_000)
+    for i in range(max_entries):
+        cur = et_off + i * 12
+        if cur + 12 > end:
+            break
+        begin, fin, uw = struct.unpack_from("<III", data, cur)
+        if begin == 0 and fin == 0 and uw == 0:
+            break
+        pe.runtime_functions.append((begin, fin, uw))
+
+
+RELOC_TYPE_ABSOLUTE = 0
+RELOC_TYPE_HIGHLOW = 3
+RELOC_TYPE_DIR64 = 10
+
+
+def parse_relocations(data: bytes, pe: PEInfo) -> None:
+    """Populate `pe.pointer_rvas` from data directory 5 (the Base Relocation
+    Table). Each block applies to one 4 KB page; entries of type 3
+    (HIGHLOW, 32-bit) or 10 (DIR64, 64-bit) mark RVAs of absolute pointers
+    that the PE loader needs to rebase. Padding entries (type 0) are
+    skipped."""
+    if len(pe.data_directories) <= DIR_BASERELOC:
+        return
+    rel_rva, rel_size = pe.data_directories[DIR_BASERELOC]
+    if rel_rva == 0 or rel_size == 0:
+        return
+    rel_off = pe.rva_to_file_offset(rel_rva)
+    if rel_off is None:
+        return
+
+    end = rel_off + rel_size
+    accept = {RELOC_TYPE_HIGHLOW, RELOC_TYPE_DIR64}
+    block = rel_off
+    while block + 8 <= end and block + 8 <= len(data):
+        page_rva, block_size = struct.unpack_from("<II", data, block)
+        if block_size < 8 or block + block_size > end:
+            break
+        entries_end = block + block_size
+        entry = block + 8
+        while entry + 2 <= entries_end:
+            word = struct.unpack_from("<H", data, entry)[0]
+            entry += 2
+            kind = word >> 12
+            if kind == RELOC_TYPE_ABSOLUTE:
+                continue
+            if kind not in accept:
+                continue
+            pe.pointer_rvas.add(page_rva + (word & 0x0FFF))
+        block += block_size
+
+
+def parse_debug(data: bytes, pe: PEInfo) -> None:
+    """Populate `pe.pdb_path` from data directory 6 (Debug). Walks the
+    IMAGE_DEBUG_DIRECTORY array, looking for a CodeView (type 2) entry that
+    embeds either an `RSDS`/`NB10` record with a trailing PDB filename."""
+    if len(pe.data_directories) <= DIR_DEBUG:
+        return
+    dbg_rva, dbg_size = pe.data_directories[DIR_DEBUG]
+    if dbg_rva == 0 or dbg_size == 0:
+        return
+    dbg_off = pe.rva_to_file_offset(dbg_rva)
+    if dbg_off is None:
+        return
+
+    end = dbg_off + dbg_size
+    entry = dbg_off
+    while entry + 28 <= end and entry + 28 <= len(data):
+        (
+            _chars,
+            _ts,
+            _vmaj,
+            _vmin,
+            entry_type,
+            size_of_data,
+            _addr_of_raw,
+            ptr_to_raw,
+        ) = struct.unpack_from("<IIHHIIII", data, entry)
+        entry += 28
+        if entry_type != 2:  # IMAGE_DEBUG_TYPE_CODEVIEW
+            continue
+        if ptr_to_raw == 0 or size_of_data < 4:
+            continue
+        if ptr_to_raw + size_of_data > len(data):
+            continue
+        cv = data[ptr_to_raw : ptr_to_raw + size_of_data]
+        magic = cv[:4]
+        if magic == b"RSDS" and len(cv) >= 24:
+            # Layout: magic(4) + GUID(16) + age(4) + name
+            g0, g1, g2, g3 = struct.unpack_from("<IHH8s", cv, 4)
+            tail = "-".join(f"{b:02X}" for b in g3)
+            pe.pdb_guid = f"{g0:08X}-{g1:04X}-{g2:04X}-{tail}"
+            pe.pdb_age = struct.unpack_from("<I", cv, 20)[0]
+            pe.pdb_path = _read_cstr(cv, 24)
+            return
+        if magic == b"NB10" and len(cv) >= 16:
+            # Layout: magic(4) + offset(4) + signature(4) + age(4) + name
+            sig = struct.unpack_from("<I", cv, 8)[0]
+            pe.pdb_guid = f"{sig:08X}"
+            pe.pdb_age = struct.unpack_from("<I", cv, 12)[0]
+            pe.pdb_path = _read_cstr(cv, 16)
+            return
+
+
+def parse_tls(data: bytes, pe: PEInfo) -> None:
+    """Populate `pe.tls_callback_vas` from data directory 9 (TLS).
+    `IMAGE_TLS_DIRECTORY` for PE32 has AddressOfCallBacks at offset 0x0C
+    (PE32+ at 0x18). The pointer dereferences to a NULL-terminated array
+    of callback VAs."""
+    if len(pe.data_directories) <= DIR_TLS:
+        return
+    tls_rva, tls_size = pe.data_directories[DIR_TLS]
+    if tls_rva == 0 or tls_size == 0:
+        return
+    tls_off = pe.rva_to_file_offset(tls_rva)
+    if tls_off is None:
+        return
+
+    if pe.is_pe32_plus:
+        if tls_off + 0x20 > len(data):
+            return
+        cb_va = struct.unpack_from("<Q", data, tls_off + 0x18)[0]
+        ptr_size = 8
+        ptr_fmt = "<Q"
+    else:
+        if tls_off + 0x10 > len(data):
+            return
+        cb_va = struct.unpack_from("<I", data, tls_off + 0x0C)[0]
+        ptr_size = 4
+        ptr_fmt = "<I"
+
+    if cb_va == 0:
+        return
+    cb_off = pe.va_to_file_offset(cb_va)
+    if cb_off is None:
+        return
+
+    # Bound at 1024 TLS callbacks — already absurd for a real binary.
+    for _cb_i in range(1024):
+        if cb_off + ptr_size > len(data):
+            break
+        v = struct.unpack_from(ptr_fmt, data, cb_off)[0]
+        if v == 0:
+            break
+        pe.tls_callback_vas.append(v)
+        cb_off += ptr_size
+
+
+def linker_version_label(major: int, minor: int) -> str:
+    """Translate an `IMAGE_OPTIONAL_HEADER.MajorLinkerVersion` value into a
+    rough Visual C++ / linker product name. Real binaries are usually
+    produced by Microsoft's `link.exe`; the major number tracks the MSVC
+    release closely enough to surface as a hint."""
+    mapping = {
+        2: "MSVC 2.x",
+        3: "MSVC 4.x",
+        4: "MSVC 4.x",
+        5: "MSVC 5.0",
+        6: "MSVC 6.0",
+        7: "MSVC 7.0 / VS .NET 2002",
+        8: "MSVC 8.0 / VS 2005",
+        9: "MSVC 9.0 / VS 2008",
+        10: "MSVC 10.0 / VS 2010",
+        11: "MSVC 11.0 / VS 2012",
+        12: "MSVC 12.0 / VS 2013",
+        14: "MSVC 14.x / VS 2015-2022",
+    }
+    return mapping.get(major, f"linker v{major}.{minor:02d}")
+
+
+RESOURCE_TYPE_NAMES = {
+    1: "CURSOR",
+    2: "BITMAP",
+    3: "ICON",
+    4: "MENU",
+    5: "DIALOG",
+    6: "STRING",
+    7: "FONTDIR",
+    8: "FONT",
+    9: "ACCELERATOR",
+    10: "RCDATA",
+    11: "MESSAGETABLE",
+    12: "GROUP_CURSOR",
+    14: "GROUP_ICON",
+    16: "VERSION",
+    17: "DLGINCLUDE",
+    19: "PLUGPLAY",
+    20: "VXD",
+    21: "ANICURSOR",
+    22: "ANIICON",
+    23: "HTML",
+    24: "MANIFEST",
+}
+
+
+def _read_resource_name(data: bytes, name_field: int, root_off: int) -> object:
+    """Return either the integer ID, or the decoded UTF-16 name string."""
+    if name_field & 0x80000000:
+        name_off = root_off + (name_field & 0x7FFFFFFF)
+        if name_off + 2 > len(data):
+            return name_field & 0x7FFFFFFF
+        length = struct.unpack_from("<H", data, name_off)[0]
+        text_off = name_off + 2
+        text_end = text_off + length * 2
+        if text_end > len(data):
+            return name_field & 0x7FFFFFFF
+        return data[text_off:text_end].decode("utf-16-le", errors="replace")
+    return name_field
+
+
+def parse_resources(data: bytes, pe: PEInfo) -> None:
+    """Walk the .rsrc tree (3 nominal levels: type → name → language) and
+    record each leaf in `pe.resources`."""
+    if len(pe.data_directories) <= DIR_RESOURCE:
+        return
+    rsrc_rva, rsrc_size = pe.data_directories[DIR_RESOURCE]
+    if rsrc_rva == 0 or rsrc_size == 0:
+        return
+    root_off = pe.rva_to_file_offset(rsrc_rva)
+    if root_off is None:
+        return
+
+    def walk_dir(dir_off: int, depth: int, path: tuple) -> None:
+        # Bound the recursion: a valid resource tree has only 3 levels
+        # (type → name → language). Anything deeper indicates a circular
+        # reference or malformed data — bail out rather than recurse.
+        if depth > 8:
+            return
+        if dir_off + 16 > len(data):
+            return
+        (
+            _chars,
+            _ts,
+            _vmaj,
+            _vmin,
+            n_named,
+            n_id,
+        ) = struct.unpack_from("<IIHHHH", data, dir_off)
+        # Cap entry count so a fuzzed PE can't make us iterate forever.
+        total = min(n_named + n_id, 65536)
+        entry_off = dir_off + 16
+        for _ in range(total):
+            if entry_off + 8 > len(data):
+                return
+            name_field, data_field = struct.unpack_from("<II", data, entry_off)
+            entry_off += 8
+            name_val = _read_resource_name(data, name_field, root_off)
+            if data_field & 0x80000000:
+                sub_off = root_off + (data_field & 0x7FFFFFFF)
+                walk_dir(sub_off, depth + 1, path + (name_val,))
+            else:
+                leaf_off = root_off + data_field
+                if leaf_off + 16 > len(data):
+                    continue
+                leaf_rva, leaf_size, _cp, _rsv = struct.unpack_from(
+                    "<IIII", data, leaf_off
+                )
+                if len(path) >= 2:
+                    rtype, rid = path[0], path[1]
+                else:
+                    rtype, rid = path[0], None
+                pe.resources.append(
+                    PEResource(
+                        rtype=rtype,
+                        rid=rid,
+                        language=name_val if isinstance(name_val, int) else 0,
+                        rva=leaf_rva,
+                        size=leaf_size,
+                    )
+                )
+
+    walk_dir(root_off, 0, ())
+
+
+def _align4(off: int) -> int:
+    return (off + 3) & ~3
+
+
+def _read_wstr(blob: bytes, off: int) -> tuple:
+    """Read a UTF-16 NUL-terminated string starting at `off`. Returns
+    (decoded_string, next_offset_past_terminator)."""
+    end = off
+    while end + 1 < len(blob):
+        if blob[end] == 0 and blob[end + 1] == 0:
+            break
+        end += 2
+    text = blob[off:end].decode("utf-16-le", errors="replace")
+    return text, end + 2
+
+
+def _walk_versioninfo_node(blob: bytes, off: int, out: dict, base_off: int) -> int:
+    """Walk one VS_VERSIONINFO-style node starting at `off`. Recurses into
+    children. Strings are recorded into `out`. Returns the offset just past
+    this node (already aligned)."""
+    if off + 6 > len(blob):
+        return len(blob)
+    w_length, w_value_length, w_type = struct.unpack_from("<HHH", blob, off)
+    node_end = off + w_length
+    if w_length == 0 or node_end > len(blob):
+        return len(blob)
+    key, body_off = _read_wstr(blob, off + 6)
+    body_off = _align4(body_off - base_off) + base_off
+
+    value_end = body_off
+    if (
+        w_value_length > 0
+        and body_off + (w_value_length * (2 if w_type == 1 else 1)) <= node_end
+    ):
+        if w_type == 1:  # text — value is UTF-16
+            value = blob[body_off : body_off + w_value_length * 2]
+            # Strip trailing NUL WCHAR(s) without splitting on misaligned
+            # zero pairs.
+            chars = [
+                value[i : i + 2] for i in range(0, len(value) - (len(value) & 1), 2)
+            ]
+            text_chars = []
+            for wch in chars:
+                if wch == b"\x00\x00":
+                    break
+                text_chars.append(wch)
+            value_text = b"".join(text_chars).decode("utf-16-le", errors="replace")
+            if key not in {"VS_VERSION_INFO", "StringFileInfo", "VarFileInfo"}:
+                out[key] = value_text
+            value_end = body_off + w_value_length * 2
+        else:
+            # Binary value. The VarFileInfo's "Translation" child carries
+            # an array of (LANGID, codepage) WORD pairs as binary data —
+            # one entry per locale supported by the version resource.
+            if key == "Translation" and w_value_length >= 4:
+                pairs = []
+                pair_off = body_off
+                while pair_off + 4 <= node_end:
+                    langid, codepage = struct.unpack_from("<HH", blob, pair_off)
+                    pairs.append((langid, codepage))
+                    pair_off += 4
+                    if pair_off - body_off >= w_value_length:
+                        break
+                # Render as a comma-separated list of `0xLLLL/0xCCCC`
+                # so the version_info dict stays str→str.
+                out["Translation"] = ", ".join(
+                    f"0x{lid:04X}/0x{cp:04X}" for lid, cp in pairs
+                )
+            value_end = body_off + w_value_length
+    value_end = _align4(value_end - base_off) + base_off
+
+    # Recurse into children, if any space remains.
+    child = value_end
+    while child < node_end:
+        next_child = _walk_versioninfo_node(blob, child, out, base_off)
+        if next_child <= child:
+            break
+        child = _align4(next_child - base_off) + base_off
+
+    return node_end
+
+
+_UNWIND_OP_NAMES = {
+    0: "PUSH_NONVOL",
+    1: "ALLOC_LARGE",
+    2: "ALLOC_SMALL",
+    3: "SET_FPREG",
+    4: "SAVE_NONVOL",
+    5: "SAVE_NONVOL_FAR",
+    6: "EPILOG",
+    7: "SPARE_CODE",
+    8: "SAVE_XMM128",
+    9: "SAVE_XMM128_FAR",
+    10: "PUSH_MACHFRAME",
+}
+
+
+def parse_clr_header(data: bytes, pe: PEInfo) -> None:
+    """Parse data directory 14 (CLR Runtime Header) when present.
+    Identifies the binary as a .NET assembly and surfaces metadata /
+    entry-point / strong-name fields so the analyst doesn't have to
+    chase down the assembly's structure manually."""
+    if len(pe.data_directories) <= DIR_COM_DESCRIPTOR:
+        return
+    clr_rva, clr_size = pe.data_directories[DIR_COM_DESCRIPTOR]
+    if clr_rva == 0 or clr_size == 0:
+        return
+    f_off = pe.rva_to_file_offset(clr_rva)
+    if f_off is None or f_off + 72 > len(data):
+        return
+    (
+        cb_size,
+        rt_major,
+        rt_minor,
+        md_rva,
+        md_size,
+        flags,
+        entry_tok,
+        res_rva,
+        res_size,
+        sn_rva,
+        sn_size,
+    ) = struct.unpack_from("<IHHIIIIIIII", data, f_off)
+    pe.clr_header = CLRHeader(
+        cb_size=cb_size,
+        runtime_major=rt_major,
+        runtime_minor=rt_minor,
+        metadata_rva=md_rva,
+        metadata_size=md_size,
+        flags=flags,
+        entry_point_token_or_rva=entry_tok,
+        resources_rva=res_rva,
+        resources_size=res_size,
+        strong_name_signature_rva=sn_rva,
+        strong_name_signature_size=sn_size,
+    )
+
+
+def parse_unwind_info(data: bytes, pe: PEInfo) -> None:
+    """Decode each PE32+ RUNTIME_FUNCTION's IMAGE_UNWIND_INFO record.
+
+    The Microsoft x64 SEH spec lays UNWIND_INFO out as:
+        +0  : byte    Version (low 3 bits) | Flags (high 5 bits)
+        +1  : byte    SizeOfProlog
+        +2  : byte    CountOfUnwindCodes
+        +3  : byte    FrameRegister (low 4 bits) | FrameRegOffset*16 (high 4)
+        +4  : code[]  CountOfUnwindCodes × 2 bytes
+        +    : padding to QWORD
+        +    : optional handler / chain-info (per flags)
+
+    Each unwind code is a `(prolog_offset, opcode, info)` triple. We
+    only decode the ops + chained-record pointer; exception-handler
+    data isn't surfaced (rarely useful from a disassembly viewpoint).
+    """
+    if not pe.is_pe32_plus:
+        return
+    seen: Set[int] = set()
+    for begin, _end, raw_uw in pe.runtime_functions:
+        uw = raw_uw & 0x7FFFFFFF
+        if not uw or uw in seen:
+            continue
+        seen.add(uw)
+        f_off = pe.rva_to_file_offset(uw)
+        if f_off is None or f_off + 4 > len(data):
+            continue
+        b0, b1, n_codes, b3 = struct.unpack_from("<BBBB", data, f_off)
+        version = b0 & 0x07
+        flags = b0 >> 3
+        prolog_size = b1
+        frame_register = b3 & 0x0F
+        frame_register_offset = (b3 >> 4) * 16
+
+        codes_off = f_off + 4
+        codes_end = codes_off + n_codes * 2
+        if codes_end > len(data):
+            continue
+        codes: List[Tuple[int, str, int]] = []
+        i = 0
+        while i < n_codes:
+            code_off = codes_off + i * 2
+            offset_in_prolog = data[code_off]
+            packed = data[code_off + 1]
+            op = packed & 0x0F
+            info = packed >> 4
+            codes.append((offset_in_prolog, _UNWIND_OP_NAMES.get(op, f"op{op}"), info))
+            # Ops 1, 4, 5, 8, 9 carry extra slots — skip them so we
+            # don't misread the next code's prolog_offset.
+            extra_slots = {1: 1 + (1 if info else 0), 4: 1, 5: 2, 8: 1, 9: 2}.get(op, 0)
+            i += 1 + extra_slots
+
+        chained_rva: Optional[int] = None
+        if flags & 0x04:  # UNW_FLAG_CHAININFO
+            # The chained RUNTIME_FUNCTION starts at the byte
+            # immediately after the unwind codes, aligned to DWORD.
+            chain_off = (codes_end + 3) & ~3
+            if chain_off + 12 <= len(data):
+                chained_rva = struct.unpack_from("<I", data, chain_off)[0]
+
+        pe.unwind_info[uw] = UnwindInfo(
+            version=version,
+            flags=flags,
+            prolog_size=prolog_size,
+            frame_register=frame_register,
+            frame_register_offset=frame_register_offset,
+            codes=codes,
+            chained_function_rva=chained_rva,
+        )
+
+
+def parse_coff_symtab(data: bytes, pe: PEInfo) -> None:
+    """Parse the deprecated COFF symbol table when the optional header
+    points at one. Modern MSVC binaries leave PointerToSymbolTable
+    zero and ship debug info via PDB; this parser exists so vintage
+    MSVC 4-6 binaries (and some object-file-style PEs) get their
+    embedded symbol records surfaced as `pe.coff_symbols`.
+
+    Each IMAGE_SYMBOL record is 18 bytes:
+      0   :  8 bytes  Name (zero-terminated; if first 4 bytes are 0
+                              the next 4 bytes are a string-table offset)
+      8   :  4 bytes  Value
+      12  :  2 bytes  SectionNumber (signed: 0/-1/-2 are special)
+      14  :  2 bytes  Type
+      16  :  1 byte   StorageClass
+      17  :  1 byte   NumberOfAuxSymbols
+
+    The string table immediately follows the symbol records; its
+    leading DWORD is its total length.
+    """
+    if not pe.coff_symtab_ptr or not pe.coff_num_symbols:
+        return
+    base = pe.coff_symtab_ptr
+    n_syms = pe.coff_num_symbols
+    end = base + n_syms * 18
+    if end > len(data):
+        return
+    str_table_off = end
+
+    def _read_name(record_off: int) -> str:
+        name_bytes = data[record_off : record_off + 8]
+        # If the first 4 bytes are zero, the next 4 are the string-
+        # table offset (relative to the string table base).
+        if name_bytes[:4] == b"\x00\x00\x00\x00":
+            str_off = struct.unpack_from("<I", name_bytes, 4)[0]
+            abs_off = str_table_off + str_off
+            if abs_off >= len(data):
+                return ""
+            return _read_cstr(data, abs_off)
+        return name_bytes.split(b"\x00", 1)[0].decode("ascii", errors="replace")
+
+    # Cap iteration the same way other parsers do.
+    i = 0
+    while i < min(n_syms, 1_000_000):
+        rec = base + i * 18
+        if rec + 18 > len(data):
+            break
+        name = _read_name(rec)
+        value, section_number, sym_type, storage_class, aux = struct.unpack_from(
+            "<IhHBB", data, rec + 8
+        )
+        pe.coff_symbols.append(
+            COFFSymbol(
+                name=name,
+                value=value,
+                section_number=section_number,
+                sym_type=sym_type,
+                storage_class=storage_class,
+                aux_records=aux,
+            )
+        )
+        # Skip the aux records — splat doesn't currently surface them
+        # but their slot count needs to advance the iterator so the
+        # next named symbol lines up.
+        i += 1 + aux
+
+
+def parse_version_info(data: bytes, pe: PEInfo) -> None:
+    """Decode the VS_VERSIONINFO StringTable entries from every VERSION
+    resource (`rtype == 16`). Populates `pe.version_info` with keys like
+    `CompanyName`, `FileVersion`, `ProductName`, `OriginalFilename`."""
+    for r in pe.resources:
+        if not (isinstance(r.rtype, int) and r.rtype == 16):
+            continue
+        f_off = pe.rva_to_file_offset(r.rva)
+        if f_off is None:
+            continue
+        if f_off + r.size > len(data):
+            continue
+        blob = data[f_off : f_off + r.size]
+        try:
+            _walk_versioninfo_node(blob, 0, pe.version_info, 0)
+        except Exception:
+            # Malformed VERSIONINFO; leave whatever we already extracted.
+            continue
+
+
+def init(target_bytes: bytes):
+    """Splat platform entry point — called once per run with the full
+    target file bytes. Parses the PE, stashes the result in the
+    module-level `info` and `raw_image` globals (consulted by every
+    segtype + the disassembler), and rejects architectures we don't
+    have a Capstone backend for (ARM32 / ARM64 / unsupported machines)
+    with a friendly error pointing the user at the limitation."""
+    global info, raw_image
+    info = parse_pe(target_bytes)
+    raw_image = target_bytes
+    if info.machine in (MACHINE_ARM64, MACHINE_ARM32):
+        log.error(
+            f"win32 target uses ARM architecture (machine 0x{info.machine:04X}); "
+            "the splat win32 platform currently only supports x86 / x86_64 "
+            "(Capstone-driven disassembly). PE structures parse cleanly but "
+            "instruction decode would need a separate backend."
+        )
+    if info.machine not in (MACHINE_I386, MACHINE_AMD64):
+        log.error(
+            f"win32 target has unsupported machine type 0x{info.machine:04X} "
+            "(only i386 / amd64 are recognized)"
+        )

From 03f11fef293f7cfc3b6bb0caddc4a9ecae767263 Mon Sep 17 00:00:00 2001
From: "Marcel W. Wysocki" <maci.stgn@gmail.com>
Date: Wed, 20 May 2026 11:55:32 +0800
Subject: [PATCH 2/5] win32: add segtypes (header, text/asm, data, rodata, bss,
 bin, pdata)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Eight segtype modules under src/splat/segtypes/win32/:

- header.py — emits a structured .section .header byte-by-byte
  dump of the DOS stub + COFF + optional header (PE32 and PE32+
  variants) + every data directory + the section table. Each field
  renders as a width-correct .short / .long / .quad directive
  with a trailing comment naming the field. A one-page human-
  readable summary block (Machine / ImageBase / EntryPoint /
  Subsystem / characteristics flags by name / sections / exports /
  imports grouped by DLL / PDB GUID/age / TLS / resources / .NET CLR
  fields) precedes the byte emission.

- text.py — two-pass Capstone disassembly. The first pass walks
  every direct call / jmp <imm> target inside the segment to seed
  function / branch labels (func_<va>, loc_<va>); the second
  emits instructions with operand strings rewritten so addresses,
  IAT slots, exports, and RIP-relative loads resolve to readable
  labels. GAS-incompatible Capstone outputs (popal, xword ptr,
  scalar SSE size qualifiers, riz/eiz SIB placeholders,
  oversized enter immediates, etc.) are rewritten so the .s
  output assembles cleanly. With exact_encoding: true the
  instruction bytes are emitted verbatim with the decoded mnemonic
  as a trailing comment — necessary for byte-identical round-trip
  through GAS + objcopy.

- asm.py — alias so YAML can use type: asm (other splat
  platforms' convention).

- data.py — heuristic emission of .data bytes:
    - pointer slots flagged by base-relocations -> .long / .quad
      with symbolic target labels (or raw hex when exact_encoding);
    - NUL-terminated printable runs -> .asciz;
    - UTF-16LE wide strings -> raw .byte plus a /* L"..." */
      preview comment;
    - long zero runs collapsed into .space N directives.

- rodata.py — alias on top of data.py with LINKER_SECTION = .rodata
  and string-detection / pointer-heuristic on by default (read-only
  data overwhelmingly contains strings or function-pointer tables).

- bss.py — NOLOAD reservation with .space N derived from the
  YAML's bss_size: or vram_end - vram_start.

- bin.py — opaque marker class reusing CommonSegBin for sections
  whose bytes are structured loader-time data (.rsrc / .reloc /
  .idata / coff_symtab / signature).

- pdata.py — PE32+ exception directory: each RUNTIME_FUNCTION
  record renders as a .long Begin, End, Unwind row with
  func_<va> labels resolved by reflinking through the symbol
  table. The unwind RVA is emitted symbolically as
  (unwind_<va> - ImageBase) | 0x80000000 so the chained-record
  flag stays correct. In exact_encoding mode the rows emit raw
  hex RVAs for byte-identical reassembly. Each row's trailing
  comment carries the decoded UNWIND_INFO opcode list (PUSH_NONVOL,
  ALLOC_SMALL, SAVE_NONVOL, etc.) when one was found.

Behaviour is gated entirely off the win32 platform module added in
the previous commit — no changes to common splat segtypes.
---
 src/splat/segtypes/win32/__init__.py |   8 +
 src/splat/segtypes/win32/asm.py      |  14 +
 src/splat/segtypes/win32/bin.py      |  17 +
 src/splat/segtypes/win32/bss.py      |  61 ++
 src/splat/segtypes/win32/data.py     | 496 ++++++++++++++++
 src/splat/segtypes/win32/header.py   | 713 +++++++++++++++++++++++
 src/splat/segtypes/win32/pdata.py    | 196 +++++++
 src/splat/segtypes/win32/rodata.py   |  27 +
 src/splat/segtypes/win32/text.py     | 841 +++++++++++++++++++++++++++
 9 files changed, 2373 insertions(+)
 create mode 100644 src/splat/segtypes/win32/__init__.py
 create mode 100644 src/splat/segtypes/win32/asm.py
 create mode 100644 src/splat/segtypes/win32/bin.py
 create mode 100644 src/splat/segtypes/win32/bss.py
 create mode 100644 src/splat/segtypes/win32/data.py
 create mode 100644 src/splat/segtypes/win32/header.py
 create mode 100644 src/splat/segtypes/win32/pdata.py
 create mode 100644 src/splat/segtypes/win32/rodata.py
 create mode 100644 src/splat/segtypes/win32/text.py

diff --git a/src/splat/segtypes/win32/__init__.py b/src/splat/segtypes/win32/__init__.py
new file mode 100644
index 00000000..a548542c
--- /dev/null
+++ b/src/splat/segtypes/win32/__init__.py
@@ -0,0 +1,8 @@
+from . import header as header
+from . import text as text
+from . import asm as asm
+from . import data as data
+from . import rodata as rodata
+from . import bss as bss
+from . import bin as bin
+from . import pdata as pdata
diff --git a/src/splat/segtypes/win32/asm.py b/src/splat/segtypes/win32/asm.py
new file mode 100644
index 00000000..f4661c9c
--- /dev/null
+++ b/src/splat/segtypes/win32/asm.py
@@ -0,0 +1,14 @@
+"""`type: asm` alias for the win32 text segment.
+
+Lets win32 YAML use the more conventional `asm` segtype name (matching the
+other platforms) instead of `text`. Same behaviour as `Win32SegText`."""
+
+from .text import Win32SegText
+
+
+class Win32SegAsm(Win32SegText):
+    """Alias for Win32SegText so YAML can use `type: asm` (the
+    convention on other splat platforms) instead of `type: text`.
+    No behavioural difference."""
+
+    pass
diff --git a/src/splat/segtypes/win32/bin.py b/src/splat/segtypes/win32/bin.py
new file mode 100644
index 00000000..8f6f3d29
--- /dev/null
+++ b/src/splat/segtypes/win32/bin.py
@@ -0,0 +1,17 @@
+"""Win32 binary blob segment — reuse the common bin segment for things like
+.rsrc / .reloc / .idata / coff_symtab / signature where the section's
+bytes are structured loader-time data rather than meaningful code or
+labelled pointers. Splat writes the raw bytes to a `.bin` file under
+asset_path; the linker layout (or the win32_reassemble post-process)
+incorporates them at the right rom_start without any decoding pass."""
+
+from ..common.bin import CommonSegBin
+
+
+class Win32SegBin(CommonSegBin):
+    """Win32-specific marker class — same behaviour as CommonSegBin,
+    re-exported so YAML `type: bin` resolves through the win32
+    segtype lookup. Used for .rsrc / .reloc / .idata / coff_symtab /
+    signature segments produced by create_win32_config."""
+
+    pass
diff --git a/src/splat/segtypes/win32/bss.py b/src/splat/segtypes/win32/bss.py
new file mode 100644
index 00000000..e53fb04c
--- /dev/null
+++ b/src/splat/segtypes/win32/bss.py
@@ -0,0 +1,61 @@
+"""Win32 .bss segment — emits a NOLOAD reservation."""
+
+from pathlib import Path
+from typing import Optional
+
+from ..common.segment import CommonSegment
+from ...util import options
+
+
+class Win32SegBss(CommonSegment):
+    """Uninitialised data segment (`.bss`).
+
+    Emits a `.section .bss, "wa"` block with a single `.space N`
+    directive — the loader zero-fills these bytes at map time, so
+    they have no on-file representation. `reserved_size` resolves to
+    the YAML's `bss_size:` value if set, else `vram_end - vram_start`,
+    else zero (in which case the segment is degenerate and the create-
+    config layer skips it)."""
+
+    @staticmethod
+    def is_noload() -> bool:
+        return True
+
+    def get_linker_section(self) -> str:
+        return ".bss"
+
+    def get_section_flags(self) -> Optional[str]:
+        return "wa"
+
+    def out_path(self) -> Path:
+        return options.opts.data_path / self.dir / f"{self.name}.s"
+
+    @property
+    def reserved_size(self) -> int:
+        if isinstance(self.yaml, dict):
+            sz = self.yaml.get("bss_size")
+            if sz is not None:
+                return int(sz)
+        if self.vram_start is not None and self.vram_end is not None:
+            return self.vram_end - self.vram_start
+        return 0
+
+    def should_split(self) -> bool:
+        return self.extract and options.opts.is_mode_active("code")
+
+    def split(self, rom_bytes: bytes):
+        path = self.out_path()
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+        size = self.reserved_size
+        with path.open("w", encoding="utf-8", newline="\n") as f:
+            preamble = options.opts.generated_s_preamble
+            if preamble:
+                f.write(preamble + "\n\n")
+            f.write(self.get_section_asm_line() + "\n\n")
+            f.write(f".global {self.name}\n")
+            f.write(f"{self.name}:\n")
+            if size > 0:
+                f.write(f"    .space 0x{size:X}\n")
+
+        self.log(f"Wrote {self.name} to {path}")
diff --git a/src/splat/segtypes/win32/data.py b/src/splat/segtypes/win32/data.py
new file mode 100644
index 00000000..338d5054
--- /dev/null
+++ b/src/splat/segtypes/win32/data.py
@@ -0,0 +1,496 @@
+"""Win32 .data segment — dumped as a `.byte` block so it can be reassembled
+without depending on spimdisasm's data analyzer."""
+
+import struct
+from pathlib import Path
+from typing import List, Optional
+
+from ..common.segment import CommonSegment
+from ...util import options
+
+
+# Minimum length (excluding the NUL terminator) of an ASCII run that should
+# be emitted as `.asciz` rather than raw bytes.
+STRING_MIN_LEN = 4
+
+
+def _is_string_byte(b: int) -> bool:
+    # ASCII control/printable subset OR Latin-1 Supplement printables.
+    # Mirrors the wide-string scanner; covers localised ANSI resources
+    # written in Western European code pages.
+    return b == 0x09 or b == 0x0A or b == 0x0D or 0x20 <= b <= 0x7E or 0xA0 <= b <= 0xFF
+
+
+def _escape_string(raw: bytes) -> str:
+    out = []
+    for b in raw:
+        if b == ord('"'):
+            out.append('\\"')
+        elif b == ord("\\"):
+            out.append("\\\\")
+        elif b == 0x0A:
+            out.append("\\n")
+        elif b == 0x0D:
+            out.append("\\r")
+        elif b == 0x09:
+            out.append("\\t")
+        elif 0x20 <= b <= 0x7E:
+            out.append(chr(b))
+        else:
+            out.append(f"\\x{b:02x}")
+    return "".join(out)
+
+
+def _scan_string(data: bytes, start: int) -> Optional[int]:
+    """If a printable run beginning at `start` and ending in a NUL byte is
+    at least STRING_MIN_LEN characters long, return the end offset (one
+    past the NUL). Otherwise return None."""
+    i = start
+    while i < len(data) and _is_string_byte(data[i]):
+        i += 1
+    if i >= len(data) or data[i] != 0:
+        return None
+    if (i - start) < STRING_MIN_LEN:
+        return None
+    return i + 1
+
+
+# Minimum length in WCHARs (excluding the WCHAR NUL terminator) for a
+# UTF-16LE string to be recognised as such.
+WIDE_STRING_MIN_LEN = 4
+
+
+def _scan_wide_string(data: bytes, start: int) -> Optional[int]:
+    """Detect a UTF-16LE printable run terminated by `\\x00\\x00`. Returns
+    the end offset (one past the terminating WCHAR), or None when no
+    valid wide string of sufficient length is present.
+
+    Only matches at even offsets — WCHAR strings are 2-byte aligned."""
+    if start & 1:
+        return None
+    i = start
+    count = 0
+    while i + 1 < len(data):
+        lo, hi = data[i], data[i + 1]
+        if hi != 0:
+            # Cautious: reject non-Latin-1 to avoid false positives.
+            return None
+        if lo == 0:
+            # WCHAR terminator.
+            break
+        # ASCII control/printable subset OR Latin-1 supplement printables
+        # (0xA0–0xFF, NBSP through ÿ). Covers German umlauts, accented
+        # French chars, Spanish ñ, etc. — common in localised resources.
+        if not (_is_string_byte(lo) or 0xA0 <= lo <= 0xFF):
+            return None
+        count += 1
+        i += 2
+    if i + 1 >= len(data):
+        return None
+    if data[i] != 0 or data[i + 1] != 0:
+        return None
+    if count < WIDE_STRING_MIN_LEN:
+        return None
+    return i + 2
+
+
+def _decode_wide(raw: bytes) -> str:
+    """Decode a WCHAR string body (no terminator) into a printable form
+    using the same escapes as `_escape_string`."""
+    try:
+        s = raw.decode("utf-16-le", errors="replace")
+    except Exception:
+        s = ""
+    out = []
+    for ch in s:
+        b = ord(ch)
+        if b == ord('"'):
+            out.append('\\"')
+        elif b == ord("\\"):
+            out.append("\\\\")
+        elif b == 0x0A:
+            out.append("\\n")
+        elif b == 0x0D:
+            out.append("\\r")
+        elif b == 0x09:
+            out.append("\\t")
+        elif 0x20 <= b <= 0x7E:
+            out.append(chr(b))
+        else:
+            out.append(f"\\u{b:04x}")
+    return "".join(out)
+
+
+class Win32SegData(CommonSegment):
+    """Writable initialised data segment (`.data` in MASM lingo).
+
+    Emits a `.byte` / `.long` / `.quad` representation of the
+    section bytes. Detects:
+    - pointer slots flagged by base-relocations (and synthesises a
+      `func_<va>` / `D_<va>` label for the target);
+    - NUL-terminated printable strings → `.asciz`;
+    - UTF-16LE wide-string runs → preserved as raw bytes with a
+      `/* L"..." */` preview comment;
+    - long zero runs → collapsed into `.space N` directives.
+    `exact_encoding: true` in the YAML disables every heuristic so
+    bytes pass through verbatim."""
+
+    LINKER_SECTION = ".data"
+    SECTION_FLAGS = "wa"
+    # Detect printable NUL-terminated runs and emit them as `.asciz`.
+    # Enabled by default; .data has plenty of CRT strings, source paths,
+    # and format strings worth surfacing. The min-length filter (see
+    # `data._scan_string`) keeps the false-positive rate low.
+    DETECT_STRINGS = True
+    # When the PE has no .reloc table (RELOCS_STRIPPED EXEs) we have no
+    # ground truth for what bytes are pointers. Subclasses that opt in get
+    # a heuristic scan: any 4-byte-aligned word whose value falls inside an
+    # image section is treated as a pointer. Off by default to avoid
+    # rewriting integer data as bogus pointers.
+    HEURISTIC_POINTERS = False
+
+    @property
+    def exact_encoding(self) -> bool:
+        """When enabled, pointer slots emit raw `.long 0xN` / `.quad 0xN`
+        instead of `.long <label>`, and strings are NOT extracted (every
+        byte stays as `.byte`). Result: byte-identical .data after a
+        standalone `as` assembly (no linker required to resolve labels).
+        Inherits from parent code-group YAML if not set per-subsegment."""
+        from ...platforms.win32 import resolve_exact_encoding
+
+        return resolve_exact_encoding(self.yaml, self.parent)
+
+    @staticmethod
+    def is_data() -> bool:
+        return True
+
+    def get_linker_section(self) -> str:
+        return self.LINKER_SECTION
+
+    def get_section_flags(self) -> Optional[str]:
+        return self.SECTION_FLAGS
+
+    def out_path(self) -> Path:
+        return options.opts.data_path / self.dir / f"{self.name}.s"
+
+    def should_split(self) -> bool:
+        return (
+            self.extract
+            and options.opts.is_mode_active("code")
+            and self.rom_start is not None
+            and self.rom_end is not None
+        )
+
+    # Minimum length of an all-zero run before we collapse it into a
+    # single `.space` directive.
+    ZERO_RUN_MIN = 8
+
+    def _emit_byte_chunk(self, data: bytes, start: int, end: int) -> List[str]:
+        """Emit bytes in 16-byte rows, collapsing any run of NULs of length
+        ≥ ZERO_RUN_MIN into a single `.space N` line."""
+        lines: List[str] = []
+        i = start
+        pending_start = start
+        while i < end:
+            # Detect a NUL run starting at `i`.
+            if data[i] == 0:
+                j = i
+                while j < end and data[j] == 0:
+                    j += 1
+                if j - i >= self.ZERO_RUN_MIN:
+                    # Flush any non-zero bytes that came before the run.
+                    if pending_start < i:
+                        for k in range(pending_start, i, 16):
+                            chunk = data[k : min(k + 16, i)]
+                            hexed = ", ".join(f"0x{b:02X}" for b in chunk)
+                            lines.append(f"    .byte {hexed}")
+                    lines.append(f"    .space 0x{j - i:X}")
+                    i = j
+                    pending_start = i
+                    continue
+                # Sub-threshold zero run — skip past the whole thing
+                # rather than re-scanning every byte; saves O(MIN * N) on
+                # data with many short zero clusters.
+                i = j
+                continue
+            i += 1
+        if pending_start < end:
+            for k in range(pending_start, end, 16):
+                chunk = data[k : min(k + 16, end)]
+                hexed = ", ".join(f"0x{b:02X}" for b in chunk)
+                lines.append(f"    .byte {hexed}")
+        return lines
+
+    def _pointer_offsets(self, data: bytes) -> List[int]:
+        """Offsets (within `data`) where a 32-bit pointer lives.
+
+        Prefers the PE Base Relocation Table when present. Falls back to a
+        heuristic scan — opt-in via `HEURISTIC_POINTERS` — that classifies
+        any 4-byte-aligned word as a pointer when its value lands inside an
+        image section."""
+        if self.vram_start is None:
+            return []
+        from ...platforms import win32 as win32_platform
+
+        pe = win32_platform.info
+        data_len = len(data)
+        seg_start_rva = self.vram_start - pe.image_base
+        seg_end_rva = seg_start_rva + data_len
+
+        if pe.pointer_rvas:
+            return sorted(
+                rva - seg_start_rva
+                for rva in pe.pointer_rvas
+                if seg_start_rva <= rva < seg_end_rva
+            )
+
+        if not self.HEURISTIC_POINTERS:
+            return []
+
+        # Heuristic scan. Restrict candidates to values that target a code
+        # section — pointer tables we care about in stripped EXEs (vtables,
+        # jump tables) all point at executable code, and the alternative
+        # (accept any image-resident value) yields too many false positives
+        # from 4-character tags and ID constants.
+        code_ranges = [
+            (
+                pe.image_base + s.virtual_address,
+                pe.image_base + s.virtual_address + max(s.virtual_size, s.raw_size),
+            )
+            for s in pe.sections
+            if s.is_code
+        ]
+        if not code_ranges:
+            return []
+        # Plausible x86 function-prologue first-bytes. Reduces heuristic
+        # false positives (ASCII 4-char tags etc. happen to look like
+        # in-image pointers but never point at a real instruction start).
+        prologue_first_bytes = {
+            0x50,
+            0x51,
+            0x52,
+            0x53,
+            0x54,
+            0x55,
+            0x56,
+            0x57,  # push r32
+            0x6A,
+            0x68,  # push imm
+            0x80,
+            0x81,
+            0x83,  # ALU r/m, imm
+            0x8B,
+            0x89,
+            0x8A,
+            0x88,  # mov r/m, ...
+            0x8C,
+            0x8E,  # mov sreg/r
+            0x8D,  # lea
+            0xB0,
+            0xB1,
+            0xB2,
+            0xB3,
+            0xB4,
+            0xB5,
+            0xB6,
+            0xB7,  # mov r8, imm8
+            0xB8,
+            0xB9,
+            0xBA,
+            0xBB,
+            0xBC,
+            0xBD,
+            0xBE,
+            0xBF,  # mov r32, imm32
+            0xC6,
+            0xC7,  # mov r/m, imm
+            0xE8,
+            0xE9,  # call/jmp rel32
+            0xFF,  # call/jmp [...]
+            0xC2,
+            0xC3,  # ret (leaf)
+            0xCB,
+            0xCA,  # retf
+            0xCC,  # int3
+            0xEB,  # short jmp
+            0x33,
+            0x31,  # xor reg, reg
+            0x0F,  # two-byte op
+            0x66,  # opsize prefix
+            0x64,
+            0x65,  # fs/gs prefix
+            0x67,  # addrsize prefix
+            0xF2,
+            0xF3,  # rep / repne prefix
+            0xF6,
+            0xF7,  # test/not/neg r/m
+            0x40,
+            0x41,
+            0x42,
+            0x43,
+            0x44,
+            0x45,
+            0x46,
+            0x47,  # inc r32 (32-bit) / REX (64-bit)
+            0x48,
+            0x49,
+            0x4A,
+            0x4B,
+            0x4C,
+            0x4D,
+            0x4E,
+            0x4F,  # dec r32 (32-bit) / REX.W+ (64-bit)
+            0xA0,
+            0xA1,
+            0xA2,
+            0xA3,  # mov mov al/eax, [mem]
+            0xD8,
+            0xD9,
+            0xDA,
+            0xDB,
+            0xDC,
+            0xDD,
+            0xDE,
+            0xDF,  # FPU x87 group
+            0xF8,
+            0xF9,
+            0xFA,
+            0xFB,
+            0xFC,
+            0xFD,  # clc/stc/cli/sti/cld/std (rare leaf)
+        }
+        offsets: List[int] = []
+        ptr_size, ptr_fmt, _, _ = win32_platform.ptr_layout(pe.is_pe32_plus)
+        start = (-self.vram_start) & (ptr_size - 1)
+
+        def looks_like_function(target: int) -> bool:
+            rva = target - pe.image_base
+            f_off = pe.rva_to_file_offset(rva)
+            if f_off is None or f_off >= len(win32_platform.raw_image):
+                return False
+            return win32_platform.raw_image[f_off] in prologue_first_bytes
+
+        for i in range(start, data_len - (ptr_size - 1), ptr_size):
+            value = struct.unpack_from(ptr_fmt, data, i)[0]
+            if not any(lo <= value < hi for lo, hi in code_ranges):
+                continue
+            if not looks_like_function(value):
+                continue
+            offsets.append(i)
+        return offsets
+
+    def _resolve_pointer(self, va: int) -> Optional[str]:
+        from ...util import symbols as symbols_mod
+        from ...platforms import win32 as win32_platform
+
+        entries = symbols_mod.all_symbols_dict.get(va)
+        if entries:
+            return entries[0].name
+
+        # No declared symbol — synthesise one based on which section the
+        # pointer lands in. Matches the labels Win32SegText auto-emits at
+        # every direct call target.
+        pe = win32_platform.info
+        rva = va - pe.image_base
+        for section in pe.sections:
+            sec_end = section.virtual_address + max(
+                section.virtual_size, section.raw_size
+            )
+            if section.virtual_address <= rva < sec_end:
+                if section.is_code:
+                    return f"func_{va:08X}"
+                return f"D_{va:08X}"
+        return None
+
+    def _dump_with_strings_and_pointers(
+        self, data: bytes, pointer_offsets: List[int]
+    ) -> List[str]:
+        """Mix `.asciz` strings (if DETECT_STRINGS) and `.long`/`.quad`
+        pointers in with the usual `.byte` block. Pointers always win over
+        byte runs; strings win when no pointer overlaps."""
+        from ...platforms import win32 as win32_platform
+
+        ptr_size, ptr_fmt, ptr_directive, ptr_width = win32_platform.ptr_layout(
+            win32_platform.info.is_pe32_plus
+        )
+
+        ptr_set = set(pointer_offsets)
+        lines: List[str] = []
+        n = len(data)
+        i = 0
+        chunk_start = 0
+
+        def flush_chunk(upto: int) -> None:
+            if chunk_start < upto:
+                lines.extend(self._emit_byte_chunk(data, chunk_start, upto))
+
+        exact = self.exact_encoding
+        while i < n:
+            if i in ptr_set and i + ptr_size <= n:
+                flush_chunk(i)
+                raw = struct.unpack_from(ptr_fmt, data, i)[0]
+                target = None if exact else self._resolve_pointer(raw)
+                if target is not None:
+                    lines.append(
+                        f"    {ptr_directive} {target}  /* 0x{raw:0{ptr_width}X} */"
+                    )
+                else:
+                    lines.append(f"    {ptr_directive} 0x{raw:0{ptr_width}X}")
+                i += ptr_size
+                chunk_start = i
+                continue
+            if self.DETECT_STRINGS and not exact:
+                end = _scan_string(data, i)
+                # Reject the string if it would straddle a pointer slot.
+                if end is not None and not any(i <= p < end for p in ptr_set):
+                    flush_chunk(i)
+                    text = _escape_string(data[i : end - 1])
+                    lines.append(f'    .asciz "{text}"')
+                    i = end
+                    chunk_start = i
+                    continue
+                # UTF-16LE wide string (e.g. Windows API L"..." literals).
+                w_end = _scan_wide_string(data, i)
+                if w_end is not None and not any(i <= p < w_end for p in ptr_set):
+                    flush_chunk(i)
+                    body = data[i : w_end - 2]
+                    text = _decode_wide(body)
+                    # Emit as raw bytes so the layout round-trips byte-for-
+                    # byte even when GAS's `.string16` directive is absent.
+                    lines.append(f'    /* L"{text}" */')
+                    bb = ", ".join(f"0x{b:02X}" for b in data[i:w_end])
+                    lines.append(f"    .byte {bb}")
+                    i = w_end
+                    chunk_start = i
+                    continue
+            i += 1
+        flush_chunk(n)
+        return lines
+
+    def split(self, rom_bytes: bytes):
+        if self.rom_start is None or self.rom_end is None:
+            return
+        if self.rom_start == self.rom_end:
+            return
+
+        path = self.out_path()
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+        data = rom_bytes[self.rom_start : self.rom_end]
+        pointer_offsets = self._pointer_offsets(data)
+        if pointer_offsets or self.DETECT_STRINGS:
+            body_lines = self._dump_with_strings_and_pointers(data, pointer_offsets)
+        else:
+            body_lines = self._emit_byte_chunk(data, 0, len(data))
+
+        with path.open("w", encoding="utf-8", newline="\n") as f:
+            preamble = options.opts.generated_s_preamble
+            if preamble:
+                f.write(preamble + "\n\n")
+            f.write(self.get_section_asm_line() + "\n\n")
+            f.write(f".global {self.name}\n")
+            f.write(f"{self.name}:\n")
+            for line in body_lines:
+                f.write(line + "\n")
+
+        self.log(f"Wrote {self.name} to {path}")
diff --git a/src/splat/segtypes/win32/header.py b/src/splat/segtypes/win32/header.py
new file mode 100644
index 00000000..b7e50410
--- /dev/null
+++ b/src/splat/segtypes/win32/header.py
@@ -0,0 +1,713 @@
+"""Win32 PE header segment.
+
+Dumps the relevant fields of the DOS stub, the COFF file header, the
+optional header and the section table as ``.byte`` / ``.long`` / ``.ascii``
+directives, mirroring the layout produced by the linker so the segment
+round-trips byte-for-byte.
+
+The segment's ``rom_end`` is expected to point at (or just past) the end of
+the section table — usually the first non-zero byte of the first real
+section. Anything inside that range is emitted verbatim so the alignment
+padding the linker inserted before the first section is preserved.
+"""
+
+import struct
+from typing import List
+
+from ..common.header import CommonSegHeader
+from ...platforms import win32 as win32_platform
+
+
+_MACHINE_TYPES = {
+    0x014C: "i386",
+    0x0162: "MIPS R3000 LE",
+    0x0166: "MIPS R4000 LE",
+    0x0168: "MIPS R10000",
+    0x0184: "Alpha AXP",
+    0x01C0: "ARM (legacy CE)",
+    0x01C2: "ARM Thumb",
+    0x01C4: "ARMNT (Thumb-2)",
+    0x01F0: "PowerPC LE",
+    0x01F1: "PowerPC w/ FP",
+    0x0200: "Itanium (IA-64)",
+    0x0266: "MIPS16",
+    0x0366: "MIPSFPU",
+    0x0466: "MIPSFPU16",
+    0x0EBC: "EFI byte code",
+    0x5032: "RISC-V 32-bit",
+    0x5064: "RISC-V 64-bit",
+    0x5128: "RISC-V 128-bit",
+    0x6232: "LoongArch 32-bit",
+    0x6264: "LoongArch 64-bit",
+    0x8664: "amd64",
+    0x9041: "Mitsubishi M32R LE",
+    0xAA64: "ARM64",
+    0xC0EE: "CLR/CEE",
+}
+
+
+_SUBSYSTEMS = {
+    0: "UNKNOWN",
+    1: "NATIVE",
+    2: "WINDOWS_GUI",
+    3: "WINDOWS_CUI",
+    5: "OS2_CUI",
+    7: "POSIX_CUI",
+    8: "NATIVE_WINDOWS",
+    9: "WINDOWS_CE_GUI",
+    10: "EFI_APPLICATION",
+    11: "EFI_BOOT_SERVICE_DRIVER",
+    12: "EFI_RUNTIME_DRIVER",
+    13: "EFI_ROM",
+    14: "XBOX",
+    16: "WINDOWS_BOOT_APPLICATION",
+}
+
+_DLL_CHARACTERISTICS = [
+    (0x0020, "HIGH_ENTROPY_VA"),
+    (0x0040, "DYNAMIC_BASE"),
+    (0x0080, "FORCE_INTEGRITY"),
+    (0x0100, "NX_COMPAT"),
+    (0x0200, "NO_ISOLATION"),
+    (0x0400, "NO_SEH"),
+    (0x0800, "NO_BIND"),
+    (0x1000, "APPCONTAINER"),
+    (0x2000, "WDM_DRIVER"),
+    (0x4000, "GUARD_CF"),
+    (0x8000, "TERMINAL_SERVER_AWARE"),
+]
+
+
+_FILE_CHARACTERISTICS = [
+    (0x0001, "RELOCS_STRIPPED"),
+    (0x0002, "EXECUTABLE_IMAGE"),
+    (0x0004, "LINE_NUMS_STRIPPED"),
+    (0x0008, "LOCAL_SYMS_STRIPPED"),
+    (0x0010, "AGGRESSIVE_WS_TRIM"),
+    (0x0020, "LARGE_ADDRESS_AWARE"),
+    (0x0080, "BYTES_REVERSED_LO"),
+    (0x0100, "32BIT_MACHINE"),
+    (0x0200, "DEBUG_STRIPPED"),
+    (0x0400, "REMOVABLE_RUN_FROM_SWAP"),
+    (0x0800, "NET_RUN_FROM_SWAP"),
+    (0x1000, "SYSTEM"),
+    (0x2000, "DLL"),
+    (0x4000, "UP_SYSTEM_ONLY"),
+    (0x8000, "BYTES_REVERSED_HI"),
+]
+
+
+def _decode_flags(value: int, table) -> str:
+    names = [name for bit, name in table if value & bit]
+    # Surface any leftover bits — preserves information when a binary
+    # uses a flag combination we haven't tabulated (e.g. newer
+    # IMAGE_DLLCHARACTERISTICS values).
+    known_mask = 0
+    for bit, _ in table:
+        known_mask |= bit
+    leftover = value & ~known_mask
+    if leftover:
+        names.append(f"unknown 0x{leftover:X}")
+    return " | ".join(names) if names else "(none)"
+
+
+class Win32SegHeader(CommonSegHeader):
+    """The PE header segment.
+
+    Overrides `get_linker_section` to `.header` so the splat-generated
+    linker script pulls bytes from the right input section name —
+    header.s emits `.section .header` (matching the PE convention)
+    rather than the default `.data` that the base class would assume.
+
+    Emits a structured `.s` file covering DOS stub, PE signature,
+    COFF file header, optional header (PE32 or PE32+ specific
+    layout), every populated data directory, and the section table.
+    Each field renders as a width-correct `.short` / `.long` / etc.
+    directive with a trailing comment naming the field — the
+    generated bytes match the original PE header byte-for-byte. A
+    one-page human-readable summary block precedes the byte
+    emission, decoding Machine / ImageBase / EntryPoint /
+    Subsystem / characteristics flags / sections / exports /
+    imports / PDB / TLS / resources by name."""
+
+    def get_linker_section(self) -> str:
+        # Override the default `.data` so the linker script's
+        # `<file>.o(.header)` reference matches the section name that
+        # parse_header() emits via `.section .header`.
+        return ".header"
+
+    @staticmethod
+    def _le(data: bytes) -> bytes:
+        """Reverse little-endian numeric fields so `.long`/`.short` produce
+        the same byte sequence as the source file."""
+        return data[::-1]
+
+    def parse_header(self, rom_bytes) -> List[str]:
+        assert isinstance(self.rom_start, int)
+        assert isinstance(self.rom_end, int)
+        pe = win32_platform.info
+
+        out: List[str] = []
+        out.extend(self._summary_block(pe))
+        out.append(".section .header")
+        out.append("")
+
+        # DOS header magic + the rest of the MS-DOS stub.
+        out.append("/* IMAGE_DOS_HEADER */")
+        out.append(self.get_line("ascii", rom_bytes[0x00:0x02], "Magic 'MZ'"))
+        # Dump bytes 0x02..0x3C as raw bytes — the layout varies between
+        # toolchains and splat just needs to round-trip them.
+        out.append(self._byte_block(rom_bytes[0x02:0x3C], "DOS header tail"))
+        out.append(self.get_line("long", self._le(rom_bytes[0x3C:0x40]), "e_lfanew"))
+        out.append("")
+
+        pe_off = pe.pe_header_offset
+        if pe_off > 0x40:
+            out.append("/* DOS stub */")
+            out.append(self._byte_block(rom_bytes[0x40:pe_off], "DOS stub bytes"))
+            out.append("")
+
+        # PE signature + COFF file header.
+        out.append("/* PE signature + IMAGE_FILE_HEADER */")
+        out.append(self.get_line("ascii", rom_bytes[pe_off : pe_off + 4], "Signature"))
+        out.append(
+            self.get_line(
+                "short", self._le(rom_bytes[pe_off + 4 : pe_off + 6]), "Machine"
+            )
+        )
+        out.append(
+            self.get_line(
+                "short",
+                self._le(rom_bytes[pe_off + 6 : pe_off + 8]),
+                "NumberOfSections",
+            )
+        )
+        out.append(
+            self.get_line(
+                "long",
+                self._le(rom_bytes[pe_off + 8 : pe_off + 12]),
+                "TimeDateStamp",
+            )
+        )
+        out.append(
+            self.get_line(
+                "long",
+                self._le(rom_bytes[pe_off + 12 : pe_off + 16]),
+                "PointerToSymbolTable",
+            )
+        )
+        out.append(
+            self.get_line(
+                "long",
+                self._le(rom_bytes[pe_off + 16 : pe_off + 20]),
+                "NumberOfSymbols",
+            )
+        )
+        size_opt_hdr = struct.unpack_from("<H", rom_bytes, pe_off + 20)[0]
+        out.append(
+            self.get_line(
+                "short",
+                self._le(rom_bytes[pe_off + 20 : pe_off + 22]),
+                "SizeOfOptionalHeader",
+            )
+        )
+        out.append(
+            self.get_line(
+                "short",
+                self._le(rom_bytes[pe_off + 22 : pe_off + 24]),
+                "Characteristics",
+            )
+        )
+        out.append("")
+
+        opt_off = pe_off + 24
+        opt_end = opt_off + size_opt_hdr
+        out.append("/* IMAGE_OPTIONAL_HEADER */")
+        out.extend(self._dump_optional_header(rom_bytes, opt_off, opt_end))
+        out.append("")
+
+        # Section table — 40 bytes each.
+        out.append("/* Section table */")
+        sect_off = opt_end
+        for i, section in enumerate(pe.sections):
+            base = sect_off + i * 40
+            out.append(f"/* Section {i}: {section.name!r} */")
+            out.append(self.get_line("ascii", rom_bytes[base : base + 8], "Name"))
+            out.append(
+                self.get_line(
+                    "long",
+                    self._le(rom_bytes[base + 8 : base + 12]),
+                    "VirtualSize",
+                )
+            )
+            out.append(
+                self.get_line(
+                    "long",
+                    self._le(rom_bytes[base + 12 : base + 16]),
+                    "VirtualAddress",
+                )
+            )
+            out.append(
+                self.get_line(
+                    "long",
+                    self._le(rom_bytes[base + 16 : base + 20]),
+                    "SizeOfRawData",
+                )
+            )
+            out.append(
+                self.get_line(
+                    "long",
+                    self._le(rom_bytes[base + 20 : base + 24]),
+                    "PointerToRawData",
+                )
+            )
+            out.append(
+                self.get_line(
+                    "long",
+                    self._le(rom_bytes[base + 24 : base + 28]),
+                    "PointerToRelocations",
+                )
+            )
+            out.append(
+                self.get_line(
+                    "long",
+                    self._le(rom_bytes[base + 28 : base + 32]),
+                    "PointerToLinenumbers",
+                )
+            )
+            out.append(
+                self.get_line(
+                    "short",
+                    self._le(rom_bytes[base + 32 : base + 34]),
+                    "NumberOfRelocations",
+                )
+            )
+            out.append(
+                self.get_line(
+                    "short",
+                    self._le(rom_bytes[base + 34 : base + 36]),
+                    "NumberOfLinenumbers",
+                )
+            )
+            out.append(
+                self.get_line(
+                    "long",
+                    self._le(rom_bytes[base + 36 : base + 40]),
+                    "Characteristics",
+                )
+            )
+        out.append("")
+
+        # Anything between end-of-section-table and rom_end is alignment padding
+        # the linker inserted before the first real section.
+        tail_start = sect_off + len(pe.sections) * 40
+        if tail_start < self.rom_end:
+            out.append("/* Header padding */")
+            out.append(
+                self._byte_block(rom_bytes[tail_start : self.rom_end], "Padding")
+            )
+
+        out.append("")
+        return out
+
+    @staticmethod
+    def _byte_block(data: bytes, comment: str) -> str:
+        if not data:
+            return f"/* {comment}: empty */"
+        hexed = ", ".join(f"0x{b:02X}" for b in data)
+        return f".byte {hexed} /* {comment} */"
+
+    def _summary_block(self, pe) -> List[str]:
+        out: List[str] = []
+        out.append("/* ============================================================")
+        out.append(" * PE / COFF summary (decoded from the bytes below)")
+        machine_name = _MACHINE_TYPES.get(pe.machine)
+        out.append(
+            f" *   Machine:           0x{pe.machine:04X}"
+            + (f" ({machine_name})" if machine_name else "")
+        )
+        base_width = 16 if pe.is_pe32_plus else 8
+        out.append(f" *   ImageBase:         0x{pe.image_base:0{base_width}X}")
+        if pe.entry_point_rva:
+            out.append(
+                f" *   EntryPoint:        0x{pe.entry_point_va:0{base_width}X}"
+                f"  (RVA 0x{pe.entry_point_rva:X})"
+            )
+        else:
+            out.append(" *   EntryPoint:        (none — DLL with no entry)")
+        sub_name = _SUBSYSTEMS.get(pe.subsystem, f"0x{pe.subsystem:04X}")
+        out.append(f" *   Subsystem:         {sub_name}")
+        if pe.linker_major or pe.linker_minor:
+            from ...platforms.win32 import linker_version_label
+
+            out.append(
+                f" *   Linker:            {pe.linker_major}.{pe.linker_minor:02d}"
+                f"  ({linker_version_label(pe.linker_major, pe.linker_minor)})"
+            )
+        if pe.coff_num_symbols:
+            out.append(
+                f" *   COFF symbols:      {pe.coff_num_symbols} at file 0x{pe.coff_symtab_ptr:X}"
+                "  (deprecated; unusual for PEs)"
+            )
+            if pe.coff_symbols:
+                shown = pe.coff_symbols[:8]
+                out.append(
+                    f" *     Sample symbols ({len(shown)} of {len(pe.coff_symbols)}):"
+                )
+                for sym in shown:
+                    out.append(
+                        f" *       {sym.name!s:<24s} value=0x{sym.value:08X}"
+                        f" section={sym.section_number} class={sym.storage_class}"
+                    )
+        if pe.clr_header is not None:
+            out.append(
+                f" *   .NET CLR header:   v{pe.clr_header.runtime_major}."
+                f"{pe.clr_header.runtime_minor}"
+                f"  flags=0x{pe.clr_header.flags:X}"
+            )
+            out.append(
+                f" *     Metadata:        RVA 0x{pe.clr_header.metadata_rva:X}"
+                f"  size 0x{pe.clr_header.metadata_size:X}"
+            )
+            out.append(
+                f" *     EntryPoint token: 0x{pe.clr_header.entry_point_token_or_rva:08X}"
+            )
+            if pe.clr_header.strong_name_signature_rva:
+                out.append(
+                    f" *     Strong-name sig: RVA "
+                    f"0x{pe.clr_header.strong_name_signature_rva:X}"
+                    f"  size 0x{pe.clr_header.strong_name_signature_size:X}"
+                )
+        if pe.unwind_info:
+            out.append(
+                f" *   Unwind records:    {len(pe.unwind_info)} decoded "
+                f"IMAGE_UNWIND_INFO blobs (PE32+ SEH)"
+            )
+        if pe.timestamp:
+            import datetime as _dt
+
+            try:
+                dt = _dt.datetime.fromtimestamp(pe.timestamp, _dt.timezone.utc)
+                out.append(
+                    f" *   TimeDateStamp:     0x{pe.timestamp:08X}  ({dt.isoformat()})"
+                )
+            except (OSError, OverflowError, ValueError):
+                out.append(f" *   TimeDateStamp:     0x{pe.timestamp:08X}")
+        out.append(
+            f" *   Characteristics:   {_decode_flags(pe.characteristics, _FILE_CHARACTERISTICS)}"
+        )
+        if pe.dll_characteristics:
+            out.append(
+                f" *   DllCharacteristics: {_decode_flags(pe.dll_characteristics, _DLL_CHARACTERISTICS)}"
+            )
+        out.append(f" *   SectionAlignment:  0x{pe.section_alignment:X}")
+        out.append(f" *   FileAlignment:     0x{pe.file_alignment:X}")
+        out.append(f" *   SizeOfImage:       0x{pe.size_of_image:X}")
+        out.append(f" *   SizeOfHeaders:     0x{pe.size_of_headers:X}")
+        if pe.size_of_stack_reserve or pe.size_of_heap_reserve:
+            out.append(
+                f" *   Stack:             0x{pe.size_of_stack_reserve:X} reserve / 0x{pe.size_of_stack_commit:X} commit"
+            )
+            out.append(
+                f" *   Heap:              0x{pe.size_of_heap_reserve:X} reserve / 0x{pe.size_of_heap_commit:X} commit"
+            )
+        if pe.pdb_path:
+            out.append(f" *   PDB path:          {pe.pdb_path}")
+        if pe.pdb_guid:
+            age = pe.pdb_age if pe.pdb_age is not None else 0
+            out.append(f" *   PDB GUID/Age:      {pe.pdb_guid}-{age}")
+        if pe.tls_callback_vas:
+            cb_list = ", ".join(f"0x{v:0{base_width}X}" for v in pe.tls_callback_vas)
+            out.append(f" *   TLS callbacks:     {cb_list}")
+        out.append(" *")
+        out.append(" *   Sections:")
+        for s in pe.sections:
+            flags = []
+            if s.is_code:
+                flags.append("CODE")
+            if s.is_writable:
+                flags.append("RW")
+            else:
+                flags.append("R")
+            if s.is_bss:
+                flags.append("BSS")
+            out.append(
+                f" *     {s.name:<10s} VA=0x{(pe.image_base + s.virtual_address):0{base_width}X}"
+                f"  VSize=0x{s.virtual_size:X}"
+                f"  Raw=0x{s.raw_pointer:X}+0x{s.raw_size:X}"
+                f"  [{','.join(flags)}]"
+            )
+        if pe.exports:
+            out.append(" *")
+            shown = pe.exports[:8]
+            out.append(
+                f" *   Exports ({len(pe.exports)} total; showing first {len(shown)}):"
+            )
+            for exp in shown:
+                va = pe.image_base + exp.rva
+                if exp.forwarder is not None:
+                    out.append(
+                        f" *     [{exp.ordinal:>4}] {exp.name or '<unnamed>'} -> {exp.forwarder}"
+                    )
+                else:
+                    out.append(
+                        f" *     [{exp.ordinal:>4}] 0x{va:0{base_width}X} {exp.name or '<unnamed>'}"
+                    )
+        if pe.imports:
+            by_dll: dict = {}
+            for imp in pe.imports:
+                by_dll.setdefault(imp.dll, []).append(imp)
+            out.append(" *")
+            out.append(
+                f" *   Imports ({len(pe.imports)} total from {len(by_dll)} DLLs):"
+            )
+            for dll, items in by_dll.items():
+                out.append(f" *     {dll}: {len(items)}")
+        if pe.delay_imports:
+            from collections import Counter as _Counter
+
+            by_dll = _Counter(imp.dll for imp in pe.delay_imports)
+            out.append(" *")
+            out.append(
+                f" *   Delay imports ({len(pe.delay_imports)} total from {len(by_dll)} DLLs):"
+            )
+            for dll, n in by_dll.items():
+                out.append(f" *     {dll}: {n}")
+        if 0 < len(pe.data_directories) < 16:
+            out.append(" *")
+            out.append(
+                f" *   NumberOfRvaAndSizes: {len(pe.data_directories)}"
+                "  (less than the standard 16 — older toolchain or hand-crafted PE)"
+            )
+        if pe.bound_imports:
+            out.append(" *")
+            out.append(f" *   Bound imports ({len(pe.bound_imports)}):")
+            for bi in pe.bound_imports:
+                fwds = (
+                    f" + {len(bi.forwarder_refs)} forwarder refs"
+                    if bi.forwarder_refs
+                    else ""
+                )
+                out.append(f" *     {bi.dll}  (ts=0x{bi.timestamp:08X}){fwds}")
+        if pe.security_cookie_va or pe.safe_seh_handlers or pe.cfg_function_rvas:
+            out.append(" *")
+            out.append(" *   Load Config:")
+            if pe.security_cookie_va:
+                out.append(f" *     SecurityCookie:  0x{pe.security_cookie_va:08X}")
+            if pe.safe_seh_handlers:
+                out.append(f" *     SafeSEH handlers: {len(pe.safe_seh_handlers)}")
+            if pe.cfg_function_rvas:
+                out.append(
+                    f" *     CFG functions:    {len(pe.cfg_function_rvas)} (flags=0x{pe.cfg_flags:X})"
+                )
+        if pe.runtime_functions:
+            out.append(" *")
+            out.append(
+                f" *   Exception Directory: {len(pe.runtime_functions)} RUNTIME_FUNCTION entries"
+            )
+        if pe.pointer_rvas:
+            out.append(" *")
+            out.append(
+                f" *   Base relocations: {len(pe.pointer_rvas)} HIGHLOW/DIR64 entries"
+            )
+        if pe.version_info:
+            out.append(" *")
+            out.append(" *   VS_VERSIONINFO:")
+            # Stable presentation order — most useful fields first.
+            preferred = [
+                "FileVersion",
+                "ProductVersion",
+                "ProductName",
+                "FileDescription",
+                "OriginalFilename",
+                "InternalName",
+                "CompanyName",
+                "LegalCopyright",
+            ]
+            for key in preferred:
+                if key in pe.version_info:
+                    out.append(f" *     {key:<18s} {pe.version_info[key]}")
+            for key, val in pe.version_info.items():
+                if key in preferred:
+                    continue
+                out.append(f" *     {key:<18s} {val}")
+        if pe.resources:
+            out.append(" *")
+            from collections import Counter
+            from ...platforms.win32 import RESOURCE_TYPE_NAMES
+
+            kinds: Counter = Counter()
+            for r in pe.resources:
+                if isinstance(r.rtype, int):
+                    kinds[RESOURCE_TYPE_NAMES.get(r.rtype, f"TYPE_{r.rtype}")] += 1
+                else:
+                    kinds[str(r.rtype)] += 1
+            out.append(f" *   Resources ({len(pe.resources)} total):")
+            for kind, n in sorted(kinds.items(), key=lambda kv: -kv[1]):
+                out.append(f" *     {kind}: {n}")
+        out.append(" * ============================================================ */")
+        out.append("")
+        return out
+
+    # Named directory slots in the order the PE optional header stores them.
+    _DATA_DIRECTORY_NAMES = [
+        "Export Table",
+        "Import Table",
+        "Resource Table",
+        "Exception Table",
+        "Certificate Table",
+        "Base Relocation Table",
+        "Debug",
+        "Architecture",
+        "Global Ptr",
+        "TLS Table",
+        "Load Config Table",
+        "Bound Import",
+        "IAT",
+        "Delay Import Descriptor",
+        "CLR Runtime Header",
+        "Reserved",
+    ]
+
+    def _dump_optional_header(
+        self, rom_bytes: bytes, opt_off: int, opt_end: int
+    ) -> List[str]:
+        out: List[str] = []
+        if opt_end - opt_off < 2:
+            out.append(self._byte_block(rom_bytes[opt_off:opt_end], "Optional header"))
+            return out
+
+        magic = struct.unpack_from("<H", rom_bytes, opt_off)[0]
+        is_pe32_plus = magic == win32_platform.OPT_MAGIC_PE32_PLUS
+        size_int = 8 if is_pe32_plus else 4
+        size_typ = "quad" if is_pe32_plus else "long"
+
+        # Truncated optional headers — short-circuit if a field would
+        # overshoot. Some malformed PEs declare a SizeOfOptionalHeader
+        # smaller than the canonical layout; emit whatever fits as raw
+        # bytes (once) instead of producing zero-width `.short ""`
+        # directives that GAS would reject.
+        truncation_emitted = [False]
+
+        def _ensure_room(field_size: int, off: int, label: str) -> bool:
+            if off + field_size <= opt_end and off + field_size <= len(rom_bytes):
+                return True
+            if not truncation_emitted[0]:
+                tail = rom_bytes[off : min(opt_end, len(rom_bytes))]
+                if tail:
+                    out.append(self._byte_block(tail, f"truncated past {label}"))
+                truncation_emitted[0] = True
+            return False
+
+        def emit_short(off: int, label: str) -> None:
+            if not _ensure_room(2, off, label):
+                return
+            out.append(
+                self.get_line("short", self._le(rom_bytes[off : off + 2]), label)
+            )
+
+        def emit_long(off: int, label: str) -> None:
+            if not _ensure_room(4, off, label):
+                return
+            out.append(self.get_line("long", self._le(rom_bytes[off : off + 4]), label))
+
+        def emit_int(off: int, label: str) -> None:
+            if not _ensure_room(size_int, off, label):
+                return
+            out.append(
+                self.get_line(
+                    size_typ, self._le(rom_bytes[off : off + size_int]), label
+                )
+            )
+
+        o = opt_off
+        emit_short(o, "Magic")
+        o += 2
+        out.append(self.get_line("byte", rom_bytes[o : o + 1], "MajorLinkerVersion"))
+        o += 1
+        out.append(self.get_line("byte", rom_bytes[o : o + 1], "MinorLinkerVersion"))
+        o += 1
+        emit_long(o, "SizeOfCode")
+        o += 4
+        emit_long(o, "SizeOfInitializedData")
+        o += 4
+        emit_long(o, "SizeOfUninitializedData")
+        o += 4
+        emit_long(o, "AddressOfEntryPoint")
+        o += 4
+        emit_long(o, "BaseOfCode")
+        o += 4
+        if not is_pe32_plus:
+            emit_long(o, "BaseOfData")
+            o += 4
+
+        emit_int(o, "ImageBase")
+        o += size_int
+        emit_long(o, "SectionAlignment")
+        o += 4
+        emit_long(o, "FileAlignment")
+        o += 4
+        emit_short(o, "MajorOperatingSystemVersion")
+        o += 2
+        emit_short(o, "MinorOperatingSystemVersion")
+        o += 2
+        emit_short(o, "MajorImageVersion")
+        o += 2
+        emit_short(o, "MinorImageVersion")
+        o += 2
+        emit_short(o, "MajorSubsystemVersion")
+        o += 2
+        emit_short(o, "MinorSubsystemVersion")
+        o += 2
+        emit_long(o, "Win32VersionValue")
+        o += 4
+        emit_long(o, "SizeOfImage")
+        o += 4
+        emit_long(o, "SizeOfHeaders")
+        o += 4
+        emit_long(o, "CheckSum")
+        o += 4
+        emit_short(o, "Subsystem")
+        o += 2
+        emit_short(o, "DllCharacteristics")
+        o += 2
+        emit_int(o, "SizeOfStackReserve")
+        o += size_int
+        emit_int(o, "SizeOfStackCommit")
+        o += size_int
+        emit_int(o, "SizeOfHeapReserve")
+        o += size_int
+        emit_int(o, "SizeOfHeapCommit")
+        o += size_int
+        emit_long(o, "LoaderFlags")
+        o += 4
+        num_dirs_off = o
+        num_dirs = struct.unpack_from("<I", rom_bytes, num_dirs_off)[0]
+        emit_long(o, "NumberOfRvaAndSizes")
+        o += 4
+
+        # Data directories — each is {VirtualAddress, Size}. PE spec caps
+        # the canonical count at 16; honour but tolerate a slightly
+        # higher declared count (loaders accept up to whatever fits in
+        # the optional header). Cap iteration at 256 to bound malformed
+        # inputs that claim millions of directory entries.
+        if num_dirs > 0:
+            out.append("/* Data directories */")
+        for i in range(min(num_dirs, 256)):
+            if o + 8 > opt_end:
+                break
+            label = (
+                self._DATA_DIRECTORY_NAMES[i]
+                if i < len(self._DATA_DIRECTORY_NAMES)
+                else f"Directory {i}"
+            )
+            emit_long(o, f"{label}.VirtualAddress")
+            emit_long(o + 4, f"{label}.Size")
+            o += 8
+
+        # Anything left inside the declared optional header (trailing reserved
+        # bytes) gets dumped raw so we round-trip cleanly.
+        if o < opt_end:
+            out.append(self._byte_block(rom_bytes[o:opt_end], "Optional header tail"))
+
+        return out
diff --git a/src/splat/segtypes/win32/pdata.py b/src/splat/segtypes/win32/pdata.py
new file mode 100644
index 00000000..91414dfe
--- /dev/null
+++ b/src/splat/segtypes/win32/pdata.py
@@ -0,0 +1,196 @@
+"""Win32 .pdata segment — PE32+ Exception Directory.
+
+The `.pdata` section is an array of `RUNTIME_FUNCTION` records, each
+12 bytes: `(BeginAddress, EndAddress, UnwindInfoAddress)` as RVAs. Splat
+emits one row per entry with function-target labels resolved so the
+table reads as a function map rather than an opaque byte blob.
+"""
+
+from pathlib import Path
+import struct
+from typing import Optional
+
+from ..common.segment import CommonSegment
+from ...util import options
+
+
+class Win32SegPdata(CommonSegment):
+    """PE32+ exception-directory segment.
+
+    Renders the `.pdata` section's RUNTIME_FUNCTION records as a
+    `.long Begin, End, Unwind` row each — with `Begin`/`End` resolved
+    to symbolic `func_<va>` labels and the unwind RVA's high bit
+    (chained-record flag) preserved. With `exact_encoding: true` the
+    same rows emit raw hex RVAs instead, so the bytes survive a
+    standalone `as` reassembly without needing cross-segment symbol
+    resolution. Each row's trailing comment carries the decoded
+    UNWIND_INFO opcode list when one was found."""
+
+    @staticmethod
+    def is_rodata() -> bool:
+        return True
+
+    @property
+    def exact_encoding(self) -> bool:
+        """When on, emit raw hex RVAs (`.long 0x<begin>, 0x<end>,
+        0x<unwind>`) rather than symbolic `func_<va> - ImageBase`
+        expressions — necessary for the win32_reassemble byte-
+        identical round-trip since cross-segment symbols would
+        otherwise resolve to 0 without ld. Inherits from parent
+        code-group YAML."""
+        from ...platforms.win32 import resolve_exact_encoding
+
+        return resolve_exact_encoding(self.yaml, self.parent)
+
+    def get_linker_section(self) -> str:
+        return ".pdata"
+
+    def get_section_flags(self) -> Optional[str]:
+        return "a"
+
+    def out_path(self) -> Path:
+        return options.opts.data_path / self.dir / f"{self.name}.s"
+
+    def should_split(self) -> bool:
+        return (
+            self.extract
+            and options.opts.is_mode_active("code")
+            and self.rom_start is not None
+            and self.rom_end is not None
+        )
+
+    def split(self, rom_bytes: bytes):
+        if self.rom_start is None or self.rom_end is None:
+            return
+        if self.rom_start == self.rom_end:
+            return
+
+        from ...platforms import win32 as win32_platform
+        from ...util import symbols as symbols_mod
+
+        pe = win32_platform.info
+        data = rom_bytes[self.rom_start : self.rom_end]
+        exact = self.exact_encoding
+
+        def resolve_func_rva(rva: int) -> str:
+            """Map a RUNTIME_FUNCTION RVA (a function start, end, or
+            interior cold-block address) to the matching splat label.
+            Prefers a user-declared symbol_addrs entry; falls back to
+            the `func_<va>` convention text segments emit at every
+            direct call target so cross-segment links resolve."""
+            va = pe.image_base + rva
+            entries = symbols_mod.all_symbols_dict.get(va)
+            if entries:
+                return entries[0].name
+            # Pdata entries reference function bodies; cross-segment refs
+            # use the same `func_<va>` convention as text segments.
+            return f"func_{va:08X}"
+
+        path = self.out_path()
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open("w", encoding="utf-8", newline="\n") as f:
+            preamble = options.opts.generated_s_preamble
+            if preamble:
+                f.write(preamble + "\n\n")
+            f.write(self.get_section_asm_line() + "\n\n")
+            f.write(f".global {self.name}\n")
+            f.write(f"{self.name}:\n")
+
+            i = 0
+            n = len(data)
+            entry_index = 0
+            while i + 12 <= n:
+                begin, end, unwind = struct.unpack_from("<III", data, i)
+                if begin == 0 and end == 0 and unwind == 0:
+                    # Null terminator — emit it and switch to a .space
+                    # block for any remaining padding so the output isn't
+                    # dominated by hundreds of zero rows.
+                    f.write("    .long 0, 0, 0  /* RUNTIME_FUNCTION terminator */\n")
+                    i += 12
+                    entry_index += 1
+                    pad = n - i
+                    if pad > 0:
+                        # In exact_encoding mode the trailing bytes after
+                        # the terminator must be preserved byte-for-byte —
+                        # PEs often pad section-tail with 0xCC (int3) or
+                        # 0x90 (nop), not zero. `.space` zeros the region;
+                        # emit the raw bytes instead.
+                        if exact:
+                            tail = data[i:n]
+                            hexed = ", ".join(f"0x{b:02X}" for b in tail)
+                            f.write(
+                                f"    .byte {hexed}  /* {pad} bytes pad-to-section-end */\n"
+                            )
+                        else:
+                            f.write(
+                                f"    .space 0x{pad:X}  /* zero padding to section end */\n"
+                            )
+                    i = n
+                    break
+                # Unwind RVA emitted as `unwind_<va> - ImageBase` when a
+                # matching symbol is registered in symbol_addrs.txt — let
+                # the analyst rename it; falls back to raw hex when no
+                # such symbol exists (degenerate inputs or chained
+                # records masked off in create_config).
+                base_uw = unwind & 0x7FFFFFFF
+                unwind_label = None
+                if base_uw:
+                    candidate = symbols_mod.all_symbols_dict.get(
+                        pe.image_base + base_uw
+                    )
+                    if candidate:
+                        unwind_label = candidate[0].name
+                if unwind_label is not None:
+                    # Preserve the chained-record bit by ORing it in
+                    # after the symbolic subtraction.
+                    uw_expr = f"({unwind_label} - 0x{pe.image_base:X})" + (
+                        " | 0x80000000" if unwind & 0x80000000 else ""
+                    )
+                else:
+                    uw_expr = f"0x{unwind:X}"
+                # Optional prologue annotation: every UNWIND_INFO blob
+                # carries an opcode list (push reg, alloc N, set FP, ...)
+                # — surface it in the row's trailing comment when
+                # parse_unwind_info decoded one.
+                unwind_decoded = pe.unwind_info.get(base_uw)
+                unwind_comment = ""
+                if unwind_decoded is not None and unwind_decoded.codes:
+                    prolog_ops = ", ".join(
+                        f"{op}({info})" for _ofs, op, info in unwind_decoded.codes
+                    )
+                    extra = ""
+                    if unwind_decoded.frame_register:
+                        extra = (
+                            f" fp=r{unwind_decoded.frame_register}"
+                            f"+0x{unwind_decoded.frame_register_offset:X}"
+                        )
+                    unwind_comment = (
+                        f" prolog=0x{unwind_decoded.prolog_size:X}{extra}"
+                        f" [{prolog_ops}]"
+                    )
+                if exact:
+                    # Byte-identical mode: emit raw RVAs so the
+                    # assembled output matches the original bytes
+                    # without needing cross-segment symbol resolution.
+                    f.write(
+                        f"    .long 0x{begin:X}, 0x{end:X}, 0x{unwind:X}"
+                        f"  /* [{entry_index}] RUNTIME_FUNCTION{unwind_comment} */\n"
+                    )
+                else:
+                    f.write(
+                        f"    .long {resolve_func_rva(begin)} - 0x{pe.image_base:X}"
+                        f", {resolve_func_rva(end)} - 0x{pe.image_base:X}"
+                        f", {uw_expr}"
+                        f"  /* [{entry_index}] RUNTIME_FUNCTION{unwind_comment} */\n"
+                    )
+                i += 12
+                entry_index += 1
+
+            # Trailing bytes that don't form a complete 12-byte record
+            # (shouldn't happen with valid .pdata, but be defensive).
+            if i < n:
+                tail = data[i:]
+                hexed = ", ".join(f"0x{b:02X}" for b in tail)
+                f.write(f"    .byte {hexed}  /* trailing bytes */\n")
+
+        self.log(f"Wrote {self.name} ({entry_index} runtime functions) to {path}")
diff --git a/src/splat/segtypes/win32/rodata.py b/src/splat/segtypes/win32/rodata.py
new file mode 100644
index 00000000..7b700c62
--- /dev/null
+++ b/src/splat/segtypes/win32/rodata.py
@@ -0,0 +1,27 @@
+"""Win32 .rodata segment — read-only initialized data."""
+
+from .data import Win32SegData
+
+
+class Win32SegRodata(Win32SegData):
+    """Read-only initialised data segment (`.rodata`).
+
+    Inherits everything from Win32SegData but defaults `DETECT_STRINGS`
+    + `HEURISTIC_POINTERS` to True since constants in read-only memory
+    are overwhelmingly NUL-terminated strings or function-pointer
+    tables. Maps to the assembly `.rodata` section with `"a"` flags
+    (allocated, no write) so the assembler places them correctly even
+    when the linker script doesn't merge `.rdata` and `.rodata`."""
+
+    LINKER_SECTION = ".rodata"
+    SECTION_FLAGS = "a"
+    DETECT_STRINGS = True
+    HEURISTIC_POINTERS = True
+
+    @staticmethod
+    def is_data() -> bool:
+        return False
+
+    @staticmethod
+    def is_rodata() -> bool:
+        return True
diff --git a/src/splat/segtypes/win32/text.py b/src/splat/segtypes/win32/text.py
new file mode 100644
index 00000000..19469589
--- /dev/null
+++ b/src/splat/segtypes/win32/text.py
@@ -0,0 +1,841 @@
+"""Win32 .text segment — x86 disassembly via capstone."""
+
+import re
+import struct
+from pathlib import Path
+from typing import Dict, Optional, Set
+
+from ..common.segment import CommonSegment
+from ...util import log, options
+from ...util.symbols import Symbol
+
+
+_HEX_RE = re.compile(r"\b0x([0-9a-fA-F]+)\b")
+
+# Capstone, even in intel-syntax mode, emits some mnemonics with AT&T-style
+# size suffixes that GAS doesn't recognize. Map them back to the canonical
+# intel-syntax names. Trailing whitespace is significant (capstone often
+# trails a space in the mnemonic field).
+_MNEMONIC_REWRITES = {
+    "popal": "popad",
+    "pushal": "pushad",
+    "popfd": "popfd",
+    "pushfd": "pushfd",
+}
+
+# Mnemonics whose memory operand carries no GAS-usable size qualifier
+# (state-save instructions span 94/108/512+ bytes — capstone tends to
+# annotate the operand as `dword ptr` which GAS then rejects).
+_SIZELESS_MEMORY_MNEMONICS = {
+    "fnsave",
+    "fsave",
+    "fnstenv",
+    "fstenv",
+    "frstor",
+    "fxsave",
+    "fxrstor",
+    "fxsave64",
+    "fxrstor64",
+}
+
+# Capstone uses some older size-qualifier names; GAS prefers different
+# spellings. Apply as a literal substring rewrite on the operand string.
+_OPERAND_REWRITES = [
+    ("xword ptr ", "tbyte ptr "),
+    ("xmmword ptr ", "xmmword ptr "),  # no-op; placeholder for future
+]
+
+# Capstone uses `riz`/`eiz` to denote an absent index register in SIB
+# expressions. GAS doesn't recognise either — strip them in every
+# position they can appear: `+ riz*N` (middle/tail), `riz*N + ` (head).
+_NO_INDEX_TAIL_RE = re.compile(r"\s*\+\s*[re]iz\*\d+")
+_NO_INDEX_HEAD_RE = re.compile(r"\[[re]iz\*\d+\s*\+\s*")
+_NO_INDEX_LONE_RE = re.compile(r"\[\s*[re]iz\*\d+\s*\]")
+
+# SSE scalar instructions whose source memory operand is 8 bytes but
+# capstone often labels as `xmmword ptr`. GAS wants `qword ptr` (or no
+# qualifier).
+_SCALAR_SSE_DOUBLE = {
+    "comisd",
+    "ucomisd",
+    "addsd",
+    "subsd",
+    "mulsd",
+    "divsd",
+    "minsd",
+    "maxsd",
+    "sqrtsd",
+    "cvtsi2sd",
+    "cvtsd2si",
+    "cvttsd2si",
+    "cvtsd2ss",
+    "movsd",
+}
+# SSE scalar single — 4-byte source.
+_SCALAR_SSE_SINGLE = {
+    "comiss",
+    "ucomiss",
+    "addss",
+    "subss",
+    "mulss",
+    "divss",
+    "minss",
+    "maxss",
+    "sqrtss",
+    "cvtsi2ss",
+    "cvtss2si",
+    "cvttss2si",
+    "cvtss2sd",
+    "movss",
+}
+# Matches `[rip + 0xN]` / `[rip - 0xN]` / `[rip]` (the last when disp == 0
+# — capstone elides the literal). Needed for PE32+ RIP-relative memory
+# operands, where the absolute target = insn.address + insn.size + disp.
+_RIP_REL_RE = re.compile(r"\[rip(?:\s*(?P<sign>[+-])\s*0x(?P<hex>[0-9a-fA-F]+))?\]")
+
+
+class Win32SegText(CommonSegment):
+    """Executable code segment.
+
+    Two-pass disassembly through Capstone (x86 or x86_64 picked from
+    `pe.is_pe32_plus`): the first pass walks every direct call/jmp
+    target inside the segment to seed function / branch labels; the
+    second emits instructions with operand strings rewritten so
+    addresses, IAT slots, exports, and RIP-relative loads resolve
+    to readable labels. GAS-incompatible Capstone outputs are
+    rewritten to keep the `.s` output assemblable. With
+    `exact_encoding: true`, instruction bytes are emitted verbatim
+    (decoded mnemonic as a comment) so the output round-trips
+    byte-identically through GAS+objcopy."""
+
+    # Default class-level kill switch. Per-segment YAML can override via
+    # `exact_encoding: true` to force byte-identical reassembly: every
+    # instruction is emitted as a `.byte` directive carrying the original
+    # bytes, with the decoded mnemonic moved to a trailing comment.
+    # Disasm readability suffers (no label substitution in operands), but
+    # round-trip through GAS produces byte-identical .text content.
+    EXACT_ENCODING_DEFAULT = False
+
+    @staticmethod
+    def is_text() -> bool:
+        return True
+
+    def get_linker_section(self) -> str:
+        return ".text"
+
+    def get_section_flags(self) -> Optional[str]:
+        return "ax"
+
+    @property
+    def exact_encoding(self) -> bool:
+        from ...platforms.win32 import resolve_exact_encoding
+
+        return resolve_exact_encoding(
+            self.yaml, self.parent, self.EXACT_ENCODING_DEFAULT
+        )
+
+    def out_path(self) -> Path:
+        return options.opts.asm_path / self.dir / f"{self.name}.s"
+
+    def should_scan(self) -> bool:
+        return (
+            options.opts.is_mode_active("code")
+            and self.rom_start is not None
+            and self.rom_end is not None
+        )
+
+    def should_split(self) -> bool:
+        return self.extract and self.should_scan()
+
+    def split(self, rom_bytes: bytes):
+        if self.rom_start is None or self.rom_end is None:
+            return
+        if self.rom_start == self.rom_end:
+            return
+        if not isinstance(self.vram_start, int):
+            log.error(
+                f"win32 text segment '{self.name}' requires a vram address; got {self.vram_start!r}"
+            )
+
+        from ...disassembler.capstone_disassembler import get_capstone_disassembler
+
+        cs_disasm = get_capstone_disassembler()
+        if cs_disasm is None:
+            log.error(
+                "win32 text segment requested but capstone disassembler is not active"
+            )
+        engine = cs_disasm.get_engine()
+
+        data = rom_bytes[self.rom_start : self.rom_end]
+        vram = self.vram_start
+
+        out_path = self.out_path()
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # If this segment maps cleanly onto a single PE section, trim the
+        # decode range to the section's VirtualSize so the file-alignment
+        # NUL padding at the tail doesn't get disassembled as thousands of
+        # spurious `add [eax], al` lines. The trimmed bytes get emitted as
+        # a single `.space` directive so the output round-trips.
+        from ...platforms import win32 as _pe_mod
+
+        _pe = _pe_mod.info
+        _seg_rva = vram - _pe.image_base
+        section = next(
+            (
+                s
+                for s in _pe.sections
+                if s.virtual_address == _seg_rva and s.raw_size > 0
+            ),
+            None,
+        )
+        decode_len = len(data)
+        trailing_pad = 0
+        if (
+            section is not None
+            and section.virtual_size < section.raw_size
+            and section.virtual_size <= len(data)
+        ):
+            decode_len = section.virtual_size
+            trailing_pad = len(data) - decode_len
+
+        # Build a quick lookup of any user-declared symbols that fall inside
+        # this segment so they show up as labels in the disassembly.
+        seg_symbols: Dict[int, Optional[Symbol]] = {}
+        for vram_addr, syms in self.seg_symbols.items():
+            if self.vram_start <= vram_addr < (self.vram_start + len(data)):
+                seg_symbols[vram_addr] = syms[0]
+        # Also synthesize a label for the platform-level entry point if it
+        # lands inside this segment.
+        from ...platforms import win32 as win32_platform
+
+        # `entry_point_rva == 0` for DLLs/EXEs with no entry point (e.g.,
+        # resource-only DLLs). Treat 0 as "no entry" so we don't accidentally
+        # label address 0 as `entrypoint`.
+        entry_va = (
+            win32_platform.info.entry_point_va
+            if win32_platform.info.entry_point_rva
+            else -1
+        )
+        if self.vram_start <= entry_va < (self.vram_start + len(data)):
+            seg_symbols.setdefault(entry_va, None)
+
+        # First pass: find every `call <imm>` / `jmp <imm>` / `j<cc> <imm>`
+        # target that lands inside this segment so we can emit a label there
+        # in the second pass. Use capstone's group info (detail=True is set
+        # by CapstoneDisassembler.configure).
+        import capstone
+
+        # Disasm input restricted to the section's real content so we don't
+        # waste cycles labelling addresses inside the alignment padding.
+        decode_data = data[:decode_len]
+        seg_end = vram + decode_len
+        call_targets: Set[int] = set()
+        jump_targets: Set[int] = set()
+
+        # Seed call_targets with every pointer-relocation target that falls
+        # inside this text segment. Data-driven references (vtables, indirect
+        # call tables) are otherwise invisible to the call/jmp scan.
+        # Pointer size matches the PE bitness — DIR64 relocs for PE32+ refer
+        # to 8-byte slots; HIGHLOW for PE32 refers to 4-byte slots.
+        _ptr_seed_size = 8 if win32_platform.info.is_pe32_plus else 4
+        for rva in win32_platform.info.pointer_rvas:
+            f_off = win32_platform.info.rva_to_file_offset(rva)
+            if f_off is None or f_off + _ptr_seed_size > len(rom_bytes):
+                continue
+            tgt_val = int.from_bytes(
+                rom_bytes[f_off : f_off + _ptr_seed_size], "little"
+            )
+            if vram <= tgt_val < seg_end:
+                call_targets.add(tgt_val)
+        # TLS callbacks land somewhere in .text but are not reached by any
+        # direct call/jmp — seed them explicitly so their entries get labels.
+        for cb in win32_platform.info.tls_callback_vas:
+            if vram <= cb < seg_end:
+                call_targets.add(cb)
+        # Exports — for DLLs, an exported function may have no in-binary
+        # callers (only external GetProcAddress users). Seed each export RVA
+        # so the entry gets a func_<va> label; a `symbol_addrs.txt` entry,
+        # if present, replaces that with the export's real name.
+        for exp in win32_platform.info.exports:
+            if exp.forwarder is not None:
+                continue
+            exp_va = win32_platform.info.image_base + exp.rva
+            if vram <= exp_va < seg_end:
+                call_targets.add(exp_va)
+        # PE32+ Exception Directory entries give definitive function
+        # boundaries — seed BeginAddress so SEH-only/cold-block entry
+        # points get labels.
+        for begin_rva, _end_rva, _uw_rva in win32_platform.info.runtime_functions:
+            rf_va = win32_platform.info.image_base + begin_rva
+            if vram <= rf_va < seg_end:
+                call_targets.add(rf_va)
+        # /SAFESEH handler functions are reachable only via the
+        # IMAGE_LOAD_CONFIG_DIRECTORY's SEHandlerTable — not via any direct
+        # call/jmp — so seed them explicitly.
+        for handler_rva in win32_platform.info.safe_seh_handlers:
+            handler_va = win32_platform.info.image_base + handler_rva
+            if vram <= handler_va < seg_end:
+                call_targets.add(handler_va)
+        # /guard:cf — every entry is a known indirect-call target.
+        for cfg_rva in win32_platform.info.cfg_function_rvas:
+            cfg_va = win32_platform.info.image_base + cfg_rva
+            if vram <= cfg_va < seg_end:
+                call_targets.add(cfg_va)
+        offset = 0
+        data_len = decode_len
+        while offset < data_len:
+            advanced = False
+            for insn in engine.disasm(decode_data[offset:], vram + offset):
+                advanced = True
+                offset = (insn.address + insn.size) - vram
+                if not insn.operands:
+                    continue
+                op0 = insn.operands[0]
+                if op0.type != capstone.CS_OP_IMM:
+                    continue
+                tgt = op0.imm
+                if not (vram <= tgt < seg_end):
+                    continue
+                if insn.group(capstone.CS_GRP_CALL):
+                    call_targets.add(tgt)
+                elif insn.group(capstone.CS_GRP_JUMP):
+                    jump_targets.add(tgt)
+            if not advanced:
+                offset += 1
+
+        # Build a global-symbol lookup so we can annotate memory operands
+        # that reach known data (IAT slots, exports, etc.) with a trailing
+        # comment.
+        from ...util import symbols as symbols_mod
+        from .data import _is_string_byte, _escape_string
+
+        pe = win32_platform.info
+        _addr_mask = 0xFFFFFFFFFFFFFFFF if pe.is_pe32_plus else 0xFFFFFFFF
+
+        # Identify single-instruction `jmp dword ptr [<iat>]` thunks at every
+        # call_target so they can be renamed `<imp_name>_thunk` (or
+        # `<imp_name>_thunk_<va>` if the same import has multiple thunks).
+        # Map every IAT slot's VA (eager + delayed) to its canonical label.
+        iat_to_label: Dict[int, str] = dict(win32_platform.compute_iat_labels(pe))
+
+        # Pre-populate `synth_labels` with export and import names so DLLs
+        # surface readable references (`call dword ptr [imp_..._CreateThread]`,
+        # `Init:` label, etc.) without requiring a hand-authored
+        # `symbol_addrs.txt`. The same map is consulted by both `label_for`
+        # (label emission) and `resolve_sym` (op_str substitution).
+        synth_labels: Dict[int, str] = {}
+        export_labels = win32_platform.compute_export_labels(
+            pe, reserved={"entrypoint"} if pe.entry_point_rva else set()
+        )
+        for va, safe in export_labels.items():
+            if vram <= va < seg_end:
+                synth_labels[va] = safe
+        # Imports live in .rdata, not .text — but they're referenced from
+        # within .text via absolute memory operands, so `resolve_sym` needs
+        # to see them too. Reuse the same {slot_va: label} map computed
+        # above for IAT-thunk renaming so labels stay in sync.
+        synth_labels.update(iat_to_label)
+
+        thunk_labels: Dict[int, str] = {}
+        if iat_to_label:
+            used: Dict[str, int] = {}
+            rip_reg = capstone.x86.X86_REG_RIP if pe.is_pe32_plus else 0
+            for target in sorted(call_targets):
+                off = target - vram
+                if off < 0 or off >= decode_len:
+                    continue
+                # Single-pass decode of one instruction from the call target.
+                for insn in engine.disasm(decode_data[off:], target):
+                    if insn.mnemonic != "jmp" or not insn.operands:
+                        break
+                    op0 = insn.operands[0]
+                    if op0.type != capstone.CS_OP_MEM:
+                        break
+                    if op0.mem.index != 0 or op0.mem.segment != 0:
+                        break
+                    if pe.is_pe32_plus:
+                        # PE32+ thunk: `jmp qword ptr [rip + disp]`.
+                        # IAT slot VA = next_ip + disp.
+                        if op0.mem.base != rip_reg:
+                            break
+                        slot_va = (insn.address + insn.size + op0.mem.disp) & _addr_mask
+                    else:
+                        # PE32 thunk: `jmp dword ptr [imm32]`. No base.
+                        if op0.mem.base != 0:
+                            break
+                        slot_va = op0.mem.disp & _addr_mask
+                    iat_label = iat_to_label.get(slot_va)
+                    if iat_label is None:
+                        break
+                    base = f"{iat_label}_thunk"
+                    count = used.get(base, 0)
+                    used[base] = count + 1
+                    final = base if count == 0 else f"{base}_{target:08X}"
+                    thunk_labels[target] = final
+                    break
+
+        def section_for(va: int):
+            rva = va - pe.image_base
+            for s in pe.sections:
+                sec_end = s.virtual_address + max(s.virtual_size, s.raw_size)
+                if s.virtual_address <= rva < sec_end:
+                    return s
+            return None
+
+        def peek_string(va: int) -> Optional[str]:
+            s = section_for(va)
+            if s is None or s.raw_size == 0:
+                return None
+            offset_in_section = (va - pe.image_base) - s.virtual_address
+            # Reject VAs in the section's virtual-only tail — those have no
+            # backing bytes; reading would step into the next section.
+            if offset_in_section >= s.raw_size:
+                return None
+            file_off = s.raw_pointer + offset_in_section
+            if file_off >= len(rom_bytes):
+                return None
+            limit = min(64, len(rom_bytes) - file_off)
+            chunk = rom_bytes[file_off : file_off + limit]
+            end = 0
+            while end < len(chunk) and _is_string_byte(chunk[end]):
+                end += 1
+            if end < 4 or end >= len(chunk) or chunk[end] != 0:
+                return None
+            text = _escape_string(chunk[:end])
+            # Escape `*/` to avoid prematurely terminating the GAS C-style
+            # comment we're going to wrap this preview in.
+            text = text.replace("*/", "*\\/")
+            return f'"{text}"'
+
+        def resolve_sym(addr: int) -> Optional[str]:
+            if addr in thunk_labels:
+                return thunk_labels[addr]
+            entries = symbols_mod.all_symbols_dict.get(addr)
+            if entries:
+                return entries[0].name
+            if addr in synth_labels:
+                return synth_labels[addr]
+            if addr == entry_va:
+                return "entrypoint"
+            if addr in call_targets:
+                return f"func_{addr:08X}"
+            if addr in jump_targets:
+                return f"loc_{addr:08X}"
+            s = section_for(addr)
+            if s is None:
+                return None
+            if s.is_code:
+                # Cross-segment code references resolve to `func_<va>` —
+                # matches the naming convention the target segment will
+                # emit when it builds its own call_targets set, so the
+                # label resolves at link time across .o files.
+                return f"func_{addr:08X}"
+            return f"D_{addr:08X}"
+
+        def substitute_op_str(insn) -> str:
+            """Replace hex literals in `insn.op_str` with their resolved
+            labels, in-place. Leaves the original string alone when no
+            literal resolves to a known address — so register-relative
+            offsets like `[esp + 0x58]` survive untouched.
+
+            For non-branch instructions (mov/push/lea/etc.) where the hex
+            literal is an immediate operand, the substitution uses the
+            `offset <label>` form so GAS treats the operand as the label's
+            address rather than as a memory load."""
+
+            op_str = insn.op_str
+            # PE32+ RIP-relative: compute absolute target and substitute
+            # the entire `[rip + 0xN]` token if it resolves.
+            next_ip = insn.address + insn.size
+
+            def rip_repl(match: "re.Match") -> str:
+                # `[rip]` with no displacement → disp = 0.
+                hex_grp = match.group("hex")
+                disp = int(hex_grp, 16) if hex_grp else 0
+                if match.group("sign") == "-":
+                    disp = -disp
+                target = (next_ip + disp) & 0xFFFFFFFFFFFFFFFF
+                name = resolve_sym(target)
+                if name is None:
+                    return match.group(0)
+                return f"[{name}]"
+
+            op_str = _RIP_REL_RE.sub(rip_repl, op_str)
+
+            # Avoid substituting hex literals that fall outside the loaded
+            # image (stack offsets, small immediate constants, etc.). Any
+            # real symbol address sits at ≥ ImageBase by construction.
+            image_min = pe.image_base
+            image_max = pe.image_base + pe.size_of_image
+
+            is_branch = bool(
+                insn.group(capstone.CS_GRP_CALL) or insn.group(capstone.CS_GRP_JUMP)
+            )
+
+            # `mov reg, imm` / `mov [mem], imm` / `push imm` need `offset`
+            # in GAS intel-syntax so the assembler treats the label as an
+            # immediate address rather than as a memory load.
+            def repl(match: "re.Match") -> str:
+                value = int(match.group(1), 16)
+                if not (image_min <= value < image_max):
+                    return match.group(0)
+                name = resolve_sym(value)
+                if name is None:
+                    return match.group(0)
+                if is_branch:
+                    return name
+                # Only add `offset` when the substituted token is sitting in
+                # an "immediate slot" — i.e., NOT inside square brackets.
+                start = match.start()
+                # Walk back to find the most recent `[` or `]`; whichever
+                # is closer to the match tells us whether we're inside [].
+                close = op_str.rfind("]", 0, start)
+                openb = op_str.rfind("[", 0, start)
+                inside_brackets = openb > close
+                if inside_brackets:
+                    # If the hex is preceded by `+ ` or `- ` inside the
+                    # brackets, it's a displacement in a register-relative
+                    # expression (`[reg + 0xN]`); the value is a constant
+                    # offset, not an address — substituting would yield
+                    # an "*ABS* - *UND*" link error. Keep raw.
+                    #
+                    # Exception: `[reg*N + 0xADDR]` is jump-table dispatch
+                    # where the displacement IS the table base, so we DO
+                    # substitute when the sign is `+`. `[reg*N - 0xADDR]`
+                    # never has a meaningful symbolic interpretation, so
+                    # keep raw regardless.
+                    preceding = op_str[openb + 1 : start].rstrip()
+                    if preceding and preceding[-1] == "-":
+                        return match.group(0)
+                    if (
+                        preceding
+                        and preceding[-1] == "+"
+                        and "*" not in op_str[openb + 1 : start]
+                    ):
+                        return match.group(0)
+                    return name
+                return f"offset {name}"
+
+            return _HEX_RE.sub(repl, op_str)
+
+        def operand_comments(insn) -> str:
+            """Render a tail-of-line `/* ... */` comment when an operand
+            carries information that's NOT already encoded by the
+            substituted op_str — primarily inline string previews for
+            data pointers. The plain `0x...=label` mapping is suppressed
+            because `substitute_op_str` already swaps the hex for the
+            label in the visible instruction text."""
+            notes: list = []
+            seen: Set[int] = set()
+            for op in insn.operands:
+                if op.type == capstone.CS_OP_IMM:
+                    imm = op.imm & _addr_mask
+                    if imm in seen:
+                        continue
+                    text = peek_string(imm)
+                    if text is None:
+                        continue
+                    notes.append(f"{imm:#x} {text}")
+                    seen.add(imm)
+                elif op.type == capstone.CS_OP_MEM:
+                    # Absolute (32-bit) memory operand.
+                    if op.mem.base == 0 and op.mem.index == 0 and op.mem.segment == 0:
+                        addr = op.mem.disp & _addr_mask
+                        if addr in seen:
+                            continue
+                        text = peek_string(addr)
+                        if text is None:
+                            continue
+                        notes.append(f"[{addr:#x}] {text}")
+                        seen.add(addr)
+                        continue
+                    # PE32+ RIP-relative: compute absolute target; surface
+                    # a string preview if it points at one.
+                    if op.mem.base != 0:
+                        try:
+                            reg_name = insn.reg_name(op.mem.base) or ""
+                        except Exception:
+                            reg_name = ""
+                        if (
+                            reg_name == "rip"
+                            and op.mem.index == 0
+                            and op.mem.segment == 0
+                        ):
+                            target = (
+                                insn.address + insn.size + op.mem.disp
+                            ) & 0xFFFFFFFFFFFFFFFF
+                            if target in seen:
+                                continue
+                            text = peek_string(target)
+                            if text is None:
+                                continue
+                            notes.append(f"[rip→0x{target:x}] {text}")
+                            seen.add(target)
+            if not notes:
+                return ""
+            return "  /* " + ", ".join(notes) + " */"
+
+        def label_for(addr: int) -> str:
+            if addr in thunk_labels:
+                return thunk_labels[addr]
+            sym = seg_symbols.get(addr)
+            if sym is not None:
+                return sym.name
+            if addr in synth_labels:
+                return synth_labels[addr]
+            if addr == entry_va:
+                return "entrypoint"
+            if addr in call_targets:
+                return f"func_{addr:08X}"
+            return f"loc_{addr:08X}"
+
+        labelled: Set[int] = set(seg_symbols) | call_targets | jump_targets
+
+        exact = self.exact_encoding
+
+        with out_path.open("w", encoding="utf-8", newline="\n") as f:
+            preamble = options.opts.generated_s_preamble
+            if preamble:
+                f.write(preamble + "\n\n")
+            f.write(self.get_section_asm_line() + "\n\n")
+            if not exact:
+                # `.intel_syntax noprefix` only matters when we emit
+                # actual mnemonics; exact-encoding mode emits raw bytes.
+                f.write(".intel_syntax noprefix\n")
+            f.write(f".global {self.name}\n")
+            f.write(f"{self.name}:\n")
+
+            # Walk the byte range forward. Capstone stops on the first
+            # undecodable instruction; emit the bad byte as `.byte` data and
+            # resume one byte later so jump-tables / embedded data inside
+            # real-world .text sections don't truncate the disassembly.
+            #
+            # Runs of int3 (0xCC) or nop (0x90) ≥ 2 bytes are collapsed into
+            # a single `.byte` line — MSVC pads between functions with these
+            # so the saving is large in real-world binaries.
+            # Pre-compute the set of offsets inside this segment where the
+            # base-relocation table marks an embedded 32-bit pointer. These
+            # are typically MSVC switch jump-tables or function-pointer
+            # tables that live inside `.text` itself; we must emit them as
+            # `.long <label>` rather than disassembling them as code.
+            ptr_size, ptr_fmt, ptr_directive, _ = win32_platform.ptr_layout(
+                pe.is_pe32_plus
+            )
+            seg_start_rva = vram - pe.image_base
+            seg_end_rva = seg_start_rva + decode_len
+            embedded_ptr_offsets: Set[int] = set()
+            for rva in pe.pointer_rvas:
+                if seg_start_rva <= rva < seg_end_rva:
+                    embedded_ptr_offsets.add(rva - seg_start_rva)
+
+            offset = 0
+            while offset < data_len:
+                # Embedded pointer slot: emit as `.long` (or `.quad`) and
+                # advance past the slot without invoking capstone.
+                if offset in embedded_ptr_offsets and offset + ptr_size <= data_len:
+                    here_va = vram + offset
+                    # Anchor the label first if anything references this
+                    # slot — switch-jump dispatch loads `[<table> + idx*4]`
+                    # via the table-base address.
+                    if here_va in labelled:
+                        is_func = (
+                            here_va in seg_symbols
+                            or here_va == entry_va
+                            or here_va in call_targets
+                        )
+                        if is_func:
+                            f.write(f"\n.global {label_for(here_va)}\n")
+                        f.write(f"{label_for(here_va)}:\n")
+                    tgt = struct.unpack_from(ptr_fmt, decode_data, offset)[0]
+                    ptr_label: Optional[str] = (
+                        None if exact else (resolve_sym(tgt) if tgt else None)
+                    )
+                    if ptr_label is not None:
+                        f.write(
+                            f"    {ptr_directive} {ptr_label}  /* 0x{here_va:08X} = 0x{tgt:X} */\n"
+                        )
+                    else:
+                        f.write(
+                            f"    {ptr_directive} 0x{tgt:X}  /* 0x{here_va:08X} */\n"
+                        )
+                    offset += ptr_size
+                    continue
+                # Check for a run of padding bytes that isn't broken by a
+                # labelled address we still need to anchor.
+                if decode_data[offset] in (0xCC, 0x90):
+                    pad = decode_data[offset]
+                    run = 1
+                    while (
+                        offset + run < data_len
+                        and decode_data[offset + run] == pad
+                        and (vram + offset + run) not in labelled
+                    ):
+                        run += 1
+                    if run >= 2:
+                        kind = "int3" if pad == 0xCC else "nop"
+                        f.write(
+                            "    .byte "
+                            + ", ".join([f"0x{pad:02X}"] * run)
+                            + f"  /* 0x{vram + offset:08X} ({run}× {kind} padding) */\n"
+                        )
+                        offset += run
+                        continue
+
+                produced_any = False
+                for insn in engine.disasm(decode_data[offset:], vram + offset):
+                    if insn.address in labelled:
+                        # Function-style label gets a `.global` line so it can
+                        # be linked against; local block labels do not.
+                        is_func = (
+                            insn.address in seg_symbols
+                            or insn.address == entry_va
+                            or insn.address in call_targets
+                        )
+                        if is_func:
+                            f.write(f"\n.global {label_for(insn.address)}\n")
+                        f.write(f"{label_for(insn.address)}:\n")
+                    raw_mnem = insn.mnemonic.strip()
+                    if exact:
+                        # Byte-identical mode: emit the original bytes as
+                        # `.byte` directives, with the decoded instruction
+                        # in a trailing comment for readability.
+                        insn_bytes = bytes(insn.bytes)
+                        hexed = ", ".join(f"0x{b:02X}" for b in insn_bytes)
+                        f.write(
+                            f"    .byte {hexed}"
+                            f"  /* 0x{insn.address:08X}: "
+                            f"{raw_mnem} {insn.op_str}".rstrip()
+                            + " */\n"
+                        )
+                    else:
+                        rendered_ops = substitute_op_str(insn)
+                        mnemonic = _MNEMONIC_REWRITES.get(raw_mnem, insn.mnemonic)
+                        # Drop GAS-incompatible size qualifiers on operands
+                        # of state-save instructions; translate older names.
+                        if raw_mnem in _SIZELESS_MEMORY_MNEMONICS:
+                            rendered_ops = re.sub(
+                                r"\b(?:byte|word|dword|qword|tbyte|xword) ptr ",
+                                "",
+                                rendered_ops,
+                            )
+                        for old, new in _OPERAND_REWRITES:
+                            rendered_ops = rendered_ops.replace(old, new)
+                        # Strip capstone's `riz`/`eiz` "no index" placeholder
+                        # — GAS doesn't recognise either. Cover all three
+                        # positional forms.
+                        rendered_ops = _NO_INDEX_TAIL_RE.sub("", rendered_ops)
+                        rendered_ops = _NO_INDEX_HEAD_RE.sub("[", rendered_ops)
+                        rendered_ops = _NO_INDEX_LONE_RE.sub("[0]", rendered_ops)
+                        # Scalar SSE doubles want `qword ptr`, not capstone's
+                        # `xmmword ptr`; scalar singles want `dword ptr`.
+                        if raw_mnem in _SCALAR_SSE_DOUBLE:
+                            rendered_ops = rendered_ops.replace(
+                                "xmmword ptr ", "qword ptr "
+                            )
+                        elif raw_mnem in _SCALAR_SSE_SINGLE:
+                            rendered_ops = rendered_ops.replace(
+                                "xmmword ptr ", "dword ptr "
+                            )
+                        # `enter imm16, imm8` rejects signed negative second
+                        # operands in GAS intel-syntax; normalise to unsigned.
+                        if raw_mnem == "enter":
+                            rendered_ops = re.sub(
+                                r"-0x([0-9a-fA-F]+)",
+                                lambda m: f"0x{(0x100 - int(m.group(1), 16)) & 0xFF:X}",
+                                rendered_ops,
+                            )
+                        f.write(
+                            f"    {mnemonic} {rendered_ops}".rstrip()
+                            + f"  /* 0x{insn.address:08X} */"
+                            + operand_comments(insn)
+                            + "\n"
+                        )
+                    offset = (insn.address + insn.size) - vram
+                    produced_any = True
+                    # If we're about to walk into a padding run or an
+                    # embedded pointer slot, hand control back to the outer
+                    # loop so it can render that range directly.
+                    if offset < data_len:
+                        if (
+                            decode_data[offset] in (0xCC, 0x90)
+                            and (vram + offset) not in labelled
+                        ):
+                            break
+                        if offset in embedded_ptr_offsets:
+                            break
+                if offset >= data_len:
+                    break
+                if not produced_any:
+                    f.write(
+                        f"    .byte 0x{decode_data[offset]:02X}  /* 0x{vram + offset:08X} (undecodable) */\n"
+                    )
+                    offset += 1
+
+            if trailing_pad > 0:
+                # In exact_encoding mode preserve the actual padding
+                # bytes — MSVC linkers fill .text tail with 0xCC, not
+                # zero. `.space` would zero them.
+                if exact:
+                    pad_bytes = data[decode_len : decode_len + trailing_pad]
+                    hexed = ", ".join(f"0x{b:02X}" for b in pad_bytes)
+                    f.write(
+                        f"\n    .byte {hexed}  /* {trailing_pad} bytes file-alignment padding */\n"
+                    )
+                else:
+                    f.write(
+                        f"\n    .space 0x{trailing_pad:X}  /* file-alignment padding */\n"
+                    )
+
+        self.log(f"Wrote {self.name} to {out_path}")
+
+        # Sidecar index file: `<segname>.functions.txt` next to the .s file.
+        # Lists every function-style label and its byte length so callers can
+        # navigate the huge text dump without grepping it.
+        func_starts = sorted(
+            set(call_targets)
+            | set(seg_symbols.keys())
+            | ({entry_va} if vram <= entry_va < seg_end else set())
+        )
+        end_marker = vram + decode_len
+        addr_width = 16 if pe.is_pe32_plus else 8
+
+        # Pre-compute kind-source sets so the per-entry lookup is O(1).
+        _export_vas = {pe.image_base + e.rva for e in pe.exports if e.forwarder is None}
+        _tls_vas = set(pe.tls_callback_vas)
+        _seh_vas = {pe.image_base + rva for rva in pe.safe_seh_handlers}
+        _cfg_vas = {pe.image_base + rva for rva in pe.cfg_function_rvas}
+        _rt_vas = {pe.image_base + begin for begin, _e, _u in pe.runtime_functions}
+
+        def kind_for(addr: int) -> str:
+            if addr in thunk_labels:
+                return "thunk"
+            if addr in seg_symbols and seg_symbols[addr] is not None:
+                return "decl"
+            if addr in _export_vas:
+                return "export"
+            if addr == entry_va:
+                return "entry"
+            if addr in _tls_vas:
+                return "tls"
+            if addr in _seh_vas:
+                return "seh"
+            if addr in _cfg_vas:
+                return "cfg"
+            if addr in _rt_vas:
+                return "rt"
+            return "func"
+
+        idx_path = out_path.with_suffix(".functions.txt")
+        with idx_path.open("w", encoding="utf-8", newline="\n") as idx:
+            idx.write(f"# segment: {self.name}\n")
+            idx.write("# columns: VA(hex)  size(decimal bytes)  kind  label\n")
+            for i, addr in enumerate(func_starts):
+                next_addr = (
+                    func_starts[i + 1] if i + 1 < len(func_starts) else end_marker
+                )
+                size = next_addr - addr
+                idx.write(
+                    f"0x{addr:0{addr_width}X}  {size:>8d}  {kind_for(addr):<6s}  {label_for(addr)}\n"
+                )
+        self.log(f"Wrote function index to {idx_path}")

From c1aa98209acaab143f9fa3c006ff26d70751414e Mon Sep 17 00:00:00 2001
From: "Marcel W. Wysocki" <maci.stgn@gmail.com>
Date: Wed, 20 May 2026 11:55:48 +0800
Subject: [PATCH 3/5] win32: wire platform into options / compiler /
 disassembler-factory / create_config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Splat-core hooks needed to expose the new win32 platform:

- util/options.py: accept win32 in the parse_opt_within list of
  valid platforms. Default auto_link_sections to [] for
  platform: win32 (the existing MIPS-style [.data, .rodata, .bss]
  default generates phantom LinkerEntries on PE binaries because
  each section is its own subsegment rather than an implicit
  sibling of a base text segment).

- util/compiler.py: register MSVC2..14, MINGW, CLANG_LLD. All
  share the same MASM-style asm config (.globl for symbols, no
  end-label, no INCLUDE_ASM). Distinct names keep generated
  configs documenting which toolchain produced the binary.

- util/file_presets.py: short-circuit for platform == "win32" in
  write_assembly_inc_files — the win32 segtypes emit asm directly
  with .section ... , "<flags>" headers and don't rely on the
  macros.inc / labels.inc helpers.

- disassembler/disassembler_instance.py: route
  platform == "win32" to the Capstone facade added in the earlier
  commit.

- segtypes/__init__.py + platforms/__init__.py: re-export the new
  win32 packages.

- scripts/create_config.py: new create_win32_config branch.
  Detects PE files (MZ + PE magic) and emits a YAML + symbol_addrs
  layout covering every parsed directory:
    * Per-section subsegments (text / data / rodata / pdata / bss /
      bin per the section characteristics; .reloc and .rsrc as bin).
    * Entrypoint, exports (incl. forwarder comments), eager imports,
      delay imports, TLS callbacks, SafeSEH handlers, /guard:cf
      targets, /GS security cookie, .NET CLR pointers, unwind RVAs,
      all as named symbol_addrs entries with type tags.
    * MSVC version auto-detected from MajorLinkerVersion;
      MinGW / Clang-LLD recognised via section names + import DLL
      fingerprints.
    * Post-section appendages (COFF symtab, Authenticode signature)
      emitted as bin segments with a high-address sandbox VMA so
      the linker doesn't fold them onto loaded sections.
    * Sanitisation rules shared with the segtypes via
      platforms.win32.sanitize_label so disassembly references
      resolve to the same identifiers the YAML declares.

No behaviour change for existing platforms — all win32 logic is
gated behind platform == "win32".
---
 .../disassembler/disassembler_instance.py     |   9 +
 src/splat/platforms/__init__.py               |   1 +
 src/splat/scripts/create_config.py            | 478 +++++++++++++++++-
 src/splat/segtypes/__init__.py                |   1 +
 src/splat/util/compiler.py                    |  48 ++
 src/splat/util/file_presets.py                |   4 +
 src/splat/util/options.py                     |  19 +-
 7 files changed, 555 insertions(+), 5 deletions(-)

diff --git a/src/splat/disassembler/disassembler_instance.py b/src/splat/disassembler/disassembler_instance.py
index 1745a442..523f4fc5 100644
--- a/src/splat/disassembler/disassembler_instance.py
+++ b/src/splat/disassembler/disassembler_instance.py
@@ -1,5 +1,6 @@
 from .disassembler import Disassembler
 from .spimdisasm_disassembler import SpimdisasmDisassembler
+from .capstone_disassembler import CapstoneDisassembler
 from .null_disassembler import NullDisassembler
 
 from ..util import options
@@ -19,6 +20,14 @@ def create_disassembler_instance(skip_version_check: bool, splat_version: str):
         __instance.configure()
         return
 
+    if options.opts.platform == "win32":
+        __instance = CapstoneDisassembler()
+        __initialized = True
+
+        __instance.check_version(skip_version_check, splat_version)
+        __instance.configure()
+        return
+
     raise NotImplementedError("No disassembler for requested platform")
 
 
diff --git a/src/splat/platforms/__init__.py b/src/splat/platforms/__init__.py
index 173970fc..88fe1f66 100644
--- a/src/splat/platforms/__init__.py
+++ b/src/splat/platforms/__init__.py
@@ -2,3 +2,4 @@
 from . import ps2 as ps2
 from . import psx as psx
 from . import psp as psp
+from . import win32 as win32
diff --git a/src/splat/scripts/create_config.py b/src/splat/scripts/create_config.py
index 56ef8652..5b29e626 100644
--- a/src/splat/scripts/create_config.py
+++ b/src/splat/scripts/create_config.py
@@ -5,7 +5,7 @@
 from pathlib import Path
 import subprocess
 import sys
-from typing import Optional
+from typing import List, Optional
 
 from ..util.n64 import find_code_length, rominfo
 from ..util.psx import psxexeinfo
@@ -36,6 +36,16 @@ def main(file_path: Path, objcopy: Optional[str]):
         do_elf(file_path, file_bytes, objcopy)
         return
 
+    # Check for Win32 PE
+    if file_bytes[0:2] == b"MZ" and len(file_bytes) >= 0x40:
+        pe_off = int.from_bytes(file_bytes[0x3C:0x40], "little")
+        if (
+            pe_off + 4 <= len(file_bytes)
+            and file_bytes[pe_off : pe_off + 4] == b"PE\x00\x00"
+        ):
+            create_win32_config(file_path, file_bytes)
+            return
+
     log.error(f"create_config does not support the file format of '{file_path}'")
 
 
@@ -374,6 +384,466 @@ def create_psx_config(exe_path: Path, exe_bytes: bytes):
     file_presets.write_all_files()
 
 
+def create_win32_config(exe_path: Path, exe_bytes: bytes):
+    from ..platforms import win32 as _w32
+    from ..platforms.win32 import (
+        parse_pe,
+        SCN_CNT_CODE,
+        SCN_CNT_UNINITIALIZED_DATA,
+        SCN_MEM_EXECUTE,
+        SCN_MEM_WRITE,
+    )
+
+    pe = parse_pe(exe_bytes)
+    basename = exe_path.name.replace(" ", "").lower()
+    cleaned_basename = remove_invalid_path_characters(basename)
+    if not cleaned_basename:
+        # Pathological filename (all spaces / all invalid chars stripped
+        # to empty) would produce a bare ".yaml" / ".ld" output and a
+        # YAML basename: '' that splat rejects. Fall back to a synthetic
+        # placeholder so generated artefacts still have names.
+        cleaned_basename = "pe_target"
+        basename = cleaned_basename
+
+    sha1 = hashlib.sha1(exe_bytes).hexdigest()
+
+    # Quote paths to survive YAML special characters (spaces, ':', '#'
+    # are all syntactically meaningful when unquoted).
+    def _yaml_quote(s: object) -> str:
+        text = str(s)
+        return '"' + text.replace("\\", "\\\\").replace('"', '\\"') + '"'
+
+    # Pick a compiler tag from telltale PE characteristics. The bulk of
+    # MSVC-linked binaries are identified via MajorLinkerVersion; MinGW
+    # and Clang-LLD are recognised via distinct fingerprints — MinGW
+    # builds typically import from `msvcrt.dll` or `libgcc_s_*.dll`,
+    # ship a `.idata`/`.CRT`/`.bss` section layout, and use linker
+    # version 2.x or 1.x. LLD-linked PEs identify themselves through
+    # a "Rich"-less DOS stub plus a `.rdata$zzzdebug` section, but we
+    # rely on the simpler heuristic: any import of `libc++.dll` or a
+    # GCC runtime stamps the binary as MinGW.
+    _LINKER_TO_MSVC = {
+        2: "MSVC2",
+        3: "MSVC4",
+        4: "MSVC4",
+        5: "MSVC5",
+        6: "MSVC6",
+        7: "MSVC7",
+        8: "MSVC8",
+        9: "MSVC9",
+        10: "MSVC10",
+        11: "MSVC11",
+        12: "MSVC12",
+        14: "MSVC14",
+    }
+
+    def _detect_compiler() -> str:
+        dlls_lower = {imp.dll.lower() for imp in pe.imports}
+        # MinGW (gcc-linked) signatures: links to libgcc, libstdc++,
+        # libwinpthread, or has a .CRT section.
+        mingw_dlls = {
+            "libgcc_s_dw2-1.dll",
+            "libgcc_s_seh-1.dll",
+            "libstdc++-6.dll",
+            "libwinpthread-1.dll",
+            "libgcc_s.dll",
+            "libssp-0.dll",
+        }
+        section_names = {s.name for s in pe.sections}
+        if dlls_lower & mingw_dlls or ".CRT" in section_names:
+            return "MINGW"
+        # LLD signature: linker_major 14 but characteristics differ.
+        # Conservative: only flag if .text$mn or .rdata$zzzdebug are
+        # present (LLD-specific section grouping).
+        if any(
+            s.startswith(".text$") or s.startswith(".rdata$") for s in section_names
+        ):
+            return "CLANG_LLD"
+        return _LINKER_TO_MSVC.get(pe.linker_major, "MSVC6")
+
+    compiler_tag = _detect_compiler()
+
+    header = f"""\
+# name: {exe_path.name}
+sha1: {sha1}
+options:
+  basename: {basename}
+  target_path: {_yaml_quote(exe_path)}
+  base_path: .
+  platform: win32
+  compiler: {compiler_tag}
+
+  # asm_path: asm
+  # src_path: src
+  # build_path: build
+
+  ld_script_path: {cleaned_basename}.ld
+  ld_dependencies: True
+
+  o_as_suffix: True
+
+  section_order: [".header", ".text", ".rdata", ".data", ".pdata", ".rodata", ".bss"]
+
+  symbol_addrs_path:
+    - symbol_addrs.txt
+
+  # undefined_funcs_auto_path: undefined_funcs_auto.txt
+  # undefined_syms_auto_path: undefined_syms_auto.txt
+
+  extensions_path: tools/splat_ext
+
+  string_encoding: ASCII
+  data_string_encoding: ASCII
+"""
+
+    # A section with raw_size > 0 but raw_pointer == 0 is loader-treated
+    # as uninitialised at runtime (the file simply doesn't back any bytes
+    # for it). Group those with the BSS bucket so we don't generate a
+    # file-backed segment pointing at offset 0 (the DOS header).
+    def _is_file_backed(s) -> bool:
+        return s.raw_size > 0 and s.raw_pointer > 0
+
+    segments = "\nsegments:\n"
+    segments += """\
+  - name: header
+    type: header
+    start: 0x0
+
+"""
+
+    # Order sections by file position so segments stay monotonically
+    # increasing in rom_start (splat requires this).
+    file_sections = sorted(
+        (s for s in pe.sections if _is_file_backed(s)),
+        key=lambda s: s.raw_pointer,
+    )
+    bss_sections = [
+        s
+        for s in pe.sections
+        if not _is_file_backed(s)
+        and (s.characteristics & SCN_CNT_UNINITIALIZED_DATA or s.virtual_size > 0)
+    ]
+
+    # Disambiguate duplicate section names (PE spec doesn't require
+    # uniqueness; some packers and hand-crafted images have repeats).
+    seen_names: dict = {}
+
+    def _unique_name(raw: str) -> str:
+        n = remove_invalid_path_characters(raw.lstrip(".") or "section")
+        # GAS labels can't start with a digit (PuTTY's `.00cfg`, MSVC's
+        # `.rdata$zzzdebug` numeric subsection, etc.). Prefix with `_`
+        # to keep the resulting `<name>_main` global label valid.
+        if n and n[0].isdigit():
+            n = "_" + n
+        count = seen_names.get(n, 0)
+        seen_names[n] = count + 1
+        return n if count == 0 else f"{n}_{count}"
+
+    for s in file_sections:
+        # Derive a sensible subsegment type. Special-case `.pdata` to
+        # the dedicated Win32SegPdata so RUNTIME_FUNCTION rows render
+        # structured instead of as opaque byte runs; treat `.reloc` /
+        # `.rsrc` as opaque binary since they hold structured loader
+        # data, not GAS-meaningful pointers or strings.
+        if pe.is_pe32_plus and s.name == ".pdata":
+            sub_type = "pdata"
+        elif s.name in (".reloc", ".rsrc"):
+            sub_type = "bin"
+        elif s.characteristics & (SCN_CNT_CODE | SCN_MEM_EXECUTE):
+            sub_type = "text"
+        elif s.characteristics & SCN_MEM_WRITE:
+            sub_type = "data"
+        else:
+            sub_type = "rodata"
+
+        safe_name = _unique_name(s.name)
+        vram = pe.image_base + s.virtual_address
+        segments += f"""\
+  - name: {safe_name}
+    type: code
+    start: 0x{s.raw_pointer:X}
+    vram: 0x{vram:08X}
+    subsegments:
+      - [0x{s.raw_pointer:X}, {sub_type}, {safe_name}_main]
+
+"""
+
+        # Virtual-only tail: file-backed section that extends in memory
+        # past its raw bytes (MSVC zero-init globals). Model as a BSS
+        # segment so the linker layout matches the runtime image.
+        # Virtual-only tail: file-backed section that extends in memory
+        # past its raw bytes — loader zero-fills the tail. Applies to
+        # both writable .data (MSVC zero-init globals) and any other
+        # section with VirtualSize > SizeOfRawData (occasionally seen on
+        # .rdata when constants are aligned past the file boundary).
+        if s.virtual_size > s.raw_size:
+            tail_vram = pe.image_base + s.virtual_address + s.raw_size
+            tail_size = s.virtual_size - s.raw_size
+            segments += f"""\
+  - {{ name: {safe_name}_bss, type: bss, vram: 0x{tail_vram:08X}, bss_size: 0x{tail_size:X} }}
+
+"""
+
+    for s in bss_sections:
+        # Sections claiming "uninitialized data" with VirtualSize 0 carry
+        # no runtime footprint — skip rather than emit `bss_size: 0x0`
+        # which splat treats as a malformed segment.
+        if s.virtual_size == 0:
+            continue
+        safe_name = _unique_name(s.name or "bss")
+        vram = pe.image_base + s.virtual_address
+        segments += f"""\
+  - {{ name: {safe_name}, type: bss, vram: 0x{vram:08X}, bss_size: 0x{s.virtual_size:X} }}
+
+"""
+
+    # Tack on a `bin` segment for the COFF symbol table if the optional
+    # header points at one. Modern MSVC binaries don't emit it (PDB
+    # replaces it) but vintage MSVC 4-6 binaries still ship it past the
+    # last raw-data section. The trailing `[len(exe_bytes)]` entry
+    # delimits its end.
+    # Post-section appendages (COFF symtab, Authenticode signature) sit
+    # past the last section's raw bytes. Collect them, sort by file
+    # offset, and emit in order — splat requires segments to be
+    # monotonically increasing by rom_start.
+    tail_segs: List[tuple] = []
+    if (
+        pe.coff_symtab_ptr
+        and pe.coff_num_symbols
+        and pe.coff_symtab_ptr < len(exe_bytes)
+    ):
+        tail_segs.append((pe.coff_symtab_ptr, "coff_symtab"))
+    if len(pe.data_directories) > 4:
+        cert_ptr, cert_size = pe.data_directories[_w32.DIR_CERTIFICATE]
+        # Authenticode signature: directory entry 4 (Certificate Table)
+        # is a FILE offset / size pair (unlike the RVA-based entries).
+        if cert_ptr and cert_size and cert_ptr < len(exe_bytes):
+            tail_segs.append((cert_ptr, "signature"))
+    # Post-section appendages have file offsets but no defined load
+    # VAs (the PE loader doesn't map them). Splat needs *some* VMA
+    # for each segment, so pin them at a high reserved range — well
+    # past the last section's VirtualAddress + VirtualSize — to keep
+    # the linker from assigning overlapping addresses.
+    tail_vma = pe.image_base + 0x10000000
+    for start, name in sorted(tail_segs, key=lambda t: t[0]):
+        segments += (
+            f"  - {{ name: {name}, type: bin, "
+            f"start: 0x{start:X}, vram: 0x{tail_vma:X} }}\n\n"
+        )
+        tail_vma += 0x100000
+
+    segments += f"  - [0x{len(exe_bytes):X}]\n"
+
+    out_file = Path(f"{cleaned_basename}.yaml")
+    with out_file.open("w", encoding="utf-8", newline="\n") as f:
+        print(f"Writing config to {out_file}")
+        f.write(header)
+        f.write(segments)
+
+    conf.load([out_file])
+    file_presets.write_all_files()
+
+    # Stash the entry point and any exported function as known symbols so
+    # the disassembly labels them. Some DLLs are built without DllMain and
+    # leave AddressOfEntryPoint = 0 — skip the entrypoint symbol in that
+    # case so we don't emit a label pointing at the PE header.
+    _sanitize_id = _w32.sanitize_label
+
+    symbol_addrs: List[str] = []
+    if pe.entry_point_rva:
+        symbol_addrs.append(f"entrypoint = 0x{pe.entry_point_va:08X}; // type:func")
+    export_labels = _w32.compute_export_labels(
+        pe, reserved={"entrypoint"} if pe.entry_point_rva else set()
+    )
+    # Build a {ordinal: label} lookup so we can emit them in iteration
+    # order while still using the centralised dedup-aware map.
+    va_to_label = export_labels
+    # Only print the "// Exports from X" header when there's at least one
+    # non-forwarder export — DLLs that re-export everything (e.g.
+    # apisetschema, downlevel shims) would otherwise emit a header with
+    # zero following rows.
+    named_exports = [e for e in pe.exports if e.forwarder is None]
+    if named_exports:
+        symbol_addrs.append("")
+        symbol_addrs.append(f"// Exports from {pe.export_dll_name or exe_path.name}")
+        for exp in named_exports:
+            va = pe.image_base + exp.rva
+            safe = va_to_label.get(va)
+            if safe is None:
+                continue
+            trailing = f"// type:func -- ordinal {exp.ordinal}"
+            if exp.name and safe != exp.name:
+                trailing += f" (original {exp.name})"
+            symbol_addrs.append(f"{safe} = 0x{va:08X}; {trailing}")
+    forwarders = [e for e in pe.exports if e.forwarder is not None]
+    if forwarders:
+        symbol_addrs.append("")
+        symbol_addrs.append("// Forwarded exports (live outside this DLL)")
+        for exp in forwarders:
+            name = exp.name or f"export_{exp.ordinal}"
+            safe = _sanitize_id(name)
+            # No real VA — record as a comment so users see the mapping.
+            symbol_addrs.append(
+                f"// {safe}  ->  {exp.forwarder}  (ordinal {exp.ordinal})"
+            )
+
+    iat_labels = _w32.compute_iat_labels(pe)
+    if pe.imports:
+        symbol_addrs.append("")
+        symbol_addrs.append(
+            "// IAT slots (each `call dword ptr [<va>]` reaches one of these)"
+        )
+        for imp in pe.imports:
+            slot_va = pe.image_base + imp.iat_rva
+            full = iat_labels.get(slot_va)
+            if full is None or not full.startswith("imp_"):
+                continue
+            trailing = f"// type:u32 -- import from {imp.dll}"
+            if imp.ordinal is not None:
+                trailing += f" ordinal {imp.ordinal}"
+            symbol_addrs.append(f"{full} = 0x{slot_va:08X}; {trailing}")
+
+    if pe.delay_imports:
+        symbol_addrs.append("")
+        symbol_addrs.append(
+            "// Delay-load IAT slots (resolved on first call via __delayLoadHelper2)"
+        )
+        for imp in pe.delay_imports:
+            slot_va = pe.image_base + imp.iat_rva
+            full = iat_labels.get(slot_va)
+            if full is None or not full.startswith("dimp_"):
+                continue
+            trailing = f"// type:u32 -- delay-loaded import from {imp.dll}"
+            if imp.ordinal is not None:
+                trailing += f" ordinal {imp.ordinal}"
+            symbol_addrs.append(f"{full} = 0x{slot_va:08X}; {trailing}")
+
+    if pe.clr_header is not None and pe.clr_header.metadata_rva:
+        symbol_addrs.append("")
+        symbol_addrs.append(
+            "// .NET CLR header — points at the assembly's metadata "
+            "(ECMA-335) and entry-point token."
+        )
+        md_va = pe.image_base + pe.clr_header.metadata_rva
+        symbol_addrs.append(f"clr_metadata = 0x{md_va:08X}; // type:u8")
+        if pe.clr_header.strong_name_signature_rva:
+            sn_va = pe.image_base + pe.clr_header.strong_name_signature_rva
+            symbol_addrs.append(
+                f"clr_strong_name_signature = 0x{sn_va:08X}; // type:u8"
+            )
+        if pe.clr_header.resources_rva:
+            res_va = pe.image_base + pe.clr_header.resources_rva
+            symbol_addrs.append(f"clr_resources = 0x{res_va:08X}; // type:u8")
+
+    if pe.security_cookie_va:
+        symbol_addrs.append("")
+        symbol_addrs.append(
+            "// /GS security cookie (xor'd with frame pointer at function entry)"
+        )
+        symbol_addrs.append(
+            f"security_cookie = 0x{pe.security_cookie_va:08X}; // type:u32"
+        )
+
+    if pe.tls_callback_vas:
+        symbol_addrs.append("")
+        symbol_addrs.append(
+            "// TLS callbacks (run by the loader before DllMain / entrypoint)"
+        )
+        for idx, cb_va in enumerate(pe.tls_callback_vas):
+            symbol_addrs.append(f"tls_callback_{idx} = 0x{cb_va:08X}; // type:func")
+
+    if pe.safe_seh_handlers:
+        symbol_addrs.append("")
+        symbol_addrs.append(
+            "// SafeSEH handlers (registered exception filter functions)"
+        )
+        for idx, rva in enumerate(pe.safe_seh_handlers):
+            va = pe.image_base + rva
+            symbol_addrs.append(f"safeseh_{idx} = 0x{va:08X}; // type:func")
+
+    if pe.runtime_functions:
+        symbol_addrs.append("")
+        symbol_addrs.append(
+            "// UNWIND_INFO blobs — each RUNTIME_FUNCTION's UnwindInfoAddress "
+            "points at a (variable-length) IMAGE_UNWIND_INFO record."
+        )
+        unwind_cap = 2048
+        seen_unwind: set = set()
+        for begin, _end, uw in pe.runtime_functions[:unwind_cap]:
+            # The high bit of the UnwindInfoAddress flags a chained record —
+            # the same target then naturally collides with itself. Mask off
+            # before symbol emission so multiple chained refs share one
+            # `unwind_<va>` label.
+            base_uw = uw & 0x7FFFFFFF
+            if base_uw == 0 or base_uw in seen_unwind:
+                continue
+            seen_unwind.add(base_uw)
+            va = pe.image_base + base_uw
+            symbol_addrs.append(f"unwind_{va:X} = 0x{va:08X}; // type:u8")
+
+    if pe.cfg_function_rvas:
+        symbol_addrs.append("")
+        symbol_addrs.append(
+            "// /guard:cf valid indirect-call targets — every entry is "
+            "a function the loader's CFG bitmap whitelists."
+        )
+        # CFG tables in real binaries can be huge (ntdll ~40k). Cap the
+        # symbol emission at 1024 to keep symbol_addrs.txt readable; the
+        # text.py call-target seed already covers all entries for label
+        # emission. Pass --full-cfg if you want every row.
+        cfg_cap = 1024
+        shown = pe.cfg_function_rvas[:cfg_cap]
+        for idx, rva in enumerate(shown):
+            va = pe.image_base + rva
+            symbol_addrs.append(f"cfg_target_{idx} = 0x{va:08X}; // type:func")
+        if len(pe.cfg_function_rvas) > cfg_cap:
+            symbol_addrs.append(
+                f"// ... and {len(pe.cfg_function_rvas) - cfg_cap} more CFG "
+                "targets omitted; bump the cap in create_win32_config to list all."
+            )
+
+    with Path("symbol_addrs.txt").open("w", encoding="utf-8", newline="\n") as f:
+        print("Writing symbol_addrs.txt")
+        f.write(
+            "// Visit https://github.com/ethteck/splat/wiki/Adding-Symbols for documentation about this file\n"
+            f"// Generated from {exe_path.name} (sha1 {sha1[:12]}...) by create_win32_config.\n"
+            "// Edits are preserved across re-runs only via merging in a separate symbols file.\n"
+        )
+        body = "\n".join(symbol_addrs)
+        f.write(body)
+        # POSIX convention: text files end with a newline. Avoid the
+        # "missing newline at end of file" lint when symbol_addrs.txt
+        # has no body entries (resource-only DLL, all-forwarder shim).
+        if not body.endswith("\n"):
+            f.write("\n")
+
+    # One-line summary of the corpus so the user knows at-a-glance what
+    # auto-config found in their PE.
+    parts = [
+        f"{len(pe.sections)} sections",
+        f"{len(pe.exports)} exports" if pe.exports else None,
+        f"{len(pe.imports)} imports" if pe.imports else None,
+        f"{len(pe.delay_imports)} delay-imports" if pe.delay_imports else None,
+        f"{len(pe.tls_callback_vas)} TLS callbacks" if pe.tls_callback_vas else None,
+        f"{len(pe.safe_seh_handlers)} SafeSEH handlers"
+        if pe.safe_seh_handlers
+        else None,
+        f"{len(pe.cfg_function_rvas)} CFG targets" if pe.cfg_function_rvas else None,
+        f"{len(pe.runtime_functions)} RUNTIME_FUNCTIONs"
+        if pe.runtime_functions
+        else None,
+        f"{len(pe.unwind_info)} unwind records" if pe.unwind_info else None,
+        f"{len(pe.coff_symbols)} COFF symbols" if pe.coff_symbols else None,
+        f".NET v{pe.clr_header.runtime_major}.{pe.clr_header.runtime_minor}"
+        if pe.clr_header
+        else None,
+    ]
+    summary = ", ".join(p for p in parts if p)
+    print(f"Detected: {summary}.")
+    if pe.pdb_path:
+        print(f"PDB hint: {pe.pdb_path}")
+
+
 def do_elf(elf_path: Path, elf_bytes: bytes, objcopy: Optional[str]):
     elf = ps2elfinfo.Ps2Elf.get_info(elf_path, elf_bytes)
     if elf is None:
@@ -568,7 +1038,7 @@ def run_objcopy(objcopy_name: str, elf_path: str, rom: str) -> list[str]:
 def add_arguments_to_parser(parser: argparse.ArgumentParser):
     parser.add_argument(
         "file",
-        help="Path to a .z64/.n64 ROM, PSX executable or PS2 ELF",
+        help="Path to a .z64/.n64 ROM, PSX executable, PS2 ELF, or Win32 PE",
         type=Path,
     )
     parser.add_argument(
@@ -582,7 +1052,9 @@ def process_arguments(args: argparse.Namespace):
     main(args.file, args.objcopy)
 
 
-script_description = "Create a splat config from an N64 ROM or PSX executable."
+script_description = (
+    "Create a splat config from an N64 ROM, PSX executable, PS2 ELF, or Win32 PE."
+)
 
 
 def add_subparser(subparser: argparse._SubParsersAction):
diff --git a/src/splat/segtypes/__init__.py b/src/splat/segtypes/__init__.py
index 8204998a..a7cfd8be 100644
--- a/src/splat/segtypes/__init__.py
+++ b/src/splat/segtypes/__init__.py
@@ -6,3 +6,4 @@
 from . import ps2 as ps2
 from . import psx as psx
 from . import psp as psp
+from . import win32 as win32
diff --git a/src/splat/util/compiler.py b/src/splat/util/compiler.py
index eeea374a..f3481634 100644
--- a/src/splat/util/compiler.py
+++ b/src/splat/util/compiler.py
@@ -64,6 +64,41 @@ class Compiler:
 MWCCPS2 = Compiler("MWCCPS2", uses_include_asm=False)
 EEGCC = Compiler("EEGCC", align_on_branch_labels=True)
 
+
+# Win32 / PE — every MSVC linker emits MASM-style asm; for splat
+# purposes they all share the same config (.globl for symbols, no
+# end-label, no INCLUDE_ASM). Distinct version tags keep generated
+# configs documenting which MSVC produced the binary so future
+# refactors can specialise per-version if needed.
+def _msvc_compiler(name: str) -> Compiler:
+    return Compiler(
+        name,
+        asm_function_macro=".globl",
+        asm_function_alt_macro=".globl",
+        asm_jtbl_label_macro=".globl",
+        asm_data_macro=".globl",
+        asm_end_label="",
+        asm_data_end_label="",
+        asm_nonmatching_label_macro="",
+        asm_emit_size_directive=False,
+        uses_include_asm=False,
+    )
+
+
+MINGW = _msvc_compiler("MINGW")
+CLANG_LLD = _msvc_compiler("CLANG_LLD")
+MSVC2 = _msvc_compiler("MSVC2")
+MSVC4 = _msvc_compiler("MSVC4")
+MSVC5 = _msvc_compiler("MSVC5")
+MSVC6 = _msvc_compiler("MSVC6")
+MSVC7 = _msvc_compiler("MSVC7")
+MSVC8 = _msvc_compiler("MSVC8")
+MSVC9 = _msvc_compiler("MSVC9")
+MSVC10 = _msvc_compiler("MSVC10")
+MSVC11 = _msvc_compiler("MSVC11")
+MSVC12 = _msvc_compiler("MSVC12")
+MSVC14 = _msvc_compiler("MSVC14")
+
 compiler_for_name: Dict[str, Compiler] = {
     x.name: x
     for x in [
@@ -75,6 +110,19 @@ class Compiler:
         PSYQ,
         MWCCPS2,
         EEGCC,
+        MINGW,
+        CLANG_LLD,
+        MSVC2,
+        MSVC4,
+        MSVC5,
+        MSVC6,
+        MSVC7,
+        MSVC8,
+        MSVC9,
+        MSVC10,
+        MSVC11,
+        MSVC12,
+        MSVC14,
     ]
 }
 
diff --git a/src/splat/util/file_presets.py b/src/splat/util/file_presets.py
index ea940ea6..b80724cf 100644
--- a/src/splat/util/file_presets.py
+++ b/src/splat/util/file_presets.py
@@ -251,6 +251,10 @@ def write_assembly_inc_files():
         gas = macros_inc.replace("\\label", '"\\label"').replace(
             '"\\label"\\().NON_MATCHING', '"\\label\\().NON_MATCHING"'
         )
+    elif options.opts.platform == "win32":
+        # The win32 segtypes emit asm directly with `.section ... , "<flags>"`
+        # headers and don't rely on the macros.inc / labels.inc helpers.
+        gas = ""
     elif not options.opts.is_unsupported_platform:
         log.error(f"Unknown platform '{options.opts.platform}'")
     else:
diff --git a/src/splat/util/options.py b/src/splat/util/options.py
index 27089d51..c67913a6 100644
--- a/src/splat/util/options.py
+++ b/src/splat/util/options.py
@@ -388,7 +388,9 @@ def _parse_yaml(
     if is_unsupported_platform:
         platform = p.parse_opt("platform", str)
     else:
-        platform = p.parse_opt_within("platform", str, ["n64", "psx", "ps2", "psp"])
+        platform = p.parse_opt_within(
+            "platform", str, ["n64", "psx", "ps2", "psp", "win32"]
+        )
 
     comp = compiler.for_name(p.parse_opt("compiler", str, "IDO"))
 
@@ -510,8 +512,21 @@ def parse_include_asm_macro_style() -> Literal["default", "maspsx_hack"]:
         elf_section_list_path=p.parse_optional_path(base_path, "elf_section_list_path"),
         subalign=p.parse_optional_opt_with_default("subalign", int, 16),
         emit_subalign=p.parse_opt("emit_subalign", bool, True),
+        # The `[.data, .rodata, .bss]` default reflects the MIPS / N64
+        # convention where each `.c` file produces a text-base segment
+        # with implicit data/rodata/bss siblings sharing the same name
+        # (and on-disk filename). On platforms whose sections are
+        # declared as independent subsegments — e.g. Win32 PE, where a
+        # `.text` subsegment named `main_text` and a `.data` subsegment
+        # named `main_data` have distinct file paths — the implicit
+        # sibling generation produces phantom LinkerEntries pointing at
+        # non-existent `build/asm/data/main_text.s.o` files. Default to
+        # an empty list there so the linker script only references the
+        # subsegments that were actually emitted.
         auto_link_sections=p.parse_opt(
-            "auto_link_sections", list, [".data", ".rodata", ".bss"]
+            "auto_link_sections",
+            list,
+            [] if platform == "win32" else [".data", ".rodata", ".bss"],
         ),
         ld_script_path=p.parse_path(base_path, "ld_script_path", f"{basename}.ld"),
         ld_symbol_header_path=p.parse_optional_path(base_path, "ld_symbol_header_path"),

From 66ca2ca8ab949d705ec1cdd5ace14eb79bd76ad8 Mon Sep 17 00:00:00 2001
From: "Marcel W. Wysocki" <maci.stgn@gmail.com>
Date: Wed, 20 May 2026 11:55:59 +0800
Subject: [PATCH 4/5] win32: add win32_reassemble post-process script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New python -m splat.scripts.win32_reassemble <yaml> driver that
inverts splat split for win32 binaries.

Pipeline:
  1. Run as on every .s under asm_path / data_path -> .o files at
     the build_path layout the splat-generated linker script
     expects (<build_path>/asm/<rel>.s.o by default, <rel>.o when
     the YAML has o_as_suffix: True).
  2. Wrap any .bin assets via objcopy -I binary -O elf... so they
     can be linked in.
  3. Invoke ld -N -m elf_i386 | elf_x86_64 -T <splat.ld> from
     base_path. The splat linker script already places each section
     at the right LMA / file-offset.
  4. objcopy --set-section-flags .header=alloc,load,data (the
     custom .header section starts READONLY-only from GAS),
     then objcopy -O binary to extract the linked image —
     the .header section already carries the full DOS+COFF+optional
     header bytes, so the binary blob IS the reassembled PE.

With exact_encoding: true on the text/data/pdata subsegments, the
reassembled PE is byte-identical to the original. Verified on
PsExec / PsExec64 / PuTTY 0.60 / PuTTY 0.70 32-bit / PuTTY 0.83
64-bit (716 KB to 1.7 MB; vintage MSVC6 through modern MSVC14;
both PE32 and PE32+).
---
 src/splat/scripts/win32_reassemble.py | 270 ++++++++++++++++++++++++++
 1 file changed, 270 insertions(+)
 create mode 100644 src/splat/scripts/win32_reassemble.py

diff --git a/src/splat/scripts/win32_reassemble.py b/src/splat/scripts/win32_reassemble.py
new file mode 100644
index 00000000..96eb6ca5
--- /dev/null
+++ b/src/splat/scripts/win32_reassemble.py
@@ -0,0 +1,270 @@
+"""Reassemble a splat-split win32 PE back into a single PE/EXE/DLL.
+
+Pipeline:
+
+  1. Run `as` on every .s under asm_path / data_path → .o files placed
+     at the build_path layout the splat-generated linker script
+     expects (build/asm/<rel>.s.o).
+  2. Wrap any .bin assets into ELF objects via `objcopy -I binary -O
+     elf32-i386|elf64-x86-64` so they can be linked in.
+  3. Invoke `ld -T <splat.ld>` from the base_path to produce an ELF
+     image whose section layout matches the original PE.
+  4. Run `objcopy -O pei-i386|pei-x86-64` to convert the ELF to a PE.
+
+Output defaults to `<target>.reasm` next to the original target
+binary so an accidental run doesn't clobber the source.
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+from typing import List, Optional
+
+import yaml
+
+from ..util import log
+
+
+def _which(cmd: str) -> str:
+    found = shutil.which(cmd)
+    if not found:
+        log.error(
+            f"win32_reassemble: required tool '{cmd}' not on PATH — "
+            "install binutils (provides as / ld / objcopy)"
+        )
+    return found
+
+
+def _read_yaml(yaml_path: Path) -> dict:
+    return yaml.safe_load(yaml_path.read_text(encoding="utf-8"))
+
+
+def _detect_bitness(yaml_path: Path, conf: dict) -> bool:
+    """Return True if the source PE is PE32+ (x86_64). Inspect the
+    `target_path` binary's optional-header magic."""
+    target = conf["options"].get("target_path")
+    if not target:
+        log.error("win32_reassemble: YAML missing options.target_path")
+    opts = conf["options"]
+    base_path = (yaml_path.parent / opts.get("base_path", ".")).resolve()
+    target_path = (base_path / target).resolve()
+    if not target_path.exists():
+        log.error(f"win32_reassemble: target binary not found at {target_path}")
+    data = target_path.read_bytes()
+    if len(data) < 0x100 or data[:2] != b"MZ":
+        log.error(f"win32_reassemble: {target_path} is not a PE")
+    pe_off = int.from_bytes(data[0x3C:0x40], "little")
+    magic = int.from_bytes(data[pe_off + 0x18 : pe_off + 0x1A], "little")
+    return magic == 0x20B
+
+
+def _run(cmd: List[str], verbose: bool, cwd: Optional[Path] = None) -> None:
+    if verbose:
+        prefix = f"(cd {cwd}) " if cwd else ""
+        print(f"$ {prefix}" + " ".join(str(c) for c in cmd))
+    r = subprocess.run(cmd, capture_output=True, cwd=str(cwd) if cwd else None)
+    if r.returncode != 0:
+        sys.stderr.write(r.stderr.decode(errors="replace"))
+        log.error(f"win32_reassemble: command failed: {cmd[0]}")
+
+
+def _collect_sources(
+    asm_path: Path, data_path: Path, asset_path: Path
+) -> "tuple[List[Path], List[Path]]":
+    """Return (.s sources, .bin assets) under the splat-configured
+    source directories."""
+    s_paths: List[Path] = []
+    seen = set()
+    for root in (asm_path, data_path):
+        if not root.exists():
+            continue
+        for p in sorted(root.rglob("*.s")):
+            if p in seen:
+                continue
+            seen.add(p)
+            s_paths.append(p)
+    bin_paths: List[Path] = []
+    if asset_path.exists():
+        bin_paths = sorted(asset_path.rglob("*.bin"))
+    return s_paths, bin_paths
+
+
+def reassemble(yaml_path: Path, out_path: Path, verbose: bool = False) -> Path:
+    """Drive the full assemble + link + PE-convert pipeline for a
+    splat-generated win32 config. Returns the path to the produced PE."""
+    conf = _read_yaml(yaml_path)
+    opts = conf["options"]
+    base_path = (yaml_path.parent / opts.get("base_path", ".")).resolve()
+    asm_path = (base_path / opts.get("asm_path", "asm")).resolve()
+    data_path = (base_path / opts.get("data_path", "data")).resolve()
+    asset_path = (base_path / opts.get("asset_path", "assets")).resolve()
+    build_path = (base_path / opts.get("build_path", "build")).resolve()
+    ld_path = base_path / opts.get("ld_script_path", "")
+    if not ld_path.exists():
+        log.error(
+            f"win32_reassemble: linker script not found at {ld_path} — "
+            "run `python -m splat split <yaml>` first"
+        )
+
+    is_pe32_plus = _detect_bitness(yaml_path, conf)
+    mode_flag = "--64" if is_pe32_plus else "--32"
+    ld_emulation = "elf_x86_64" if is_pe32_plus else "elf_i386"
+    bin_obj_fmt = "elf64-x86-64" if is_pe32_plus else "elf32-i386"
+    bin_obj_arch = "i386:x86-64" if is_pe32_plus else "i386"
+
+    asm_tool = _which("as")
+    ld_tool = _which("ld")
+    objcopy = _which("objcopy")
+
+    s_paths, bin_paths = _collect_sources(asm_path, data_path, asset_path)
+    if not s_paths and not bin_paths:
+        log.error(
+            "win32_reassemble: no .s or .bin sources found — run "
+            "`python -m splat split <yaml>` first"
+        )
+
+    # The splat-generated linker script references object files at
+    # `<build_path>/<source_relpath><suffix>`. With splat's default
+    # `o_as_suffix: False` the suffix is `.s.o` (`<source>.s.o`); with
+    # `o_as_suffix: True` it's just `.o` (`<source>.o`). Match
+    # whichever the YAML opted into.
+    use_o_as_suffix = bool(opts.get("o_as_suffix", False))
+
+    def _obj_for(src: Path) -> Path:
+        for root in (asm_path, data_path):
+            try:
+                rel = src.relative_to(root)
+                if use_o_as_suffix:
+                    rel = rel.with_suffix(".o")
+                else:
+                    rel = rel.with_suffix(rel.suffix + ".o")
+                return build_path / "asm" / rel
+            except ValueError:
+                continue
+        return src.with_suffix(src.suffix + ".o")
+
+    for s_path in s_paths:
+        o_path = _obj_for(s_path)
+        o_path.parent.mkdir(parents=True, exist_ok=True)
+        _run([asm_tool, mode_flag, str(s_path), "-o", str(o_path)], verbose)
+
+    # Wrap .bin assets so ld can link them. Splat's linker script
+    # references each bin as `<build_path>/assets/<rel>.o` (no .bin
+    # suffix), pulling the `.data` section that `objcopy -I binary`
+    # populates by default. Run `objcopy` from `bin_path.parent` so
+    # the embedded `_binary_<name>_start` symbols come out
+    # deterministic regardless of where the source file lives.
+    for bin_path in bin_paths:
+        try:
+            rel = bin_path.relative_to(asset_path)
+        except ValueError:
+            rel = Path(bin_path.name)
+        o_rel = rel.with_suffix(".o")
+        o_path = build_path / "assets" / o_rel
+        o_path.parent.mkdir(parents=True, exist_ok=True)
+        # objcopy from CWD=bin_path.parent so the auto-generated
+        # `_binary_<basename>_start` symbol uses just the filename.
+        _run(
+            [
+                objcopy,
+                "-I",
+                "binary",
+                "-O",
+                bin_obj_fmt,
+                "-B",
+                bin_obj_arch,
+                bin_path.name,
+                str(o_path.resolve()),
+            ],
+            verbose,
+            cwd=bin_path.parent,
+        )
+
+    # Link via the splat-generated linker script. Run from base_path
+    # so the script's `build/asm/...` references resolve.
+    with tempfile.TemporaryDirectory(prefix="splat-reasm-") as td:
+        elf_path = Path(td) / "linked.elf"
+        # -N (omagic): produce an ELF without page-aligned segments —
+        # the splat .ld layout packs sections contiguously by LMA and
+        # would otherwise blow past the program-header capacity.
+        _run(
+            [
+                ld_tool,
+                "-m",
+                ld_emulation,
+                "-N",
+                "-T",
+                ld_path.name,
+                "-o",
+                str(elf_path),
+            ],
+            verbose,
+            cwd=base_path,
+        )
+        # Force alloc/load on splat's custom .header section so the
+        # binary extraction includes it. GAS marks .header as
+        # READONLY-only because there's no exec/write flag in the
+        # `.section .header` line; that's enough for the linker but
+        # makes `-O binary` skip the bytes.
+        _run(
+            [
+                objcopy,
+                "--set-section-flags",
+                ".header=alloc,load,data",
+                str(elf_path),
+            ],
+            verbose,
+        )
+        # Extract the loaded image as a raw byte blob — the splat
+        # `.header` section already contains the full PE header
+        # (DOS stub + COFF + optional header + section table) and
+        # every other section is positioned at its file-offset by
+        # the linker script. Wrapping with `-O pei-*` would prepend
+        # a second PE header; we just want the bytes verbatim.
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        _run(
+            [objcopy, "-O", "binary", str(elf_path), str(out_path)],
+            verbose,
+        )
+
+    if verbose:
+        sha = hashlib.sha1(out_path.read_bytes()).hexdigest()
+        print(f"Produced {out_path} ({len(out_path.read_bytes())} bytes, sha1 {sha})")
+
+    return out_path
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Reassemble a splat-split Win32 PE back into a single .exe/.dll"
+    )
+    parser.add_argument("yaml", type=Path, help="splat YAML config")
+    parser.add_argument(
+        "--out",
+        type=Path,
+        default=None,
+        help="Output PE path (defaults to <target>.reasm)",
+    )
+    parser.add_argument("-v", "--verbose", action="store_true")
+    args = parser.parse_args()
+
+    conf = _read_yaml(args.yaml)
+    target = conf["options"].get("target_path")
+    out_path = args.out
+    if out_path is None:
+        if not target:
+            log.error("win32_reassemble: YAML has no target_path; pass --out")
+        base_path = (args.yaml.parent / conf["options"].get("base_path", ".")).resolve()
+        out_path = Path(str((base_path / target).resolve()) + ".reasm")
+
+    reassemble(args.yaml, out_path, verbose=args.verbose)
+
+
+if __name__ == "__main__":
+    main()

From 6e126734c7ee9283722256c60932c10128af2eaf Mon Sep 17 00:00:00 2001
From: "Marcel W. Wysocki" <maci.stgn@gmail.com>
Date: Wed, 20 May 2026 11:56:12 +0800
Subject: [PATCH 5/5] win32: tests, docs, fixtures, pyproject extras
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Coverage:

- test_win32_pe.py (~5000 LOC, 199 unit tests): every PE parser
  branch (DOS/COFF/optional header, all data directories incl.
  fuzz-input edge cases — runt headers, fuzzed NumberOfRvaAndSizes,
  pathological string-table offsets, virtual-tail RVA rejection),
  label-generation helpers (sanitize_label / compute_iat_labels /
  compute_export_labels), every segtype's behaviour (header summary
  width adjustments for PE32+, exact_encoding inheritance,
  resource-only DLLs, all-forwarder shim DLLs, BSS-only PEs,
  phantom-pointer sections, etc.), CapstoneDisassembler engine
  selection, and Win32SegBin / Win32SegAsm marker semantics.

- test.py: 10 end-to-end test methods covering split + assemble +
  byte-identical round-trip on PE32 and PE32+ synthetic fixtures,
  plus a win32_reassemble byte-identity test for each bitness.

- test/win32_app/ + test/win32_app64/: synthetic PE32 / PE32+
  fixtures with generate.py scripts that emit the binaries on the
  fly (so the suite is hermetic and no binary blobs are committed).

- test-binaries/zoo/README.md: catalogue of 30 freely-
  redistributable PE binaries spanning 1995-2025 + ARM64, organised
  by era band (MSVC 4-6 through MSVC 14.x, MinGW, ScummVM,
  Sysinternals PSTools, PuTTY, etc.) with direct download URLs.
  Binaries themselves stay outside the repo via the .gitignore.

- pyproject.toml: new win32 optional dependency group
  (capstone>=5.0.0). dev pulls it in.

- README.md: surface the win32 platform support and the three
  user-facing scripts (splat split / create_config /
  win32_reassemble).

- CHANGELOG.md: unreleased entry summarising the platform
  addition.
---
 CHANGELOG.md                 |   41 +
 README.md                    |   17 +-
 pyproject.toml               |    4 +
 test-binaries/.gitignore     |    4 +
 test-binaries/zoo/.gitignore |    3 +
 test-binaries/zoo/README.md  |  107 +
 test.py                      |  512 +++
 test/win32_app/.gitignore    |   13 +
 test/win32_app/generate.py   |  230 ++
 test/win32_app/splat.yaml    |   40 +
 test/win32_app/test_win32.py |  125 +
 test/win32_app64/.gitignore  |   12 +
 test/win32_app64/generate.py |  215 ++
 test/win32_app64/splat.yaml  |   45 +
 test_win32_pe.py             | 5768 ++++++++++++++++++++++++++++++++++
 15 files changed, 7135 insertions(+), 1 deletion(-)
 create mode 100644 test-binaries/.gitignore
 create mode 100644 test-binaries/zoo/.gitignore
 create mode 100644 test-binaries/zoo/README.md
 create mode 100644 test/win32_app/.gitignore
 create mode 100644 test/win32_app/generate.py
 create mode 100644 test/win32_app/splat.yaml
 create mode 100644 test/win32_app/test_win32.py
 create mode 100644 test/win32_app64/.gitignore
 create mode 100644 test/win32_app64/generate.py
 create mode 100644 test/win32_app64/splat.yaml
 create mode 100644 test_win32_pe.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a5d3b653..db06a292 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,46 @@
 # splat Release Notes
 
+### Unreleased
+
+* Add support for Win32 PE binaries (x86 PE32 and x86_64 PE32+).
+  * New `platform: win32` option backed by a self-contained PE parser
+    (every populated data directory + COFF symtab + x64 SEH unwind
+    info + the .NET CLR header) and a Capstone-based x86 / x86_64
+    disassembler. Optional dependency group `win32` pulls in
+    `capstone>=5.0.0`.
+  * New segtypes under `segtypes/win32/`: `header` (structured PE
+    header byte-by-byte dump + human-readable summary block),
+    `text` / `asm` (Capstone disasm with GAS-compatible operand
+    rewrites), `data` / `rodata` (heuristic string + pointer
+    detection, NUL-run collapse), `bss` (NOLOAD reservation), `bin`
+    (opaque blob for `.reloc` / `.rsrc` / signature / COFF symtab),
+    `pdata` (PE32+ RUNTIME_FUNCTION rows with optional decoded
+    UNWIND_INFO opcode lists).
+  * New compiler tags: `MSVC2..14`, `MINGW`, `CLANG_LLD`. All share
+    the same MASM-style asm conventions; distinct names preserve
+    provenance of generated configs.
+  * `create_config` auto-detects PE files (MZ + PE magic), generates
+    a YAML + symbol_addrs.txt with named symbols for the entrypoint,
+    exports (incl. forwarders as comments), eager + delay imports,
+    TLS callbacks, SafeSEH handlers, /guard:cf targets, /GS security
+    cookie, .NET CLR metadata pointers, and unwind RVAs.
+  * `auto_link_sections` default is `[]` for `platform: win32` (PE
+    sections are independent subsegments — implicit MIPS-style
+    sibling generation produces phantom linker entries otherwise).
+  * New `python -m splat.scripts.win32_reassemble <yaml>` script:
+    runs `as` + `ld` + `objcopy` against the splat-generated
+    layout to reconstruct a PE. With `exact_encoding: true` on
+    text/data/pdata subsegments, the reassembled PE is
+    byte-identical to the original. Verified end-to-end on
+    5 real-world binaries: Sysinternals PsExec (PE32) + PsExec64
+    (PE32+), PuTTY 0.60 (vintage MSVC6), PuTTY 0.70 32-bit (MSVC14
+    with `.00cfg` CFG section), PuTTY 0.83 64-bit (MSVC14 PE32+
+    with 2410 RUNTIME_FUNCTION entries).
+  * Tests: 199 unit tests covering the PE parser, label generation
+    helpers, segtype emission, header rendering, and string
+    detectors; 10 end-to-end tests covering split + reassemble +
+    GAS-clean assembly on both PE32 and PE32+ synthetic fixtures.
+
 ### 0.40.1
 
 * Always write the link dependency file.
diff --git a/README.md b/README.md
index 2ec129f1..d007d97b 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 A binary splitting tool to assist with decompilation and modding projects
 
-Currently, only N64, PSX, PS2 and PSP binaries are supported. More platforms may come in the future.
+Currently N64, PSX, PS2, PSP and Win32 PE (x86 / x86_64) binaries are supported. More platforms may come in the future.
 
 Please check out the [wiki](https://github.com/ethteck/splat/wiki) for more information including [examples](https://github.com/ethteck/splat/wiki/Examples) of projects that use splat.
 
@@ -27,8 +27,23 @@ splat64[mips]>=0.40.1,<1.0.0
 ### Optional dependencies
 
 - `mips`: Required when using the N64, PSX, PS2 or PSP platforms.
+- `win32`: Required when using the Win32 PE platform (pulls in Capstone for x86 / x86_64 disassembly).
 - `dev`: Installs all the available dependencies groups and other packages for development.
 
 ### Gamecube / Wii
 
 For Gamecube / Wii projects, see [decomp-toolkit](https://github.com/encounter/decomp-toolkit)!
+
+### Win32 PE support
+
+The `win32` platform handles PE32 (x86) and PE32+ (x86_64) binaries built by MSVC 4.x-14.x, MinGW (libgcc-linked), and Clang-LLD. Decoded directories include exports, imports, delay imports, bound imports, resources, exception/SEH tables (with unwind-info opcode lists), TLS, /GS + /SAFESEH + /guard:cf load-config, base relocations, debug (CodeView PDB GUID/age extraction), the CLR runtime header (.NET assemblies), and the deprecated COFF symbol table.
+
+Workflow:
+
+```bash
+python -m splat.scripts.create_config my.exe       # auto-generate YAML + symbol_addrs.txt
+python -m splat split my.exe.yaml                  # produce GAS-clean .s + linker script
+python -m splat.scripts.win32_reassemble my.exe.yaml  # link bytes back into a PE
+```
+
+With `exact_encoding: true` on the text/data/pdata subsegments the reassembled PE is byte-identical to the original.
diff --git a/pyproject.toml b/pyproject.toml
index e04cdd70..d2789364 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,8 +26,12 @@ mips = [
     "n64img>=0.3.3",
     "crunch64>=0.5.1,<1.0.0",
 ]
+win32 = [
+    "capstone>=5.0.0",
+]
 dev = [
     "splat64[mips]",
+    "splat64[win32]",
     "ruff",
     "mypy",
     "types-PyYAML",
diff --git a/test-binaries/.gitignore b/test-binaries/.gitignore
new file mode 100644
index 00000000..61c2c982
--- /dev/null
+++ b/test-binaries/.gitignore
@@ -0,0 +1,4 @@
+/split/
+*.exe
+Server/
+!splat.yaml
diff --git a/test-binaries/zoo/.gitignore b/test-binaries/zoo/.gitignore
new file mode 100644
index 00000000..7c9d611b
--- /dev/null
+++ b/test-binaries/zoo/.gitignore
@@ -0,0 +1,3 @@
+*
+!.gitignore
+!README.md
diff --git a/test-binaries/zoo/README.md b/test-binaries/zoo/README.md
new file mode 100644
index 00000000..75c5a9ca
--- /dev/null
+++ b/test-binaries/zoo/README.md
@@ -0,0 +1,107 @@
+# Win32 PE test corpus
+
+Freely-redistributable Windows PE binaries useful for exercising the
+splat win32 platform. None of these are committed to the repo (they're
+re-downloadable and live behind their respective licenses); the
+`.gitignore` here keeps the directory empty in git.
+
+Each entry: project + version + arch / era / license / direct URL.
+
+## 1995-1999 — Win95/98, MSVC 4-6 era (PE32 only)
+
+| Binary | License | URL |
+| --- | --- | --- |
+| Info-ZIP UnZip 6.00 (PE32) | Info-ZIP (BSD-style) | <ftp://ftp.info-zip.org/pub/infozip/win32/unz600xn.exe> |
+| DOSBox 0.74-3 installer (PE32, NSIS wrapper) | GPL-2.0 | <https://sourceforge.net/projects/dosbox/files/dosbox/0.74-3/DOSBox0.74-3-win32-installer.exe/download> |
+| OpenTTD 1.0.0 win9x (PE32) | GPL-2.0 | <https://cdn.openttd.org/openttd-releases/1.0.0/openttd-1.0.0-windows-win9x.zip> |
+
+## 2000-2003 — Win2K/XP, MSVC 6/7
+
+| Binary | License | URL |
+| --- | --- | --- |
+| Python 2.7.18 x86 (PE32) | PSF | <https://www.python.org/ftp/python/2.7.18/python-2.7.18.msi> |
+| Python 2.7.18 amd64 (PE32+) | PSF | <https://www.python.org/ftp/python/2.7.18/python-2.7.18.amd64.msi> |
+| PuTTY 0.60 x86 (PE32) | MIT | <https://the.earth.li/~sgtatham/putty/0.60/x86/putty.exe> |
+
+## 2004-2009 — XP/Vista, MSVC 8/9, early x64
+
+| Binary | License | URL |
+| --- | --- | --- |
+| 7-Zip 9.20 x86 (PE32) | LGPL-2.1 | <https://www.7-zip.org/a/7z920.exe> |
+| Notepad2 4.2.25 x86 (PE32) | freeware (BSD source) | <https://www.flos-freeware.ch/zip/notepad2_4.2.25_x86.zip> |
+| VLC 1.1.11 win32 (PE32, plus DLLs) | GPL-2.0 | <https://download.videolan.org/pub/videolan/vlc/1.1.11/win32/vlc-1.1.11-win32.zip> |
+| PuTTY 0.62 x86 (PE32) | MIT | <https://the.earth.li/~sgtatham/putty/0.62/x86/putty.exe> |
+| ScummVM 1.9.0 win32 (PE32) | GPL-2.0 | <https://downloads.scummvm.org/frs/scummvm/1.9.0/scummvm-1.9.0-win32.zip> |
+| Pidgin 2.10.12 win32-bin (PE32 + DLLs, plain zip) | GPL-2.0 | <https://sourceforge.net/projects/pidgin/files/Pidgin/2.10.12/pidgin-2.10.12-win32-bin.zip/download> |
+
+## 2010-2014 — Win7, MSVC 10/11, PE32+ mainstream
+
+| Binary | License | URL |
+| --- | --- | --- |
+| 7-Zip 16.04 x64 (PE32+) | LGPL-2.1 | <https://www.7-zip.org/a/7z1604-x64.exe> |
+| 7-Zip 16.04 x86 (PE32) | LGPL-2.1 | <https://www.7-zip.org/a/7z1604.exe> |
+| OpenTTD 1.5.3 win64 (PE32+) | GPL-2.0 | <https://cdn.openttd.org/openttd-releases/1.5.3/openttd-1.5.3-windows-win64.zip> |
+| OpenTTD 1.5.3 win32 (PE32) | GPL-2.0 | <https://cdn.openttd.org/openttd-releases/1.5.3/openttd-1.5.3-windows-win32.zip> |
+| VLC 2.0.0 win32 (PE32) | GPL-2.0 | <https://download.videolan.org/pub/videolan/vlc/2.0.0/win32/vlc-2.0.0-win32.zip> |
+| Python 3.4.4 amd64 (PE32+) | PSF | <https://www.python.org/ftp/python/3.4.4/python-3.4.4.amd64.msi> |
+| PuTTY 0.70 x86 (PE32) | MIT | <https://the.earth.li/~sgtatham/putty/0.70/w32/putty.exe> |
+
+## 2015-2025 — Win10/11, MSVC 14.x (CFG, SafeSEH, HighEntropyVA)
+
+| Binary | License | URL |
+| --- | --- | --- |
+| 7-Zip 19.00 x64 (PE32+) | LGPL-2.1 | <https://www.7-zip.org/a/7z1900-x64.exe> |
+| 7-Zip 23.01 x86 (PE32) | LGPL-2.1 | <https://www.7-zip.org/a/7z2301.exe> |
+| Notepad++ 7.9.5 portable x64 (PE32+) | GPL-3.0 | <https://github.com/notepad-plus-plus/notepad-plus-plus/releases/download/v7.9.5/npp.7.9.5.portable.x64.zip> |
+| Notepad++ 8.6.9 portable x86 (PE32) | GPL-3.0 | <https://github.com/notepad-plus-plus/notepad-plus-plus/releases/download/v8.6.9/npp.8.6.9.portable.zip> |
+| VLC 3.0.20 win64 (PE32+) | GPL-2.0 | <https://download.videolan.org/pub/videolan/vlc/3.0.20/win64/vlc-3.0.20-win64.zip> |
+| ScummVM 2.7.0 win32-x86_64 (PE32+) | GPL-2.0 | <https://downloads.scummvm.org/frs/scummvm/2.7.0/scummvm-2.7.0-win32-x86_64.zip> |
+| WinLibs GCC 16.1.0 MinGW-w64 x64 (gcc.exe + bundled tools) | GPL-3.0 + Runtime Lib Exception | <https://github.com/brechtsanders/winlibs_mingw/releases/download/16.1.0posix-14.0.0-msvcrt-r2/winlibs-x86_64-posix-seh-gcc-16.1.0-mingw-w64msvcrt-14.0.0-r2.zip> |
+| WinLibs GCC 16.1.0 MinGW-w64 x86 (PE32) | GPL-3.0 + Runtime Lib Exception | <https://github.com/brechtsanders/winlibs_mingw/releases/download/16.1.0posix-14.0.0-msvcrt-r2/winlibs-i686-posix-dwarf-gcc-16.1.0-mingw-w64msvcrt-14.0.0-r2.zip> |
+
+## ARM64 PE — Win10/11 ARM (MSVC 14.x ARM64 codegen)
+
+| Binary | License | URL |
+| --- | --- | --- |
+| curl-for-win 8.20.0_2 ARM64 | curl license (MIT-like) | <https://curl.se/windows/dl-8.20.0_2/curl-8.20.0_2-win64a-mingw.zip> |
+| Git for Windows 2.54.0 PortableGit ARM64 | GPL-2.0 | <https://github.com/git-for-windows/git/releases/download/v2.54.0.windows.1/PortableGit-2.54.0-arm64.7z.exe> |
+| 7-Zip 26.01 ARM64 | LGPL-2.1 | <https://github.com/ip7z/7zip/releases/download/26.01/7z2601-arm64.exe> |
+| FireDaemon OpenSSL 3.6.2 (x86 + x64 + ARM64 libcrypto/libssl) | Apache-2.0 | <https://download.firedaemon.com/FireDaemon-OpenSSL/openssl-3.6.2.zip> |
+
+## Sysinternals / Microsoft modern (covered in baseline tests)
+
+| Binary | Notes |
+| --- | --- |
+| PuTTY x64 latest | <https://the.earth.li/~sgtatham/putty/latest/w64/putty.exe> — MSVC 14.x, 10 sections, 2410 RUNTIME_FUNCTION entries |
+| Sysinternals PSTools | <https://download.sysinternals.com/files/PSTools.zip> — both PE32 (`PsExec.exe`, SafeSEH) and PE32+ (`PsExec64.exe`, 1049 .pdata) |
+| AccessChk (Sysinternals) | bundled in PSTools.zip |
+| ReactOS prebuilt | <https://reactos.org/getbuilds/> — open-source Windows reimplementation, modern PE features |
+
+## ARM32 PE
+
+Not commonly distributed for non-CE Windows; splat init() rejects
+ARM32 with an arch-specific error. ARM64 is the working ARM tier.
+
+## Verified results
+
+Repo's committed corpus (under `test-binaries/`):
+
+- `test-binaries/Server/server.dll` — MSVC 6.0 PE32 DLL with `.reloc` — 286720/286720 byte-identical round-trip via `exact_encoding`
+- `test-binaries/Europa1400Gold_TL.exe` — MSVC 6.0 PE32 EXE, RELOCS_STRIPPED — `.text` 2490368/2490368 byte-identical via `exact_encoding`
+
+Adding any of the binaries above and running `create_config.py
+<binary>` produces an assembleable splat output.
+
+## Extraction notes
+
+The MSI / NSIS / 7z-SFX wrappers (Python, DOSBox, Pidgin, Git for
+Windows, 7-Zip self-installers) need extraction first:
+
+```
+7z x dosbox-installer.exe          # NSIS, 7z, MSI all supported
+msiextract python-2.7.18.msi       # alternative for MSIs
+```
+
+The plain `.zip` candidates (Notepad++ portable, VLC, OpenTTD, ScummVM,
+Pidgin win32-bin, WinLibs, curl-for-win, FireDaemon) extract straight
+to a tree of PEs — preferred for hermetic test setups.
diff --git a/test.py b/test.py
index 1a3cf77e..6afb8e7c 100755
--- a/test.py
+++ b/test.py
@@ -2,7 +2,9 @@
 
 import difflib
 import filecmp
+import importlib.util
 import io
+import shutil
 from pathlib import Path
 import spimdisasm
 import unittest
@@ -60,6 +62,516 @@ def get_right_only_files(
         for sub_dcmp in dcmp.subdirs.values():
             self.get_right_only_files(sub_dcmp, out)
 
+    def test_win32_create_config_pe32_plus(self):
+        """Run create_win32_config on the synthetic PE32+ fixture and
+        verify it emits an 8-byte ImageBase plus a 64-bit entrypoint VA
+        in symbol_addrs.txt."""
+        from src.splat.scripts.create_config import create_win32_config
+
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+
+        fixture_dir = Path("test/win32_app64")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_gen64_cfg", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        exe_path = fixture_dir / "win32_app64.exe"
+        exe_bytes = exe_path.read_bytes()
+
+        tmp_dir = fixture_dir / "_autogen64"
+        if tmp_dir.exists():
+            shutil.rmtree(tmp_dir)
+        tmp_dir.mkdir()
+
+        cwd = Path.cwd()
+        try:
+            import os
+
+            os.chdir(tmp_dir)
+            create_win32_config(Path("..") / exe_path.name, exe_bytes)
+        finally:
+            os.chdir(cwd)
+
+        yaml_files = list(tmp_dir.glob("*.yaml"))
+        self.assertTrue(yaml_files, "PE32+ autogen produced no YAML")
+        yaml_text = yaml_files[0].read_text(encoding="utf-8")
+        # 64-bit ImageBase / VA must surface as a 9-hex address.
+        self.assertIn("0x140001000", yaml_text)
+
+        sym_addrs = (tmp_dir / "symbol_addrs.txt").read_text(encoding="utf-8")
+        self.assertIn("entrypoint = 0x140001000", sym_addrs)
+
+        shutil.rmtree(tmp_dir)
+
+    def test_win32_create_config(self):
+        """Smoke-test the auto-generated win32 splat config."""
+        from src.splat.scripts.create_config import create_win32_config
+
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+
+        fixture_dir = Path("test/win32_app")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_generate2", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        exe_path = fixture_dir / "win32_app.exe"
+        exe_bytes = exe_path.read_bytes()
+
+        tmp_dir = fixture_dir / "_autogen"
+        if tmp_dir.exists():
+            shutil.rmtree(tmp_dir)
+        tmp_dir.mkdir()
+
+        cwd = Path.cwd()
+        try:
+            import os
+
+            # create_win32_config writes outputs relative to CWD.
+            os.chdir(tmp_dir)
+            create_win32_config(Path("..") / exe_path.name, exe_bytes)
+        finally:
+            os.chdir(cwd)
+
+        yaml_files = list(tmp_dir.glob("*.yaml"))
+        self.assertTrue(yaml_files, "autogen produced no YAML")
+        sym_addrs = tmp_dir / "symbol_addrs.txt"
+        self.assertTrue(sym_addrs.exists(), "autogen produced no symbol_addrs.txt")
+        sym_text = sym_addrs.read_text(encoding="utf-8")
+        self.assertIn("entrypoint", sym_text)
+        # Provenance preamble: source filename + truncated sha1.
+        import hashlib
+
+        expected_sha1 = hashlib.sha1(exe_bytes).hexdigest()[:12]
+        self.assertIn(exe_path.name, sym_text)
+        self.assertIn(expected_sha1, sym_text)
+        # YAML sha1 should match the binary's full sha1.
+        yaml_text = yaml_files[0].read_text(encoding="utf-8")
+        self.assertIn(hashlib.sha1(exe_bytes).hexdigest(), yaml_text)
+
+        shutil.rmtree(tmp_dir)
+
+    def test_win32_app64(self):
+        """Smoke test the PE32+ (x86_64) code path: generate the binary,
+        split it, assert the entry disasm and the PE bitness."""
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+
+        fixture_dir = Path("test/win32_app64")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_generate64", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        split_dir = fixture_dir / "split"
+        if split_dir.exists():
+            shutil.rmtree(split_dir)
+
+        from src.splat.scripts.split import main as splat_main
+
+        splat_main([fixture_dir / "splat.yaml"], None, False)
+
+        text = (split_dir / "asm/main_text.s").read_text(encoding="utf-8")
+        # x86_64 mode: the entry should decode to `mov eax, 0x2a; ret`.
+        self.assertIn("mov eax, 0x2a", text)
+        self.assertIn("ret", text)
+        # Entrypoint VA encodes the 64-bit ImageBase.
+        self.assertIn("0x140001000", text)
+        # RIP-relative load should be substituted with the .rdata label.
+        self.assertIn("[D_140002000]", text)
+
+        # Round-trip: every generated .s file must assemble cleanly with GAS.
+        for asm in sorted(split_dir.rglob("*.s")):
+            self._assert_assembles(asm, "--64")
+
+        # Win32SegPdata renders the RUNTIME_FUNCTION record as a
+        # structured `.long … RUNTIME_FUNCTION` row (vs an opaque byte
+        # blob). The single record in the fixture covers our text body.
+        pdata_path = split_dir / "asm/data/main_pdata.s"
+        self.assertTrue(pdata_path.exists(), "pdata file missing")
+        pdata = pdata_path.read_text(encoding="utf-8")
+        self.assertIn("RUNTIME_FUNCTION", pdata)
+        self.assertIn(".pdata", pdata)
+        # Single record + null terminator + trailing zero .space.
+        self.assertIn(".space", pdata)
+
+    def _assert_assembles(self, asm_path: Path, mode_flag: str) -> None:
+        """Invoke `as` to assemble the given file. Skip silently if `as`
+        isn't available; fail loudly if it is and rejects the file."""
+        import shutil as _shutil
+        import subprocess as _sub
+
+        if _shutil.which("as") is None:
+            return
+        out = _sub.run(
+            ["as", mode_flag, str(asm_path), "-o", "/dev/null"],
+            capture_output=True,
+        )
+        self.assertEqual(
+            out.returncode,
+            0,
+            f"`as` rejected {asm_path}:\n{out.stderr.decode(errors='replace')}",
+        )
+
+    def test_win32_app(self):
+        """Run splat against the synthetic PE32 fixture and check the
+        output. Generates the binary on the fly so the test is hermetic
+        and doesn't require committing a binary executable."""
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+
+        fixture_dir = Path("test/win32_app")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_generate", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        split_dir = fixture_dir / "split"
+        if split_dir.exists():
+            shutil.rmtree(split_dir)
+
+        from src.splat.scripts.split import main as splat_main
+
+        splat_main([fixture_dir / "splat.yaml"], None, False)
+
+        for name in ("asm/header.s", "asm/main_text.s", "win32_app.ld"):
+            self.assertTrue((split_dir / name).exists(), f"win32 split missing {name}")
+
+        text = (split_dir / "asm/main_text.s").read_text(encoding="utf-8")
+        for needle in ("push ebp", "mov ebp, esp", "mov eax, 0x2a", "ret"):
+            self.assertIn(needle, text)
+
+        # Round-trip: every generated .s file must assemble cleanly with
+        # GAS. Covers header, text, data, bss simultaneously.
+        for asm in sorted(split_dir.rglob("*.s")):
+            self._assert_assembles(asm, "--32")
+
+    def test_win32_exact_encoding(self):
+        """`exact_encoding: true` must produce a byte-identical .text
+        after GAS assembly + objcopy."""
+        import shutil as _shutil
+        import subprocess as _sub
+
+        if _shutil.which("as") is None or _shutil.which("objcopy") is None:
+            self.skipTest("`as` / `objcopy` not installed")
+
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+
+        fixture_dir = Path("test/win32_app")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_exact_gen", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        out_root = fixture_dir / "split-exact"
+        if out_root.exists():
+            shutil.rmtree(out_root)
+
+        exact_yaml = fixture_dir / "splat-exact.yaml"
+        exact_yaml.write_text(
+            (fixture_dir / "splat.yaml")
+            .read_text(encoding="utf-8")
+            .replace("base_path: split", "base_path: split-exact")
+            .replace(
+                "[0x200, text, main_text]",
+                "{ start: 0x200, type: text, name: main_text, exact_encoding: true }",
+            )
+        )
+
+        from src.splat.scripts.split import main as splat_main
+
+        splat_main([exact_yaml], None, False)
+
+        asm = out_root / "asm/main_text.s"
+        obj = fixture_dir / "main_text.o"
+        binf = fixture_dir / "main_text.bin"
+        try:
+            r = _sub.run(["as", "--32", str(asm), "-o", str(obj)], capture_output=True)
+            self.assertEqual(r.returncode, 0, r.stderr.decode())
+            r = _sub.run(
+                ["objcopy", "-O", "binary", "-j", ".text", str(obj), str(binf)],
+                capture_output=True,
+            )
+            self.assertEqual(r.returncode, 0, r.stderr.decode())
+            orig = (fixture_dir / "win32_app.exe").read_bytes()[0x200 : 0x200 + 0x11]
+            reasm = binf.read_bytes()[: len(orig)]
+            self.assertEqual(
+                orig,
+                reasm,
+                f"exact_encoding text bytes diverge from original "
+                f"({sum(1 for a, b in zip(orig, reasm) if a != b)} mismatches)",
+            )
+        finally:
+            for p in (obj, binf, exact_yaml):
+                if p.exists():
+                    p.unlink()
+            if out_root.exists():
+                _shutil.rmtree(out_root)
+
+    def test_win32_reassemble_pe32_roundtrip(self):
+        """Drive splat split + win32_reassemble end-to-end on the PE32
+        fixture: the produced PE must have at least a valid PE header
+        with a matching ImageBase / Machine. (Full byte-identity is
+        delivered by `exact_encoding`; this round-trip catches bin/asm
+        wiring breakage that the section-level tests don't see.)"""
+        import shutil as _shutil
+
+        if any(_shutil.which(t) is None for t in ("as", "ld", "objcopy")):
+            self.skipTest("`as` / `ld` / `objcopy` not installed")
+
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+        fixture_dir = Path("test/win32_app")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_reasm_gen", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        split_dir = fixture_dir / "split"
+        if split_dir.exists():
+            shutil.rmtree(split_dir)
+
+        from src.splat.scripts.split import main as splat_main
+        from src.splat.scripts.win32_reassemble import reassemble
+
+        splat_main([fixture_dir / "splat.yaml"], None, False)
+
+        out_path = fixture_dir / "win32_app.reasm.exe"
+        if out_path.exists():
+            out_path.unlink()
+        try:
+            reassemble(fixture_dir / "splat.yaml", out_path, verbose=False)
+            self.assertTrue(out_path.exists(), "reassembly produced no file")
+            data = out_path.read_bytes()
+            self.assertEqual(data[:2], b"MZ", "reassembly is not an MZ file")
+            pe_off = int.from_bytes(data[0x3C:0x40], "little")
+            self.assertEqual(
+                data[pe_off : pe_off + 4], b"PE\x00\x00", "no PE signature at e_lfanew"
+            )
+        finally:
+            if out_path.exists():
+                out_path.unlink()
+            elf = out_path.with_suffix(".reasm.elf")
+            if elf.exists():
+                elf.unlink()
+
+    def test_win32_reassemble_byte_identical_with_exact_encoding(self):
+        """When the YAML enables exact_encoding on text + data segments,
+        win32_reassemble should produce a byte-identical PE — closing the
+        full split + reassemble round-trip."""
+        import shutil as _shutil
+
+        if any(_shutil.which(t) is None for t in ("as", "objcopy")):
+            self.skipTest("`as` / `objcopy` not installed")
+
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+        fixture_dir = Path("test/win32_app")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_reasm_exact_gen", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        out_root = fixture_dir / "split-reasm-exact"
+        if out_root.exists():
+            shutil.rmtree(out_root)
+
+        exact_yaml = fixture_dir / "splat-reasm-exact.yaml"
+        exact_yaml.write_text(
+            (fixture_dir / "splat.yaml")
+            .read_text(encoding="utf-8")
+            .replace("base_path: split", "base_path: split-reasm-exact")
+            .replace(
+                "[0x200, text, main_text]",
+                "{ start: 0x200, type: text, name: main_text, exact_encoding: true }",
+            )
+            .replace(
+                "[0x400, data, main_data]",
+                "{ start: 0x400, type: data, name: main_data, exact_encoding: true }",
+            )
+        )
+
+        from src.splat.scripts.split import main as splat_main
+        from src.splat.scripts.win32_reassemble import reassemble
+
+        splat_main([exact_yaml], None, False)
+
+        out_path = fixture_dir / "win32_app.reasm-exact.exe"
+        if out_path.exists():
+            out_path.unlink()
+        try:
+            reassemble(exact_yaml, out_path, verbose=False)
+            orig = (fixture_dir / "win32_app.exe").read_bytes()
+            reasm = out_path.read_bytes()
+            self.assertEqual(
+                orig,
+                reasm,
+                f"reassembled PE diverges from original "
+                f"({sum(1 for a, b in zip(orig, reasm) if a != b)} mismatches)",
+            )
+        finally:
+            for p in (out_path, exact_yaml):
+                if p.exists():
+                    p.unlink()
+            if out_root.exists():
+                _shutil.rmtree(out_root)
+
+    def test_win32_reassemble_byte_identical_pe32_plus(self):
+        """PE32+ equivalent of the byte-identical reassembly test.
+        Generate the x86_64 fixture, split with exact_encoding on text
+        + rdata, run win32_reassemble, then byte-compare against the
+        original PE."""
+        import shutil as _shutil
+
+        if any(_shutil.which(t) is None for t in ("as", "objcopy")):
+            self.skipTest("`as` / `objcopy` not installed")
+
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+        fixture_dir = Path("test/win32_app64")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_reasm_exact_gen64", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        out_root = fixture_dir / "split-reasm-exact"
+        if out_root.exists():
+            shutil.rmtree(out_root)
+
+        exact_yaml = fixture_dir / "splat-reasm-exact.yaml"
+        exact_yaml.write_text(
+            (fixture_dir / "splat.yaml")
+            .read_text(encoding="utf-8")
+            .replace("base_path: split", "base_path: split-reasm-exact")
+            .replace(
+                "[0x200, text, main_text]",
+                "{ start: 0x200, type: text, name: main_text, exact_encoding: true }",
+            )
+            .replace(
+                "[0x400, rodata, main_rdata]",
+                "{ start: 0x400, type: rodata, name: main_rdata, exact_encoding: true }",
+            )
+            .replace(
+                "[0x600, pdata, main_pdata]",
+                "{ start: 0x600, type: pdata, name: main_pdata, exact_encoding: true }",
+            )
+        )
+
+        from src.splat.scripts.split import main as splat_main
+        from src.splat.scripts.win32_reassemble import reassemble
+
+        splat_main([exact_yaml], None, False)
+
+        out_path = fixture_dir / "win32_app64.reasm-exact.exe"
+        if out_path.exists():
+            out_path.unlink()
+        try:
+            reassemble(exact_yaml, out_path, verbose=False)
+            orig = (fixture_dir / "win32_app64.exe").read_bytes()
+            reasm = out_path.read_bytes()
+            self.assertEqual(
+                orig,
+                reasm,
+                f"reassembled PE32+ diverges from original "
+                f"({sum(1 for a, b in zip(orig, reasm) if a != b)} mismatches)",
+            )
+        finally:
+            for p in (out_path, exact_yaml):
+                if p.exists():
+                    p.unlink()
+            if out_root.exists():
+                _shutil.rmtree(out_root)
+
+    def test_win32_exact_encoding_pe32_plus(self):
+        """Mirror of `test_win32_exact_encoding` for the PE32+ fixture.
+        Locks down that x86_64 emit_disasm + exact_encoding round-trips
+        the .text byte-for-byte through `as --64` + objcopy."""
+        import shutil as _shutil
+        import subprocess as _sub
+
+        if _shutil.which("as") is None or _shutil.which("objcopy") is None:
+            self.skipTest("`as` / `objcopy` not installed")
+
+        spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
+
+        fixture_dir = Path("test/win32_app64")
+        gen_spec = importlib.util.spec_from_file_location(
+            "_win32_exact_gen64", fixture_dir / "generate.py"
+        )
+        assert gen_spec is not None and gen_spec.loader is not None
+        gen = importlib.util.module_from_spec(gen_spec)
+        gen_spec.loader.exec_module(gen)
+        gen.main()
+
+        out_root = fixture_dir / "split-exact"
+        if out_root.exists():
+            shutil.rmtree(out_root)
+
+        exact_yaml = fixture_dir / "splat-exact.yaml"
+        exact_yaml.write_text(
+            (fixture_dir / "splat.yaml")
+            .read_text(encoding="utf-8")
+            .replace("base_path: split", "base_path: split-exact")
+            .replace(
+                "[0x200, text, main_text]",
+                "{ start: 0x200, type: text, name: main_text, exact_encoding: true }",
+            )
+        )
+
+        from src.splat.scripts.split import main as splat_main
+
+        splat_main([exact_yaml], None, False)
+
+        asm = out_root / "asm/main_text.s"
+        obj = fixture_dir / "main_text.o"
+        binf = fixture_dir / "main_text.bin"
+        try:
+            r = _sub.run(["as", "--64", str(asm), "-o", str(obj)], capture_output=True)
+            self.assertEqual(r.returncode, 0, r.stderr.decode())
+            r = _sub.run(
+                ["objcopy", "-O", "binary", "-j", ".text", str(obj), str(binf)],
+                capture_output=True,
+            )
+            self.assertEqual(r.returncode, 0, r.stderr.decode())
+            # Fixture text section starts at file 0x200; check at least the
+            # first few instructions: `mov eax, 0x2a; ret` + RIP-relative
+            # data load (whatever generate.py emits).
+            orig_full = (fixture_dir / "win32_app64.exe").read_bytes()
+            orig = orig_full[0x200 : 0x200 + 0x20]
+            reasm = binf.read_bytes()[: len(orig)]
+            self.assertEqual(
+                orig,
+                reasm,
+                f"exact_encoding PE32+ text bytes diverge "
+                f"({sum(1 for a, b in zip(orig, reasm) if a != b)} mismatches)",
+            )
+        finally:
+            for p in (obj, binf, exact_yaml):
+                if p.exists():
+                    p.unlink()
+            if out_root.exists():
+                _shutil.rmtree(out_root)
+
     def test_basic_app(self):
         spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False
         main([Path("test/basic_app/splat.yaml")], None, False)
diff --git a/test/win32_app/.gitignore b/test/win32_app/.gitignore
new file mode 100644
index 00000000..188a3d67
--- /dev/null
+++ b/test/win32_app/.gitignore
@@ -0,0 +1,13 @@
+/split/
+/split-exact/
+/split-reasm-exact/
+/_autogen/
+/win32_app.exe
+/win32_app.reasm.exe
+/win32_app.reasm-exact.exe
+/main_text.o
+/main_text.bin
+/splat-exact.yaml
+/splat-reasm-exact.yaml
+__pycache__/
+!splat.yaml
diff --git a/test/win32_app/generate.py b/test/win32_app/generate.py
new file mode 100644
index 00000000..7f4d0b20
--- /dev/null
+++ b/test/win32_app/generate.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+"""Generate a minimal MSVC6-style PE32 i386 executable for the win32 splat test.
+
+The output is byte-stable so the test can pin expectations. The binary has
+three sections — .text, .data, .bss — and a tiny DOS stub. The .text payload
+is a hand-rolled x86 program that returns 0x2A from `main`.
+
+Run from the repo root:
+    python3 test/win32_app/generate.py
+"""
+
+from pathlib import Path
+import struct
+
+
+HERE = Path(__file__).parent
+OUT = HERE / "win32_app.exe"
+
+IMAGE_BASE = 0x00400000
+SECTION_ALIGN = 0x1000
+FILE_ALIGN = 0x200
+HEADER_SIZE = FILE_ALIGN  # 0x200
+
+# Section layout
+TEXT_RVA = 0x1000
+DATA_RVA = 0x2000
+BSS_RVA = 0x3000
+BSS_VIRT_SIZE = 0x100
+
+TEXT_BODY = bytes(
+    [
+        # main:
+        0x55,  # push ebp
+        0x89,
+        0xE5,  # mov ebp, esp
+        0xB8,
+        0x2A,
+        0x00,
+        0x00,
+        0x00,  # mov eax, 0x2A
+        0x5D,  # pop ebp
+        0xC3,  # ret
+        # _start: calls main, then halts in a tiny loop.
+        0xE8,
+        0xF1,
+        0xFF,
+        0xFF,
+        0xFF,  # call main (rel32 = -15)
+        0xEB,
+        0xFE,  # jmp $ (spin)
+    ]
+)
+
+DATA_BODY = b"Hello, splat win32!\x00"
+
+
+def make_section_header(
+    name: bytes,
+    virt_size: int,
+    virt_addr: int,
+    raw_size: int,
+    raw_ptr: int,
+    chars: int,
+) -> bytes:
+    return struct.pack(
+        "<8sIIIIIIHHI",
+        name.ljust(8, b"\x00")[:8],
+        virt_size,
+        virt_addr,
+        raw_size,
+        raw_ptr,
+        0,
+        0,
+        0,
+        0,
+        chars,
+    )
+
+
+def pad_to(buf: bytearray, target: int, fill: int = 0x00) -> None:
+    if len(buf) > target:
+        raise ValueError(f"buffer overrun: {len(buf)} > {target}")
+    buf.extend(bytes([fill]) * (target - len(buf)))
+
+
+def build_pe() -> bytes:
+    # --- DOS header + stub ---
+    dos = bytearray(64)
+    dos[0:2] = b"MZ"
+    dos[0x3C:0x40] = struct.pack("<I", 0x80)  # e_lfanew → 0x80
+    dos_stub = (
+        bytes(
+            [
+                0x0E,
+                0x1F,
+                0xBA,
+                0x0E,
+                0x00,
+                0xB4,
+                0x09,
+                0xCD,
+                0x21,
+                0xB8,
+                0x01,
+                0x4C,
+                0xCD,
+                0x21,
+            ]
+        )
+        + b"This program cannot be run in DOS mode.\r\r\n$\x00"
+    )
+    full_dos = bytes(dos) + dos_stub
+    # Pad to 0x80
+    full_dos = full_dos.ljust(0x80, b"\x00")
+
+    # --- PE signature + COFF header ---
+    pe_sig = b"PE\x00\x00"
+    num_sections = 3
+    size_of_optional_header = 0xE0  # standard PE32
+    characteristics = 0x010F  # EXECUTABLE_IMAGE | RELOCS_STRIPPED | LINE_NUMS_STRIPPED | LOCAL_SYMS_STRIPPED | 32BIT_MACHINE
+    coff = struct.pack(
+        "<HHIIIHH",
+        0x014C,  # Machine = i386
+        num_sections,
+        0x12345678,  # TimeDateStamp
+        0,  # PointerToSymbolTable
+        0,  # NumberOfSymbols
+        size_of_optional_header,
+        characteristics,
+    )
+
+    # --- Optional header (PE32) ---
+    text_raw_size = (len(TEXT_BODY) + FILE_ALIGN - 1) & ~(FILE_ALIGN - 1)
+    data_raw_size = (len(DATA_BODY) + FILE_ALIGN - 1) & ~(FILE_ALIGN - 1)
+
+    text_raw_ptr = HEADER_SIZE
+    data_raw_ptr = text_raw_ptr + text_raw_size
+    # .bss is NOLOAD — raw_ptr = 0, raw_size = 0
+    size_of_image = (BSS_RVA + BSS_VIRT_SIZE + SECTION_ALIGN - 1) & ~(SECTION_ALIGN - 1)
+
+    opt = struct.pack(
+        "<HBBIIIIIIIIIHHHHHHIIIIHHIIIIII",
+        0x010B,  # Magic = PE32
+        6,  # MajorLinkerVersion (MSVC 6)
+        0,  # MinorLinkerVersion
+        text_raw_size,  # SizeOfCode
+        data_raw_size,  # SizeOfInitializedData
+        BSS_VIRT_SIZE,  # SizeOfUninitializedData
+        TEXT_RVA + len(TEXT_BODY) - 7,  # AddressOfEntryPoint → _start
+        TEXT_RVA,  # BaseOfCode
+        DATA_RVA,  # BaseOfData
+        IMAGE_BASE,  # ImageBase
+        SECTION_ALIGN,  # SectionAlignment
+        FILE_ALIGN,  # FileAlignment
+        4,  # MajorOSVersion
+        0,  # MinorOSVersion
+        0,  # MajorImageVersion
+        0,  # MinorImageVersion
+        4,  # MajorSubsystemVersion
+        0,  # MinorSubsystemVersion
+        0,  # Win32VersionValue
+        size_of_image,
+        HEADER_SIZE,  # SizeOfHeaders
+        0,  # CheckSum
+        3,  # Subsystem (Windows CUI)
+        0,  # DllCharacteristics
+        0x100000,  # SizeOfStackReserve
+        0x1000,  # SizeOfStackCommit
+        0x100000,  # SizeOfHeapReserve
+        0x1000,  # SizeOfHeapCommit
+        0,  # LoaderFlags
+        16,  # NumberOfRvaAndSizes
+    )
+    # 16 data directories, each {VirtualAddress, Size} = 8 bytes
+    opt += b"\x00" * (16 * 8)
+    assert len(opt) == size_of_optional_header, (len(opt), size_of_optional_header)
+
+    # --- Section headers ---
+    sections = b""
+    sections += make_section_header(
+        b".text",
+        len(TEXT_BODY),
+        TEXT_RVA,
+        text_raw_size,
+        text_raw_ptr,
+        0x60000020,  # CODE | EXEC | READ
+    )
+    sections += make_section_header(
+        b".data",
+        len(DATA_BODY),
+        DATA_RVA,
+        data_raw_size,
+        data_raw_ptr,
+        0xC0000040,  # INITIALIZED_DATA | READ | WRITE
+    )
+    sections += make_section_header(
+        b".bss",
+        BSS_VIRT_SIZE,
+        BSS_RVA,
+        0,
+        0,
+        0xC0000080,  # UNINITIALIZED_DATA | READ | WRITE
+    )
+
+    # --- Assemble ---
+    buf = bytearray()
+    buf += full_dos
+    assert len(buf) == 0x80
+    buf += pe_sig + coff + opt + sections
+    pad_to(buf, HEADER_SIZE)
+
+    # .text raw
+    buf += TEXT_BODY
+    pad_to(buf, text_raw_ptr + text_raw_size)
+
+    # .data raw
+    buf += DATA_BODY
+    pad_to(buf, data_raw_ptr + data_raw_size)
+
+    return bytes(buf)
+
+
+def main() -> None:
+    pe = build_pe()
+    OUT.write_bytes(pe)
+    print(f"wrote {OUT} ({len(pe)} bytes)")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/win32_app/splat.yaml b/test/win32_app/splat.yaml
new file mode 100644
index 00000000..90163df4
--- /dev/null
+++ b/test/win32_app/splat.yaml
@@ -0,0 +1,40 @@
+options:
+  base_path: split
+  platform: win32
+  compiler: MSVC6
+  basename: win32_app
+  build_path: build
+  target_path: ../win32_app.exe
+  asm_path: asm
+  src_path: src
+  data_path: data
+  ld_script_path: win32_app.ld
+  cache_path: .splache
+  symbol_addrs_path: generated.symbols.txt
+  undefined_funcs_auto_path: undefined_funcs_auto.txt
+  undefined_syms_auto_path: undefined_syms_auto.txt
+  asset_path: assets
+  section_order: [".header", ".text", ".data", ".rodata", ".bss"]
+
+segments:
+  - name: header
+    type: header
+    start: 0x0
+
+  - name: text
+    type: code
+    start: 0x200
+    vram: 0x00401000
+    subsegments:
+      - [0x200, text, main_text]
+
+  - name: data
+    type: code
+    start: 0x400
+    vram: 0x00402000
+    subsegments:
+      - [0x400, data, main_data]
+
+  - { name: bss, type: bss, vram: 0x00403000, bss_size: 0x100 }
+
+  - [0x600]
diff --git a/test/win32_app/test_win32.py b/test/win32_app/test_win32.py
new file mode 100644
index 00000000..82924e42
--- /dev/null
+++ b/test/win32_app/test_win32.py
@@ -0,0 +1,125 @@
+"""Stand-alone smoke test for the win32 platform.
+
+Regenerates the synthetic PE32 fixture, runs splat against it, and checks
+that the expected output files were produced. Designed to be invoked
+either directly (``python test/win32_app/test_win32.py``) or via the
+top-level ``test.py``.
+"""
+
+from pathlib import Path
+import shutil
+import sys
+import unittest
+
+THIS_DIR = Path(__file__).parent
+REPO_ROOT = THIS_DIR.parent.parent
+sys.path.insert(0, str(REPO_ROOT))
+
+
+class Win32App(unittest.TestCase):
+    def setUp(self):
+        # Regenerate the binary so the test is hermetic.
+        import importlib.util
+
+        spec = importlib.util.spec_from_file_location(
+            "_win32_generate", THIS_DIR / "generate.py"
+        )
+        generate = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(generate)
+        generate.main()
+
+        out_root = THIS_DIR / "split"
+        if out_root.exists():
+            shutil.rmtree(out_root)
+
+    def test_split_runs_and_emits_expected_files(self):
+        from src.splat.scripts.split import main as splat_main
+
+        splat_main([THIS_DIR / "splat.yaml"], None, False)
+
+        out_root = THIS_DIR / "split"
+        expected_paths = [
+            out_root / "asm" / "header.s",
+            out_root / "asm" / "main_text.s",
+            out_root / "win32_app.ld",
+        ]
+        for path in expected_paths:
+            self.assertTrue(path.exists(), f"missing expected output: {path}")
+
+        # Sanity-check the disassembly: the entry instructions for `main`
+        # should always decode to the same sequence.
+        text = (out_root / "asm" / "main_text.s").read_text(encoding="utf-8")
+        for needle in ("push ebp", "mov ebp, esp", "mov eax, 0x2a", "ret"):
+            self.assertIn(needle, text, f"win32 disasm missing '{needle}'")
+
+        # Header must round-trip the PE signature and section names.
+        header = (out_root / "asm" / "header.s").read_text(encoding="utf-8")
+        for needle in (
+            '.ascii "MZ"',
+            '.ascii "PE\\0\\0"',
+            '.ascii ".text',
+            '.ascii ".data',
+            '.ascii ".bss',
+        ):
+            self.assertIn(needle, header, f"win32 header missing {needle!r}")
+
+        # Linker script must mention the segments we declared.
+        ld = (out_root / "win32_app.ld").read_text(encoding="utf-8")
+        for needle in ("header", "main_text", "main_data", "bss"):
+            self.assertIn(needle, ld, f"linker script missing {needle!r}")
+
+    def test_exact_encoding_byte_identical(self):
+        """`exact_encoding: true` on a text subsegment must produce a
+        byte-identical .text after assembly."""
+        import shutil as _shutil
+        import subprocess as _sub
+
+        if _shutil.which("as") is None or _shutil.which("objcopy") is None:
+            self.skipTest("`as`/`objcopy` not installed")
+
+        out_root = THIS_DIR / "split-exact"
+        if out_root.exists():
+            shutil.rmtree(out_root)
+
+        # Write an alternate yaml with exact_encoding enabled.
+        exact_yaml = THIS_DIR / "splat-exact.yaml"
+        exact_yaml.write_text(
+            (THIS_DIR / "splat.yaml")
+            .read_text(encoding="utf-8")
+            .replace("base_path: split", "base_path: split-exact")
+            .replace(
+                "[0x200, text, main_text]",
+                "{ start: 0x200, type: text, name: main_text, exact_encoding: true }",
+            )
+        )
+
+        from src.splat.scripts.split import main as splat_main
+
+        splat_main([exact_yaml], None, False)
+
+        asm = out_root / "asm/main_text.s"
+        obj = THIS_DIR / "main_text.o"
+        binf = THIS_DIR / "main_text.bin"
+        try:
+            r = _sub.run(["as", "--32", str(asm), "-o", str(obj)], capture_output=True)
+            self.assertEqual(r.returncode, 0, r.stderr.decode())
+            r = _sub.run(
+                ["objcopy", "-O", "binary", "-j", ".text", str(obj), str(binf)],
+                capture_output=True,
+            )
+            self.assertEqual(r.returncode, 0, r.stderr.decode())
+            orig = (THIS_DIR / "win32_app.exe").read_bytes()[0x200 : 0x200 + 0x11]
+            reasm = binf.read_bytes()[: len(orig)]
+            self.assertEqual(
+                orig, reasm, "exact_encoding text bytes diverge from original"
+            )
+        finally:
+            for p in (obj, binf, exact_yaml):
+                if p.exists():
+                    p.unlink()
+            if out_root.exists():
+                _shutil.rmtree(out_root)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/win32_app64/.gitignore b/test/win32_app64/.gitignore
new file mode 100644
index 00000000..e0220977
--- /dev/null
+++ b/test/win32_app64/.gitignore
@@ -0,0 +1,12 @@
+/split/
+/split-exact/
+/split-reasm-exact/
+/win32_app64.exe
+/win32_app64.reasm.exe
+/win32_app64.reasm-exact.exe
+/main_text.o
+/main_text.bin
+/splat-exact.yaml
+/splat-reasm-exact.yaml
+__pycache__/
+!splat.yaml
diff --git a/test/win32_app64/generate.py b/test/win32_app64/generate.py
new file mode 100644
index 00000000..76749ab9
--- /dev/null
+++ b/test/win32_app64/generate.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+"""Generate a minimal PE32+ (x86_64) Windows executable.
+
+Mirrors `test/win32_app/generate.py` but emits the 64-bit optional header
+and uses an x86_64 instruction stream. Used by the win32 test suite to
+exercise the PE32+ code path through `parse_pe`, `CapstoneDisassembler`,
+and the win32 segtypes.
+"""
+
+from pathlib import Path
+import struct
+
+
+HERE = Path(__file__).parent
+OUT = HERE / "win32_app64.exe"
+
+IMAGE_BASE = 0x140000000  # standard PE32+ image base
+SECTION_ALIGN = 0x1000
+FILE_ALIGN = 0x200
+HEADER_SIZE = FILE_ALIGN
+
+TEXT_RVA = 0x1000
+RDATA_RVA = 0x2000
+PDATA_RVA = 0x3000
+
+# A tiny x86_64 program:
+#   mov rax, [rip + 0xFF6]   ; load 64-bit value from .rdata (insn at 0x140001000,
+#                              next IP 0x140001007, +0xFF6 = 0x140001FFD → not aligned;
+#                              we'll target 0x140002000 i.e. .rdata start: disp 0xFF9).
+#   mov eax, 0x2a            ; return 42
+#   ret
+TEXT_BODY = bytes(
+    [
+        0x48,
+        0x8B,
+        0x05,
+        0xF9,
+        0x0F,
+        0x00,
+        0x00,  # mov rax, [rip + 0xFF9]
+        0xB8,
+        0x2A,
+        0x00,
+        0x00,
+        0x00,  # mov eax, 0x2A
+        0xC3,  # ret
+    ]
+)
+
+# 8 bytes of constant data the load above targets.
+RDATA_BODY = bytes([0xEF, 0xBE, 0xAD, 0xDE, 0x00, 0x00, 0x00, 0x00])
+
+# A single RUNTIME_FUNCTION record covering the body of our tiny text
+# routine — (BeginRVA=0x1000, EndRVA=0x100D, UnwindInfoRVA=0x4000).
+PDATA_BODY = struct.pack("<III", 0x1000, 0x100D, 0x4000)
+
+
+def make_section_header(name, virt_size, virt_addr, raw_size, raw_ptr, chars):
+    return struct.pack(
+        "<8sIIIIIIHHI",
+        name.ljust(8, b"\x00")[:8],
+        virt_size,
+        virt_addr,
+        raw_size,
+        raw_ptr,
+        0,
+        0,
+        0,
+        0,
+        chars,
+    )
+
+
+def build_pe() -> bytes:
+    # DOS header + stub
+    dos = bytearray(64)
+    dos[0:2] = b"MZ"
+    dos[0x3C:0x40] = struct.pack("<I", 0x80)
+    dos_stub = (
+        bytes(
+            [
+                0x0E,
+                0x1F,
+                0xBA,
+                0x0E,
+                0x00,
+                0xB4,
+                0x09,
+                0xCD,
+                0x21,
+                0xB8,
+                0x01,
+                0x4C,
+                0xCD,
+                0x21,
+            ]
+        )
+        + b"This program cannot be run in DOS mode.\r\r\n$\x00"
+    )
+    full_dos = (bytes(dos) + dos_stub).ljust(0x80, b"\x00")
+
+    # COFF header
+    pe_sig = b"PE\x00\x00"
+    # PE32+ optional header is 0xF0 bytes (28 standard + 88 windows + 128 directories
+    # = 28 + 88 + 128 = 244 = 0xF4 nominal, but standard size is 0xF0).
+    size_of_optional_header = 0xF0
+    characteristics = 0x0022  # EXECUTABLE_IMAGE | LARGE_ADDRESS_AWARE
+    num_sections = 3
+    coff = struct.pack(
+        "<HHIIIHH",
+        0x8664,  # Machine = x86_64
+        num_sections,
+        0x12345678,  # TimeDateStamp
+        0,
+        0,
+        size_of_optional_header,
+        characteristics,
+    )
+
+    text_raw_size = (len(TEXT_BODY) + FILE_ALIGN - 1) & ~(FILE_ALIGN - 1)
+    text_raw_ptr = HEADER_SIZE
+    rdata_raw_size = (len(RDATA_BODY) + FILE_ALIGN - 1) & ~(FILE_ALIGN - 1)
+    rdata_raw_ptr = text_raw_ptr + text_raw_size
+    pdata_raw_size = (len(PDATA_BODY) + FILE_ALIGN - 1) & ~(FILE_ALIGN - 1)
+    pdata_raw_ptr = rdata_raw_ptr + rdata_raw_size
+    size_of_image = ((PDATA_RVA + len(PDATA_BODY)) + SECTION_ALIGN - 1) & ~(
+        SECTION_ALIGN - 1
+    )
+
+    # PE32+ optional header (note: BaseOfData is omitted, ImageBase is QWORD,
+    # SizeOfStack/Heap fields are QWORDs).
+    opt = struct.pack(
+        "<HBBIIIII"  # Magic..BaseOfCode
+        "Q"  # ImageBase
+        "II"  # SectionAlignment, FileAlignment
+        "HHHHHHI"  # versions + Win32VersionValue
+        "IIII"  # SizeOfImage, Headers, CheckSum, Subsystem/DllChars combined
+        "QQQQ"  # Stack/Heap reserve/commit
+        "II",  # LoaderFlags + NumberOfRvaAndSizes
+        0x020B,  # Magic = PE32+
+        14,
+        0,  # MajorLinkerVersion (MSVC 14)
+        text_raw_size,
+        0,
+        0,
+        TEXT_RVA,
+        TEXT_RVA,  # BaseOfCode
+        IMAGE_BASE,
+        SECTION_ALIGN,
+        FILE_ALIGN,
+        6,
+        0,
+        0,
+        0,
+        6,
+        0,
+        0,
+        size_of_image,
+        HEADER_SIZE,
+        0,
+        (3) | (0 << 16),  # Subsystem (CUI) lo, DllCharacteristics hi
+        0x100000,
+        0x1000,
+        0x100000,
+        0x1000,
+        0,
+        16,
+    )
+    opt += b"\x00" * (16 * 8)  # 16 data directories
+    assert len(opt) == size_of_optional_header, (len(opt), size_of_optional_header)
+
+    sec_text = make_section_header(
+        b".text",
+        len(TEXT_BODY),
+        TEXT_RVA,
+        text_raw_size,
+        text_raw_ptr,
+        0x60000020,
+    )
+    sec_rdata = make_section_header(
+        b".rdata",
+        len(RDATA_BODY),
+        RDATA_RVA,
+        rdata_raw_size,
+        rdata_raw_ptr,
+        0x40000040,
+    )
+    sec_pdata = make_section_header(
+        b".pdata",
+        len(PDATA_BODY),
+        PDATA_RVA,
+        pdata_raw_size,
+        pdata_raw_ptr,
+        0x40000040,
+    )
+
+    buf = bytearray(full_dos + pe_sig + coff + opt + sec_text + sec_rdata + sec_pdata)
+    buf = buf.ljust(HEADER_SIZE, b"\x00")
+    buf += TEXT_BODY
+    buf = buf.ljust(text_raw_ptr + text_raw_size, b"\x00")
+    buf += RDATA_BODY
+    buf = buf.ljust(rdata_raw_ptr + rdata_raw_size, b"\x00")
+    buf += PDATA_BODY
+    buf = buf.ljust(pdata_raw_ptr + pdata_raw_size, b"\x00")
+    return bytes(buf)
+
+
+def main() -> None:
+    pe = build_pe()
+    OUT.write_bytes(pe)
+    print(f"wrote {OUT} ({len(pe)} bytes)")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/win32_app64/splat.yaml b/test/win32_app64/splat.yaml
new file mode 100644
index 00000000..ec258f8d
--- /dev/null
+++ b/test/win32_app64/splat.yaml
@@ -0,0 +1,45 @@
+options:
+  base_path: split
+  platform: win32
+  compiler: MSVC6
+  basename: win32_app64
+  build_path: build
+  target_path: ../win32_app64.exe
+  asm_path: asm
+  src_path: src
+  data_path: data
+  ld_script_path: win32_app64.ld
+  cache_path: .splache
+  symbol_addrs_path: generated.symbols.txt
+  undefined_funcs_auto_path: undefined_funcs_auto.txt
+  undefined_syms_auto_path: undefined_syms_auto.txt
+  asset_path: assets
+  section_order: [".header", ".text", ".rdata", ".data", ".pdata", ".rodata", ".bss"]
+
+segments:
+  - name: header
+    type: header
+    start: 0x0
+
+  - name: text
+    type: code
+    start: 0x200
+    vram: 0x140001000
+    subsegments:
+      - [0x200, text, main_text]
+
+  - name: rdata
+    type: code
+    start: 0x400
+    vram: 0x140002000
+    subsegments:
+      - [0x400, rodata, main_rdata]
+
+  - name: pdata
+    type: code
+    start: 0x600
+    vram: 0x140003000
+    subsegments:
+      - [0x600, pdata, main_pdata]
+
+  - [0x800]
diff --git a/test_win32_pe.py b/test_win32_pe.py
new file mode 100644
index 00000000..0044c128
--- /dev/null
+++ b/test_win32_pe.py
@@ -0,0 +1,5768 @@
+#!/usr/bin/env python3
+"""Unit tests for the win32 PE platform.
+
+Builds tiny PE32 / PE32+ byte blobs in memory and asserts the parsed,
+labelled, or YAML-emitted output is what the spec dictates. Coverage:
+
+Parsers — every data directory + the structural headers:
+  - `parse_pe`              DOS / COFF / optional header / section table,
+                            fuzz-cap edge cases (bad e_lfanew, runt opt
+                            header, oversize NumberOfRvaAndSizes,
+                            machine/magic mismatch, etc.)
+  - `parse_exports`         data dir 0 — named / ordinal-only / forwarders
+  - `parse_imports`         data dir 1 — eager IAT, PE32+ thunks, hint fallback
+  - `parse_resources`       data dir 2 — type/name/lang walk + depth cap
+  - `parse_exception_table` data dir 3 — PE32+ RUNTIME_FUNCTION records
+  - `parse_relocations`     data dir 5 — HIGHLOW / DIR64
+  - `parse_debug`           data dir 6 — RSDS / NB10 CodeView records
+  - `parse_tls`             data dir 9 — callback array walk
+  - `parse_load_config`     data dir 10 — /GS cookie, SafeSEH, /guard:cf
+  - `parse_bound_imports`   data dir 11 — descriptors + forwarder refs
+  - `parse_delay_imports`   data dir 13 — v1 / v2 descriptor chains
+
+Label generation helpers (centralised in `platforms.win32`):
+  - `sanitize_label`        punctuation / leading-digit handling
+  - `compute_iat_labels`    eager + delay IAT slot deduplication
+  - `compute_export_labels` named + reserved-set seed collision
+
+YAML emission (`create_win32_config`):
+  - section classification (text / data / rodata / bss / pdata / bin)
+  - tail-section sort (COFF symtab + Authenticode signature in file order)
+  - symbol categories (entrypoint, exports, imports, delay-imports, TLS
+    callbacks, SafeSEH, CFG targets, /GS cookie)
+  - pathological inputs (BSS-only PEs, resource-only DLLs, all-forwarder
+    shims, phantom raw-pointer-zero sections, spaces in filenames)
+
+Detector helpers (`segtypes.win32.data`):
+  - `_is_string_byte`, `_scan_string`, `_scan_wide_string`,
+    `_escape_string`, `_decode_wide` — narrow + wide ANSI string
+    recognition including Latin-1 Supplement.
+
+Header rendering (`segtypes.win32.header`):
+  - `_decode_flags` (unknown-bit surfacing), `_dump_optional_header`
+    bound checks for runt headers, `_MACHINE_TYPES` / `_SUBSYSTEMS` /
+    `_DLL_CHARACTERISTICS` table coverage.
+"""
+
+import struct
+import unittest
+
+from src.splat.platforms import win32 as win32_platform
+
+
+IMAGE_BASE = 0x00400000
+SECTION_ALIGN = 0x1000
+FILE_ALIGN = 0x200
+DOS_STUB = b"MZ" + b"\x00" * 0x3A + struct.pack("<I", 0x40)
+
+
+def _opt_header_pe32_plus(
+    entry_rva: int,
+    base_of_code: int = 0x1000,
+    data_dirs=(),
+) -> bytes:
+    """PE32+ optional header (240 bytes) — matches the layout
+    `Win32SegHeader._dump_optional_header` expects."""
+    standard = struct.pack(
+        "<HBBIIIII",
+        0x020B,
+        14,
+        0,
+        0x200,
+        0x200,
+        0,
+        entry_rva,
+        base_of_code,
+    )
+    windows = struct.pack(
+        "<QIIHHHHHHIIIIHHQQQQII",
+        0x140000000,  # ImageBase
+        0x1000,
+        0x200,  # SectionAlignment, FileAlignment
+        6,
+        0,
+        0,
+        0,
+        6,
+        0,
+        0,
+        0x4000,  # SizeOfImage
+        0x200,  # SizeOfHeaders
+        0,
+        3,
+        0,
+        0x100000,
+        0x1000,
+        0x100000,
+        0x1000,
+        0,
+        16,
+    )
+    dirs = b""
+    for i in range(16):
+        if i < len(data_dirs):
+            rva, size = data_dirs[i]
+        else:
+            rva, size = 0, 0
+        dirs += struct.pack("<II", rva, size)
+    blob = standard + windows + dirs
+    assert len(blob) == 240, len(blob)
+    return blob
+
+
+def _build_pe_plus(
+    sections,
+    data_dirs=(),
+    entry_rva=0x1000,
+) -> bytes:
+    num_sections = len(sections)
+    coff = struct.pack(
+        "<HHIIIHH",
+        0x8664,
+        num_sections,
+        0x12345678,
+        0,
+        0,
+        0xF0,
+        0x002F,
+    )
+    opt = _opt_header_pe32_plus(entry_rva, data_dirs=data_dirs)
+    sec_headers = b"".join(
+        _section_header(
+            s["name"], s["vsize"], s["vaddr"], s["rsize"], s["rptr"], s["chars"]
+        )
+        for s in sections
+    )
+    header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec_headers
+    header = header.ljust(FILE_ALIGN, b"\x00")
+    end = max(s["rptr"] + s["rsize"] for s in sections)
+    buf = bytearray(header.ljust(end, b"\x00"))
+    for s in sections:
+        body = s.get("body", b"")
+        body = body.ljust(s["rsize"], b"\x00")
+        buf[s["rptr"] : s["rptr"] + s["rsize"]] = body[: s["rsize"]]
+    return bytes(buf)
+
+
+def _opt_header_pe32(
+    entry_rva: int,
+    base_of_code: int = 0x1000,
+    base_of_data: int = 0x2000,
+    data_dirs=(),
+) -> bytes:
+    standard = struct.pack(
+        "<HBBIIIIIIIIIHHHHHHIIIIHHIIIIII",
+        0x010B,  # Magic = PE32
+        6,
+        0,  # LinkerMajor/Minor
+        0x200,  # SizeOfCode
+        0x200,  # SizeOfInitializedData
+        0,  # SizeOfUninitializedData
+        entry_rva,  # AddressOfEntryPoint
+        base_of_code,
+        base_of_data,
+        IMAGE_BASE,
+        SECTION_ALIGN,
+        FILE_ALIGN,
+        4,
+        0,
+        0,
+        0,
+        4,
+        0,
+        0,  # Win32VersionValue
+        0x4000,  # SizeOfImage
+        FILE_ALIGN,  # SizeOfHeaders
+        0,  # CheckSum
+        3,
+        0,  # Subsystem, DllChars
+        0x100000,
+        0x1000,
+        0x100000,
+        0x1000,
+        0,  # LoaderFlags
+        16,  # NumberOfRvaAndSizes
+    )
+    dirs = b""
+    for i in range(16):
+        if i < len(data_dirs):
+            rva, size = data_dirs[i]
+        else:
+            rva, size = 0, 0
+        dirs += struct.pack("<II", rva, size)
+    return standard + dirs
+
+
+def _section_header(
+    name: bytes,
+    virt_size: int,
+    virt_addr: int,
+    raw_size: int,
+    raw_ptr: int,
+    chars: int,
+) -> bytes:
+    return struct.pack(
+        "<8sIIIIIIHHI",
+        name.ljust(8, b"\x00")[:8],
+        virt_size,
+        virt_addr,
+        raw_size,
+        raw_ptr,
+        0,
+        0,
+        0,
+        0,
+        chars,
+    )
+
+
+def _build_pe(
+    sections,
+    data_dirs=(),
+    entry_rva=0x1000,
+) -> bytes:
+    num_sections = len(sections)
+    coff = struct.pack(
+        "<HHIIIHH",
+        0x014C,  # Machine i386
+        num_sections,
+        0x12345678,  # TimeDateStamp
+        0,
+        0,
+        0xE0,  # SizeOfOptionalHeader
+        0x010F,  # Characteristics
+    )
+    opt = _opt_header_pe32(entry_rva, data_dirs=data_dirs)
+    sec_headers = b"".join(
+        _section_header(
+            s["name"], s["vsize"], s["vaddr"], s["rsize"], s["rptr"], s["chars"]
+        )
+        for s in sections
+    )
+
+    header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec_headers
+    header = header.ljust(FILE_ALIGN, b"\x00")
+
+    end = max(s["rptr"] + s["rsize"] for s in sections)
+    buf = bytearray(header.ljust(end, b"\x00"))
+    for s in sections:
+        body = s.get("body", b"")
+        body = body.ljust(s["rsize"], b"\x00")
+        buf[s["rptr"] : s["rptr"] + s["rsize"]] = body[: s["rsize"]]
+    return bytes(buf)
+
+
+class PEParseSmoke(unittest.TestCase):
+    def test_minimal_pe(self):
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+            ],
+            entry_rva=0x1000,
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.machine, 0x014C)
+        self.assertEqual(pe.image_base, IMAGE_BASE)
+        self.assertEqual(pe.entry_point_va, IMAGE_BASE + 0x1000)
+        self.assertEqual(len(pe.sections), 1)
+        self.assertEqual(pe.sections[0].name, ".text")
+        self.assertTrue(pe.sections[0].is_code)
+        # No data dirs populated → no exports/imports/relocs/pointers.
+        self.assertFalse(pe.exports)
+        self.assertFalse(pe.imports)
+        self.assertFalse(pe.pointer_rvas)
+
+
+class PEExportsTest(unittest.TestCase):
+    def test_named_export(self):
+        # Place the export directory inside an .rdata section.
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        40 + 4 + 2  # past directory + funcs/names/ordinals arrays
+        # Layout inside .rdata:
+        #   0x00: IMAGE_EXPORT_DIRECTORY
+        #   0x28: function RVA array (one entry)
+        #   0x2C: name RVA array (one entry)
+        #   0x30: ordinal index array (one WORD)
+        #   0x32: function name string
+        #   0x40: dll name string
+        funcs_rva = rdata_rva + 0x28
+        names_rva = rdata_rva + 0x2C
+        ords_rva = rdata_rva + 0x30
+        func_name_rva = rdata_rva + 0x32
+        dll_name_rva = rdata_rva + 0x40
+        export_dir = struct.pack(
+            "<IIHHIIIIIII",
+            0,
+            0,
+            0,
+            0,
+            dll_name_rva,
+            1,  # ordinal base
+            1,
+            1,  # num funcs, num names
+            funcs_rva,
+            names_rva,
+            ords_rva,
+        )
+        body = bytearray(0x60)
+        body[: len(export_dir)] = export_dir
+        struct.pack_into("<I", body, 0x28, 0x1000)  # func RVA → entry
+        struct.pack_into("<I", body, 0x2C, func_name_rva)
+        struct.pack_into("<H", body, 0x30, 0)  # ordinal index
+        body[0x32 : 0x32 + 5] = b"main\x00"
+        body[0x40 : 0x40 + 10] = b"mydll.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x60,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(rdata_rva, 0x60)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.export_dll_name, "mydll.dll")
+        self.assertEqual(len(pe.exports), 1)
+        self.assertEqual(pe.exports[0].name, "main")
+        self.assertEqual(pe.exports[0].ordinal, 1)
+        self.assertEqual(pe.exports[0].rva, 0x1000)
+
+
+class PEOrdinalExportTest(unittest.TestCase):
+    def test_ordinal_only_export(self):
+        """Exports listed without a name (NumberOfFunctions >
+        NumberOfNames) still need a PEExport entry — captured by
+        ordinal, with .name == None."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        # Layout:
+        #   0x00 : IMAGE_EXPORT_DIRECTORY
+        #   0x28 : funcs array (2 entries)
+        #   0x30 : names array (0 entries → omitted)
+        #   0x30 : ordinals array (0 entries → omitted)
+        #   0x32 : DLL name
+        funcs_rva = rdata_rva + 0x28
+        dll_name_rva = rdata_rva + 0x32
+        body = bytearray(0x80)
+        struct.pack_into(
+            "<IIHHIIIIIII",
+            body,
+            0x00,
+            0,
+            0,
+            0,
+            0,
+            dll_name_rva,
+            5,  # ordinal base
+            2,  # num funcs
+            0,  # num names
+            funcs_rva,
+            0,  # names_rva (unused)
+            0,  # ords_rva (unused)
+        )
+        # Two function RVAs.
+        struct.pack_into("<II", body, 0x28, 0x1000, 0x1100)
+        body[0x32 : 0x32 + 11] = b"OrdLib.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x200,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x200,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(rdata_rva, 0x80)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.export_dll_name, "OrdLib.dll")
+        self.assertEqual(len(pe.exports), 2)
+        # Ordinal base 5 + function index 0/1 = ordinals 5/6.
+        self.assertEqual(
+            [(e.name, e.ordinal) for e in pe.exports], [(None, 5), (None, 6)]
+        )
+
+
+class PEForwardedExportTest(unittest.TestCase):
+    def test_forwarder_captured(self):
+        # Construct an export directory where the single function RVA falls
+        # inside the export directory's own range — that flags it as a
+        # forwarder. The forwarder string sits at func_rva.
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        export_dir_size = 0x60
+        # Layout inside .rdata:
+        #   0x00 : IMAGE_EXPORT_DIRECTORY
+        #   0x28 : function RVA array (one entry pointing at the forwarder string)
+        #   0x2C : name RVA array (one entry)
+        #   0x30 : ordinal index
+        #   0x32 : forwarder string ("KERNEL32.Sleep")
+        #   0x50 : exported name ("MySleep")
+        #   0x58 : dll name
+        funcs_rva = rdata_rva + 0x28
+        names_rva = rdata_rva + 0x2C
+        ords_rva = rdata_rva + 0x30
+        forwarder_rva = rdata_rva + 0x32
+        export_name_rva = rdata_rva + 0x50
+        dll_name_rva = rdata_rva + 0x58
+
+        body = bytearray(0x80)
+        struct.pack_into(
+            "<IIHHIIIIIII",
+            body,
+            0x00,
+            0,
+            0,
+            0,
+            0,
+            dll_name_rva,
+            1,
+            1,
+            1,
+            funcs_rva,
+            names_rva,
+            ords_rva,
+        )
+        struct.pack_into("<I", body, 0x28, forwarder_rva)
+        struct.pack_into("<I", body, 0x2C, export_name_rva)
+        struct.pack_into("<H", body, 0x30, 0)
+        # Forwarder string
+        body[0x32 : 0x32 + 15] = b"KERNEL32.Sleep\x00"
+        # Export name
+        body[0x50 : 0x50 + 8] = b"MySleep\x00"
+        # DLL name
+        body[0x58 : 0x58 + 9] = b"mydll.dll"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": export_dir_size,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(rdata_rva, export_dir_size)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.exports), 1)
+        self.assertEqual(pe.exports[0].name, "MySleep")
+        self.assertEqual(pe.exports[0].forwarder, "KERNEL32.Sleep")
+
+
+class PEImportsTest(unittest.TestCase):
+    def test_named_import(self):
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        # Layout inside .rdata:
+        #   0x00: IMAGE_IMPORT_DESCRIPTOR (20 bytes)
+        #   0x14: IMAGE_IMPORT_DESCRIPTOR null terminator (20 bytes of zero)
+        #   0x28: ILT array (one DWORD + null terminator)
+        #   0x30: IAT array (mirrors ILT)
+        #   0x38: IMAGE_IMPORT_BY_NAME (hint + name)
+        #   0x48: DLL name
+        ilt_rva = rdata_rva + 0x28
+        iat_rva = rdata_rva + 0x30
+        ibn_rva = rdata_rva + 0x38
+        dll_name_rva = rdata_rva + 0x48
+
+        body = bytearray(0x60)
+        struct.pack_into(
+            "<IIIII",
+            body,
+            0x00,
+            ilt_rva,
+            0,
+            0,
+            dll_name_rva,
+            iat_rva,
+        )
+        # Descriptor terminator already zeroed.
+        struct.pack_into("<I", body, 0x28, ibn_rva)
+        struct.pack_into("<I", body, 0x2C, 0)  # ILT terminator
+        struct.pack_into("<I", body, 0x30, ibn_rva)
+        struct.pack_into("<I", body, 0x34, 0)
+        struct.pack_into("<H", body, 0x38, 0)  # hint
+        body[0x3A : 0x3A + 10] = b"DoStuff\x00"
+        body[0x48 : 0x48 + 11] = b"OTHER.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x60,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 40)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.imports), 1)
+        self.assertEqual(pe.imports[0].dll, "OTHER.dll")
+        self.assertEqual(pe.imports[0].name, "DoStuff")
+        self.assertEqual(pe.imports[0].iat_rva, iat_rva)
+        self.assertIsNone(pe.imports[0].ordinal)
+
+
+class PEDebugTest(unittest.TestCase):
+    def test_nb10_pdb_path(self):
+        """CodeView 5.0 (NB10) records carry a 4-byte signature + 4-byte
+        age + NUL-terminated path. Cover the older format separately
+        from RSDS, since legacy MSVC 6 binaries (Europa1400 etc.) use it."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        # NB10 layout: 'NB10' + offset(4) + signature(4) + age(4) + name
+        cv_record = (
+            b"NB10"
+            + struct.pack("<I", 0)  # CodeView offset (always 0)
+            + struct.pack("<I", 0x3FFD22BC)  # signature (matches Europa1400)
+            + struct.pack("<I", 2)  # age
+            + b"legacy.pdb\x00"
+        )
+        entry = struct.pack(
+            "<IIHHIIII",
+            0,
+            0,
+            0,
+            0,
+            2,  # IMAGE_DEBUG_TYPE_CODEVIEW
+            len(cv_record),
+            rdata_rva + 28,  # AddressOfRawData
+            rdata_rptr + 28,  # PointerToRawData
+        )
+        body = bytearray(0x100)
+        body[0:28] = entry
+        body[28 : 28 + len(cv_record)] = cv_record
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 28),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.pdb_path, "legacy.pdb")
+        self.assertEqual(pe.pdb_age, 2)
+        self.assertEqual(pe.pdb_guid, "3FFD22BC")
+
+    def test_rsds_pdb_path(self):
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        # Debug directory: 28 bytes per entry.
+        debug_dir_rva = rdata_rva
+        cv_rva = rdata_rva + 28  # CodeView record sits right after the entry
+        # CodeView RSDS layout: 'RSDS' (4) + GUID (16) + age (4) + name
+        # GUID: 11223344-5566-7788-99AA-BBCCDDEEFF00 in mixed endianness
+        cv_record = (
+            b"RSDS"
+            + bytes.fromhex("44332211")  # uint32 LE
+            + bytes.fromhex("6655")  # uint16 LE
+            + bytes.fromhex("8877")  # uint16 LE
+            + bytes.fromhex("99AABBCCDDEEFF00")  # 8 raw bytes
+            + struct.pack("<I", 7)  # age
+            + b"my.pdb\x00"
+        )
+        entry = struct.pack(
+            "<IIHHIIII",
+            0,  # characteristics
+            0,  # timestamp
+            0,
+            0,  # major/minor version
+            2,  # IMAGE_DEBUG_TYPE_CODEVIEW
+            len(cv_record),  # SizeOfData
+            cv_rva,  # AddressOfRawData
+            rdata_rptr + 28,  # PointerToRawData
+        )
+        body = bytearray(0x100)
+        body[0:28] = entry
+        body[28 : 28 + len(cv_record)] = cv_record
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            # Debug dir is index 6.
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (debug_dir_rva, 28),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.pdb_path, "my.pdb")
+        self.assertEqual(pe.pdb_age, 7)
+        self.assertEqual(
+            pe.pdb_guid,
+            "11223344-5566-7788-99-AA-BB-CC-DD-EE-FF-00",
+        )
+
+
+class PETlsTest(unittest.TestCase):
+    def test_pe32_plus_tls_callbacks(self):
+        """PE32+ IMAGE_TLS_DIRECTORY puts AddressOfCallBacks at +0x18 as
+        an 8-byte VA, and each callback slot is a QWORD. Verify the
+        64-bit code path (the other test covers the 4-byte PE32 form)."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        callbacks_va = 0x140000000 + rdata_rva + 0x60
+        cb_a = 0x140000000 + 0x1000
+        cb_b = 0x140000000 + 0x1100
+
+        body = bytearray(0x200)
+        # IMAGE_TLS_DIRECTORY64 layout: 5 QWORDs + 2 DWORDs.
+        # AddressOfCallBacks is the 4th QWORD at offset 0x18.
+        struct.pack_into("<Q", body, 0x18, callbacks_va)
+        # Callbacks array (3 QWORDs: two callbacks + NUL).
+        struct.pack_into("<QQQ", body, 0x60, cb_a, cb_b, 0)
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x200,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x28),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertTrue(pe.is_pe32_plus)
+        self.assertEqual(pe.tls_callback_vas, [cb_a, cb_b])
+
+    def test_tls_callbacks(self):
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        # TLS directory layout (PE32): 24+ bytes; AddressOfCallBacks at +0x0C.
+        callbacks_va = IMAGE_BASE + rdata_rva + 0x40
+        # Two callbacks then a NULL terminator.
+        cb_va_a = IMAGE_BASE + 0x1000
+        cb_va_b = IMAGE_BASE + 0x1004
+        tls_dir = struct.pack(
+            "<IIIIIIII",
+            0,  # StartAddressOfRawData
+            0,  # EndAddressOfRawData
+            0,  # AddressOfIndex
+            callbacks_va,  # AddressOfCallBacks
+            0,  # SizeOfZeroFill
+            0,  # Characteristics
+            0,  # padding
+            0,  # padding
+        )
+        body = bytearray(0x100)
+        body[0 : len(tls_dir)] = tls_dir
+        # Callbacks array
+        struct.pack_into("<III", body, 0x40, cb_va_a, cb_va_b, 0)
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            # TLS dir is index 9.
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, len(tls_dir)),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.tls_callback_vas, [cb_va_a, cb_va_b])
+
+
+class PEResourcesTest(unittest.TestCase):
+    def test_named_resource_type(self):
+        """Resource directory entries can be keyed by string name as well
+        as integer ID. Verify the parser captures the UTF-16 name."""
+        rsrc_rva = 0x3000
+        rsrc_rptr = FILE_ALIGN * 3
+
+        body = bytearray(0x200)
+        # Root (type) dir — 1 named entry, 0 id entries
+        struct.pack_into("<IIHHHH", body, 0x000, 0, 0, 0, 0, 1, 0)
+        # Named entry: name field has high bit set + offset to name string
+        struct.pack_into("<II", body, 0x010, 0x80000000 | 0x080, 0x80000000 | 0x018)
+        # Name dir
+        struct.pack_into("<IIHHHH", body, 0x018, 0, 0, 0, 0, 0, 1)
+        struct.pack_into("<II", body, 0x028, 1, 0x80000000 | 0x030)
+        # Lang dir
+        struct.pack_into("<IIHHHH", body, 0x030, 0, 0, 0, 0, 0, 1)
+        struct.pack_into("<II", body, 0x040, 0x0409, 0x048)
+        # Leaf entry
+        struct.pack_into("<IIII", body, 0x048, rsrc_rva + 0x100, 4, 0, 0)
+        # IMAGE_RESOURCE_DIR_STRING at offset 0x080: WORD length + UTF-16 chars
+        name = "MYTYPE"
+        utf16 = name.encode("utf-16-le")
+        struct.pack_into("<H", body, 0x080, len(name))
+        body[0x082 : 0x082 + len(utf16)] = utf16
+        # Leaf bytes
+        body[0x100:0x104] = b"DATA"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rsrc",
+                    "vsize": 0x200,
+                    "vaddr": rsrc_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rsrc_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (0, 0), (rsrc_rva, 0x200)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.resources), 1)
+        self.assertEqual(pe.resources[0].rtype, "MYTYPE")
+        self.assertEqual(pe.resources[0].rid, 1)
+
+    def test_walk_tree(self):
+        # Build a minimal 3-level resource tree:
+        #   Type 16 (VERSION) → Name 1 → Lang 0x0409 → leaf.
+        rsrc_rva = 0x3000
+        rsrc_rptr = FILE_ALIGN * 3
+
+        # Layout inside .rsrc (offsets relative to root):
+        #   0x000: type dir header + 1 entry
+        #   0x018: name dir header + 1 entry
+        #   0x030: lang dir header + 1 entry
+        #   0x048: leaf IMAGE_RESOURCE_DATA_ENTRY
+        #   0x100: actual resource bytes (8 bytes)
+        body = bytearray(0x110)
+        # Root (type) dir
+        struct.pack_into("<IIHHHH", body, 0x000, 0, 0, 0, 0, 0, 1)  # 1 id entry
+        struct.pack_into(
+            "<II", body, 0x010, 16, 0x80000000 | 0x018
+        )  # type=16, subdir at 0x018
+
+        # Name dir
+        struct.pack_into("<IIHHHH", body, 0x018, 0, 0, 0, 0, 0, 1)
+        struct.pack_into(
+            "<II", body, 0x028, 1, 0x80000000 | 0x030
+        )  # name=1, subdir at 0x030
+
+        # Lang dir
+        struct.pack_into("<IIHHHH", body, 0x030, 0, 0, 0, 0, 0, 1)
+        struct.pack_into("<II", body, 0x040, 0x0409, 0x048)  # lang, leaf at 0x048
+
+        # Leaf
+        struct.pack_into("<IIII", body, 0x048, rsrc_rva + 0x100, 8, 0, 0)
+        body[0x100:0x108] = b"\x01\x02\x03\x04\x05\x06\x07\x08"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rsrc",
+                    "vsize": 0x110,
+                    "vaddr": rsrc_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rsrc_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            # Resource dir is index 2.
+            data_dirs=[(0, 0), (0, 0), (rsrc_rva, 0x110)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.resources), 1)
+        r = pe.resources[0]
+        self.assertEqual(r.rtype, 16)
+        self.assertEqual(r.rid, 1)
+        self.assertEqual(r.language, 0x0409)
+        self.assertEqual(r.size, 8)
+
+
+class PELoadConfigTest(unittest.TestCase):
+    def test_pe32_cookie_only_no_safeseh(self):
+        """LoadConfig with /GS but without /SAFESEH (SEHandlerTable == 0).
+        Verify security_cookie_va is captured and safe_seh_handlers
+        stays empty rather than reading garbage."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        body = bytearray(0x80)
+        struct.pack_into("<I", body, 0, 0x48)  # Size
+        struct.pack_into("<I", body, 0x3C, IMAGE_BASE + rdata_rva + 0x60)  # cookie
+        # 0x40 + 0x44 stay zero: no SafeSEH
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x48),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.security_cookie_va, IMAGE_BASE + rdata_rva + 0x60)
+        self.assertEqual(pe.safe_seh_handlers, [])
+
+    def test_pe32_security_cookie_and_safeseh(self):
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        # IMAGE_LOAD_CONFIG_DIRECTORY32 minimal layout:
+        # Offset 0x3C = SecurityCookie VA, 0x40 = SEHandlerTable VA,
+        # 0x44 = SEHandlerCount.
+        seh_table_off = rdata_rva + 0x50
+        seh_table_va = IMAGE_BASE + seh_table_off
+        lc_size = 0x48
+        body = bytearray(0x100)
+        struct.pack_into("<I", body, 0, lc_size)  # Size
+        struct.pack_into(
+            "<I", body, 0x3C, IMAGE_BASE + rdata_rva + 0x60
+        )  # SecurityCookie
+        struct.pack_into("<I", body, 0x40, seh_table_va)  # SEHandlerTable
+        struct.pack_into("<I", body, 0x44, 2)  # SEHandlerCount
+        # SEH handler RVAs
+        struct.pack_into("<II", body, 0x50, 0x1010, 0x1020)
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x100,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x100,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, lc_size),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.security_cookie_va, IMAGE_BASE + rdata_rva + 0x60)
+        self.assertEqual(pe.safe_seh_handlers, [0x1010, 0x1020])
+
+
+class PECfgTest(unittest.TestCase):
+    def test_pe32_plus_loadconfig_cookie_only(self):
+        """An older PE32+ LoadConfig might only span 0x60 bytes
+        (covering SecurityCookie) without the +0x80..+0x90 CFG fields.
+        Parser must read the cookie and stop, not over-read."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        # LoadConfig is exactly 0x60 bytes — cookie at +0x58 is included
+        # but CFG fields at +0x80 are not.
+        body = bytearray(0x60)
+        struct.pack_into("<Q", body, 0x58, 0x140100000)
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x60,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x60),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertTrue(pe.is_pe32_plus)
+        self.assertEqual(pe.security_cookie_va, 0x140100000)
+        self.assertEqual(pe.cfg_function_rvas, [])
+        self.assertEqual(pe.cfg_flags, 0)
+
+    def test_pe32_cfg_table_with_extra_metadata_bytes(self):
+        """The top nibble of GuardFlags encodes how many extra metadata
+        bytes follow each CFG RVA in the table. Verify the parser
+        strides by `4 + extra` so entries don't shift."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        cfg_table_off = rdata_rva + 0x80
+        cfg_table_va = IMAGE_BASE + cfg_table_off
+        body = bytearray(0x200)
+        struct.pack_into("<I", body, 0x54, cfg_table_va)
+        struct.pack_into("<I", body, 0x58, 3)
+        # GuardFlags top nibble = 2 → +2 bytes per entry → stride 6.
+        struct.pack_into("<I", body, 0x5C, 0x20000000)
+        # Three entries at stride 6: RVA + 2 bytes of metadata.
+        for i, rva in enumerate([0x1000, 0x1100, 0x1200]):
+            off = 0x80 + i * 6
+            struct.pack_into("<I", body, off, rva)
+            struct.pack_into("<H", body, off + 4, 0xBEEF)  # metadata
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x100,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x100,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x200,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x60),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.cfg_function_rvas, [0x1000, 0x1100, 0x1200])
+        self.assertEqual(pe.cfg_flags, 0x20000000)
+
+    def test_pe32_plus_guardcf_table(self):
+        """PE32+ Load Config places GuardCFFunctionTable at +0x80, count
+        at +0x88, flags at +0x90. Verify the 64-bit layout (PE32 fields
+        are at +0x54/+0x58/+0x5C in the other test)."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        cfg_table_off = rdata_rva + 0xA0
+        cfg_table_va = 0x140000000 + cfg_table_off
+        body = bytearray(0x200)
+        # SecurityCookie at +0x58 (QWORD)
+        struct.pack_into("<Q", body, 0x58, 0x140100000)
+        # GuardCF fields
+        struct.pack_into("<Q", body, 0x80, cfg_table_va)
+        struct.pack_into("<Q", body, 0x88, 2)
+        struct.pack_into("<I", body, 0x90, 0)
+        # Two CFG RVAs
+        struct.pack_into("<II", body, 0xA0, 0x1100, 0x1200)
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x100,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x100,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x200,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x98),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertTrue(pe.is_pe32_plus)
+        self.assertEqual(pe.security_cookie_va, 0x140100000)
+        self.assertEqual(pe.cfg_function_rvas, [0x1100, 0x1200])
+
+    def test_pe32_guardcf_table(self):
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        cfg_table_off = rdata_rva + 0x80
+        cfg_table_va = IMAGE_BASE + cfg_table_off
+        body = bytearray(0x100)
+        # IMAGE_LOAD_CONFIG_DIRECTORY32: GuardCFFunctionTable +0x54,
+        # GuardCFFunctionCount +0x58, GuardFlags +0x5C.
+        struct.pack_into("<I", body, 0x54, cfg_table_va)
+        struct.pack_into("<I", body, 0x58, 3)
+        struct.pack_into("<I", body, 0x5C, 0)
+        # Three CFG entries (stride = 4, no metadata bytes).
+        struct.pack_into("<III", body, 0x80, 0x1010, 0x1020, 0x1030)
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x100,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x100,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x60),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.cfg_function_rvas, [0x1010, 0x1020, 0x1030])
+
+
+class PEExceptionTableTest(unittest.TestCase):
+    def test_runtime_functions(self):
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        # Two RUNTIME_FUNCTION entries then a null terminator.
+        body = struct.pack("<III", 0x1000, 0x1010, 0x3000)
+        body += struct.pack("<III", 0x1020, 0x1040, 0x3010)
+        body += b"\x00" * 12  # terminator
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": len(body),
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": body,
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, len(body)),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(
+            pe.runtime_functions,
+            [(0x1000, 0x1010, 0x3000), (0x1020, 0x1040, 0x3010)],
+        )
+
+
+class PEBoundImportsTest(unittest.TestCase):
+    def test_bound_import_with_forwarder_refs(self):
+        """Bound-import descriptor lists module forwarder refs after
+        itself in the same table. Verify they're collected."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        bi_size = 8 * 4  # main descriptor + 2 forwarders + null
+        body = bytearray(0x80)
+        # Main descriptor: 2 forwarder refs follow.
+        struct.pack_into("<IHH", body, 0, 0xDEADBEEF, bi_size, 2)
+        # Two forwarder-ref entries (timestamp/name_off/0).
+        struct.pack_into("<IHH", body, 8, 0xCAFEBABE, bi_size + 13, 0)
+        struct.pack_into("<IHH", body, 16, 0xBADC0DE5, bi_size + 24, 0)
+        # Null-terminator descriptor.
+        struct.pack_into("<IHH", body, 24, 0, 0, 0)
+        body[bi_size : bi_size + 13] = b"KERNEL32.dll\x00"
+        body[bi_size + 13 : bi_size + 24] = b"USER32.dll\x00"
+        body[bi_size + 24 : bi_size + 34] = b"GDI32.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, bi_size + 33),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.bound_imports), 1)
+        self.assertEqual(pe.bound_imports[0].dll, "KERNEL32.dll")
+        self.assertEqual(
+            pe.bound_imports[0].forwarder_refs, ["USER32.dll", "GDI32.dll"]
+        )
+
+    def test_named_bound_import(self):
+        # Bound-import table sits at .rdata RVA 0x2000. DLL name string sits
+        # 0x18 bytes after the directory start.
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        bi_size = 8 * 2  # one entry + null terminator
+        name_off_in_dir = bi_size  # name starts right after both descriptors
+        body = bytearray(0x80)
+        # Descriptor: timestamp, name offset, # forwarders
+        struct.pack_into("<IHH", body, 0, 0xDEADBEEF, name_off_in_dir, 0)
+        # Null terminator descriptor
+        struct.pack_into("<IHH", body, 8, 0, 0, 0)
+        body[name_off_in_dir : name_off_in_dir + 12] = b"KERNEL32.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, bi_size + 16),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.bound_imports), 1)
+        self.assertEqual(pe.bound_imports[0].dll, "KERNEL32.dll")
+        self.assertEqual(pe.bound_imports[0].timestamp, 0xDEADBEEF)
+        self.assertFalse(pe.bound_imports[0].forwarder_refs)
+
+
+class PEVersionInfoTest(unittest.TestCase):
+    """Build a synthetic VS_VERSIONINFO with one StringTable / one String
+    pair and assert it decodes to the expected key/value."""
+
+    def _vs_node(
+        self, key: str, value: bytes, w_type: int, children: bytes = b""
+    ) -> bytes:
+        key_utf16 = key.encode("utf-16-le") + b"\x00\x00"
+        header_size = 6
+        body_off = header_size + len(key_utf16)
+        # Align body offset to dword
+        pad_a = (-body_off) & 3
+        value_off = body_off + pad_a
+        value_padded = value
+        pad_b = (-len(value_padded)) & 3
+        children_off = value_off + len(value_padded) + pad_b
+        total = children_off + len(children)
+        # Pad total to dword as well so the next sibling starts aligned.
+        pad_c = (-total) & 3
+        # w_value_length depends on w_type:
+        #   w_type==1 (text) → length in WCHARs (chars including NUL)
+        #   w_type==0 (binary) → length in bytes
+        if w_type == 1:
+            w_value_length = len(value) // 2
+        else:
+            w_value_length = len(value)
+        return (
+            struct.pack("<HHH", total + pad_c, w_value_length, w_type)
+            + key_utf16
+            + b"\x00" * pad_a
+            + value_padded
+            + b"\x00" * pad_b
+            + children
+            + b"\x00" * pad_c
+        )
+
+    def test_string_table(self):
+        # Build inner-most String entry: key "CompanyName", value "Acme Corp"
+        company = self._vs_node(
+            "CompanyName",
+            "Acme Corp\x00".encode("utf-16-le"),
+            w_type=1,
+        )
+        # StringTable wrapping that one String
+        string_table = self._vs_node(
+            "040904E4",
+            b"",
+            w_type=1,
+            children=company,
+        )
+        string_file_info = self._vs_node(
+            "StringFileInfo", b"", w_type=1, children=string_table
+        )
+        # Root VS_VERSION_INFO; for this test we keep its Value empty so
+        # we don't have to populate the 52-byte VS_FIXEDFILEINFO.
+        root = self._vs_node(
+            "VS_VERSION_INFO",
+            b"",
+            w_type=0,
+            children=string_file_info,
+        )
+
+        # Now wrap in a resource directory pointing at this blob.
+        rsrc_rva = 0x3000
+        rsrc_rptr = FILE_ALIGN * 3
+        body = bytearray(0x600)
+        # Root (type) dir
+        struct.pack_into("<IIHHHH", body, 0x000, 0, 0, 0, 0, 0, 1)
+        struct.pack_into("<II", body, 0x010, 16, 0x80000000 | 0x018)
+        # Name dir
+        struct.pack_into("<IIHHHH", body, 0x018, 0, 0, 0, 0, 0, 1)
+        struct.pack_into("<II", body, 0x028, 1, 0x80000000 | 0x030)
+        # Lang dir
+        struct.pack_into("<IIHHHH", body, 0x030, 0, 0, 0, 0, 0, 1)
+        struct.pack_into("<II", body, 0x040, 0x0409, 0x048)
+        # Leaf entry → points at our root blob at +0x100
+        struct.pack_into("<IIII", body, 0x048, rsrc_rva + 0x100, len(root), 0, 0)
+        body[0x100 : 0x100 + len(root)] = root
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rsrc",
+                    "vsize": 0x600,
+                    "vaddr": rsrc_rva,
+                    "rsize": FILE_ALIGN * 2,
+                    "rptr": rsrc_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (0, 0), (rsrc_rva, 0x600)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.version_info.get("CompanyName"), "Acme Corp")
+
+
+class PEDelayImportsTest(unittest.TestCase):
+    def test_v1_va_based_delay_import(self):
+        """Pre-VS2008 / legacy MSVC emits IMAGE_DELAYLOAD_DESCRIPTOR with
+        attrs=0 — its address fields are VAs (need ImageBase subtraction)
+        rather than RVAs. Verify the parser handles both formats."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        dll_name_va = IMAGE_BASE + rdata_rva + 0x60
+        iat_va = IMAGE_BASE + rdata_rva + 0x40
+        int_va = IMAGE_BASE + rdata_rva + 0x48
+        ibn_rva = rdata_rva + 0x50
+
+        body = bytearray(0x100)
+        struct.pack_into(
+            "<IIIIIIII",
+            body,
+            0x00,
+            0,  # attrs (v1: VA-based)
+            dll_name_va,
+            0,
+            iat_va,
+            int_va,
+            0,
+            0,
+            0xCAFE,
+        )
+        struct.pack_into("<I", body, 0x48, ibn_rva)
+        struct.pack_into("<I", body, 0x4C, 0)
+        struct.pack_into("<H", body, 0x50, 0)
+        body[0x52 : 0x52 + 9] = b"LegacyFn\x00"
+        body[0x60 : 0x60 + 10] = b"OLDDLL.dll"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x40),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.delay_imports), 1)
+        self.assertEqual(pe.delay_imports[0].dll, "OLDDLL.dll")
+        self.assertEqual(pe.delay_imports[0].name, "LegacyFn")
+
+    def test_pe32_plus_delay_import(self):
+        """PE32+ delay imports use 8-byte thunks; verify the 64-bit
+        path. (The default tests use 4-byte PE32 thunks.)"""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        dll_name_rva = rdata_rva + 0x80
+        iat_rva = rdata_rva + 0x40
+        int_rva = rdata_rva + 0x50
+        ibn_rva = rdata_rva + 0x70
+
+        body = bytearray(0x100)
+        struct.pack_into(
+            "<IIIIIIII",
+            body,
+            0x00,
+            1,
+            dll_name_rva,
+            0,
+            iat_rva,
+            int_rva,
+            0,
+            0,
+            0,
+        )
+        # ILT: one 8-byte entry pointing at IMAGE_IMPORT_BY_NAME, then NUL.
+        struct.pack_into("<Q", body, 0x50, ibn_rva)
+        struct.pack_into("<Q", body, 0x58, 0)
+        # IMAGE_IMPORT_BY_NAME (hint + name).
+        struct.pack_into("<H", body, 0x70, 0)
+        body[0x72 : 0x72 + 11] = b"NtCreateFile\x00"[:11]
+        body[0x80 : 0x80 + 11] = b"ntdll.dll\x00"
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x40),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertTrue(pe.is_pe32_plus)
+        self.assertEqual(len(pe.delay_imports), 1)
+        self.assertEqual(pe.delay_imports[0].dll, "ntdll.dll")
+        self.assertEqual(pe.delay_imports[0].iat_rva, iat_rva)
+
+    def test_v2_delay_import(self):
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        # IMAGE_DELAYLOAD_DESCRIPTOR (v2 / RVA-based, attrs = 1):
+        #   0x00 = attrs
+        #   0x04 = dll name RVA
+        #   0x08 = module handle RVA
+        #   0x0C = IAT RVA
+        #   0x10 = INT RVA
+        # Then a NULL terminator (32 bytes of zeros).
+        dll_name_rva = rdata_rva + 0x60
+        iat_rva = rdata_rva + 0x40
+        int_rva = rdata_rva + 0x48
+        ibn_rva = rdata_rva + 0x50
+
+        body = bytearray(0x100)
+        struct.pack_into(
+            "<IIIIIIII",
+            body,
+            0x00,
+            1,
+            dll_name_rva,
+            0,
+            iat_rva,
+            int_rva,
+            0,
+            0,
+            0xCAFEBABE,
+        )
+        # INT table — one entry (named) + NULL terminator
+        struct.pack_into("<I", body, 0x48, ibn_rva)
+        struct.pack_into("<I", body, 0x4C, 0)
+        # IMAGE_IMPORT_BY_NAME: hint + name
+        struct.pack_into("<H", body, 0x50, 0)
+        body[0x52 : 0x52 + 9] = b"D3DXLoad\x00"
+        # DLL name
+        body[0x60 : 0x60 + 10] = b"d3d8.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            # Delay-import dir is index 13.
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (rdata_rva, 0x40),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.delay_imports), 1)
+        self.assertEqual(pe.delay_imports[0].dll, "d3d8.dll")
+        self.assertEqual(pe.delay_imports[0].name, "D3DXLoad")
+        self.assertEqual(pe.delay_imports[0].iat_rva, iat_rva)
+
+
+class PEPlusImportTest(unittest.TestCase):
+    def test_pe32_plus_imports(self):
+        """PE32+ imports walk 8-byte thunks; the ordinal flag sits in the
+        high bit of a QWORD, not a DWORD. Verify the 64-bit path."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        ilt_rva = rdata_rva + 0x28
+        iat_rva = rdata_rva + 0x40
+        dll_name_rva = rdata_rva + 0x70
+        ibn_rva = rdata_rva + 0x60
+
+        body = bytearray(0x100)
+        struct.pack_into(
+            "<IIIII",
+            body,
+            0x00,
+            ilt_rva,
+            0,
+            0,
+            dll_name_rva,
+            iat_rva,
+        )
+        # ILT: 8-byte entry pointing at IMAGE_IMPORT_BY_NAME, then NUL.
+        struct.pack_into("<Q", body, 0x28, ibn_rva)
+        struct.pack_into("<Q", body, 0x30, 0)
+        # IAT mirrors ILT
+        struct.pack_into("<Q", body, 0x40, ibn_rva)
+        struct.pack_into("<Q", body, 0x48, 0)
+        # IMAGE_IMPORT_BY_NAME at 0x60: hint + name
+        struct.pack_into("<H", body, 0x60, 0)
+        body[0x62 : 0x62 + 11] = b"NtReadFile\x00"
+        # DLL name at 0x70 (no overlap with IBN)
+        body[0x70 : 0x70 + 10] = b"ntdll.dll\x00"
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 0x28)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertTrue(pe.is_pe32_plus)
+        self.assertEqual(len(pe.imports), 1)
+        self.assertEqual(pe.imports[0].dll, "ntdll.dll")
+        self.assertEqual(pe.imports[0].name, "NtReadFile")
+        self.assertEqual(pe.imports[0].iat_rva, iat_rva)
+
+
+class PEPlusOrdinalImportTest(unittest.TestCase):
+    def test_pe32_plus_ordinal_import(self):
+        """PE32+ ordinal-only imports set bit 63 of the 8-byte thunk
+        (vs bit 31 on PE32). Verify the 64-bit ordinal-flag path."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        ilt_rva = rdata_rva + 0x28
+        iat_rva = rdata_rva + 0x40
+        dll_name_rva = rdata_rva + 0x60
+
+        body = bytearray(0x80)
+        struct.pack_into(
+            "<IIIII",
+            body,
+            0x00,
+            ilt_rva,
+            0,
+            0,
+            dll_name_rva,
+            iat_rva,
+        )
+        # ILT: 8-byte ordinal-only entry (bit 63 set, low 16 = ordinal 42)
+        struct.pack_into("<Q", body, 0x28, (1 << 63) | 42)
+        struct.pack_into("<Q", body, 0x30, 0)
+        # IAT mirrors
+        struct.pack_into("<Q", body, 0x40, (1 << 63) | 42)
+        struct.pack_into("<Q", body, 0x48, 0)
+        body[0x60 : 0x60 + 10] = b"WS2_32.dll"
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 0x28)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertTrue(pe.is_pe32_plus)
+        self.assertEqual(len(pe.imports), 1)
+        self.assertEqual(pe.imports[0].dll, "WS2_32.dll")
+        self.assertIsNone(pe.imports[0].name)
+        self.assertEqual(pe.imports[0].ordinal, 42)
+
+
+class PEImportIatZeroSkipTest(unittest.TestCase):
+    def test_descriptor_with_zero_iat_is_skipped(self):
+        """An IMAGE_IMPORT_DESCRIPTOR with iat_rva == 0 has no IAT slot
+        VA to attach thunks to. parse_imports must skip such descriptors
+        rather than emit bogus PEImport entries pointing at slot 0."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        ilt_rva = rdata_rva + 0x28
+        dll_name_rva = rdata_rva + 0x40
+
+        body = bytearray(0x60)
+        # Descriptor with iat_rva = 0 (malformed but encountered in
+        # corrupt binaries).
+        struct.pack_into(
+            "<IIIII",
+            body,
+            0x00,
+            ilt_rva,
+            0,
+            0,
+            dll_name_rva,
+            0,
+        )
+        struct.pack_into("<I", body, 0x28, 0)  # ILT terminator
+        body[0x40 : 0x40 + 11] = b"BadDll.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x60,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 20)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        # The descriptor is parsed (DLL name known) but no PEImport
+        # entries are recorded since slot VAs would be meaningless.
+        self.assertEqual(pe.imports, [])
+
+
+class PEOrdinalImportTest(unittest.TestCase):
+    def test_ordinal_import(self):
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+
+        ilt_rva = rdata_rva + 0x28
+        iat_rva = rdata_rva + 0x30
+        dll_name_rva = rdata_rva + 0x40
+
+        body = bytearray(0x60)
+        struct.pack_into(
+            "<IIIII",
+            body,
+            0x00,
+            ilt_rva,
+            0,
+            0,
+            dll_name_rva,
+            iat_rva,
+        )
+        # Ordinal-only entry: high bit set, low 16 bits hold the ordinal.
+        struct.pack_into("<I", body, 0x28, 0x80000000 | 17)
+        struct.pack_into("<I", body, 0x2C, 0)  # ILT terminator
+        struct.pack_into("<I", body, 0x30, 0x80000000 | 17)
+        struct.pack_into("<I", body, 0x34, 0)
+        body[0x40 : 0x40 + 11] = b"OTHER.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x60,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 40)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.imports), 1)
+        self.assertIsNone(pe.imports[0].name)
+        self.assertEqual(pe.imports[0].ordinal, 17)
+        self.assertEqual(pe.imports[0].dll, "OTHER.dll")
+
+
+class PERelocsTest(unittest.TestCase):
+    def test_non_pointer_reloc_types_skipped(self):
+        """Reloc types other than HIGHLOW (3) and DIR64 (10) — LOW (1),
+        HIGH (2), HIGHADJ (4), and ABSOLUTE (0) padding — must NOT
+        populate pe.pointer_rvas. They describe partial relocations or
+        padding, not full pointers."""
+        reloc_rva = 0x4000
+        reloc_rptr = FILE_ALIGN * 3
+        block = struct.pack("<II", 0x6000, 20) + struct.pack(
+            "<HHHHHH",
+            (1 << 12) | 0x010,  # LOW — skip
+            (2 << 12) | 0x020,  # HIGH — skip
+            (4 << 12) | 0x030,  # HIGHADJ — skip
+            (0 << 12) | 0x040,  # ABSOLUTE padding — skip
+            (3 << 12) | 0x050,  # HIGHLOW — accept
+            0,  # padding
+        )
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".reloc",
+                    "vsize": len(block),
+                    "vaddr": reloc_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": reloc_rptr,
+                    "chars": 0x42000040,
+                    "body": block,
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (reloc_rva, len(block)),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        # Only the HIGHLOW entry should land in pointer_rvas.
+        self.assertEqual(pe.pointer_rvas, {0x6050})
+
+    def test_dir64_relocs(self):
+        """PE32+ binaries emit IMAGE_REL_BASED_DIR64 (type 10) entries.
+        Verify the parser accepts both HIGHLOW (type 3) and DIR64 (type 10)
+        in the same block — both flag the same `pointer_rvas` set."""
+        reloc_rva = 0x4000
+        reloc_rptr = FILE_ALIGN * 3
+        block = struct.pack("<II", 0x5000, 16) + struct.pack(
+            "<HHHH",
+            (10 << 12) | 0x008,  # DIR64
+            (10 << 12) | 0x020,  # DIR64
+            (3 << 12) | 0x040,  # HIGHLOW
+            0,  # padding entry — must be ignored
+        )
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".reloc",
+                    "vsize": len(block),
+                    "vaddr": reloc_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": reloc_rptr,
+                    "chars": 0x42000040,
+                    "body": block,
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (reloc_rva, len(block)),
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.pointer_rvas, {0x5008, 0x5020, 0x5040})
+
+    def test_multi_block_relocs(self):
+        """Two reloc blocks across different 4 KB pages should both be
+        captured, with type-0 padding skipped."""
+        reloc_rva = 0x4000
+        reloc_rptr = FILE_ALIGN * 3
+        block_a = struct.pack("<II", 0x3000, 16) + struct.pack(
+            "<HHHH",
+            (3 << 12) | 0x040,
+            (3 << 12) | 0x050,
+            (3 << 12) | 0x060,
+            0,  # padding
+        )
+        block_b = struct.pack("<II", 0x5000, 12) + struct.pack(
+            "<HH",
+            (3 << 12) | 0x100,
+            (3 << 12) | 0x200,
+        )
+        body = block_a + block_b
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".reloc",
+                    "vsize": len(body),
+                    "vaddr": reloc_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": reloc_rptr,
+                    "chars": 0x42000040,
+                    "body": body,
+                },
+            ],
+            data_dirs=[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (reloc_rva, len(body))],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(
+            pe.pointer_rvas,
+            {0x3040, 0x3050, 0x3060, 0x5100, 0x5200},
+        )
+
+    def test_highlow_relocs(self):
+        # Place a small .reloc block describing two HIGHLOW relocs on page
+        # 0x3000, at offsets 0x010 and 0x020.
+        reloc_rva = 0x4000
+        reloc_rptr = FILE_ALIGN * 3
+        block_size = 8 + 2 + 2 + 2 + 2  # header + 4 WORD entries
+        block = struct.pack("<II", 0x3000, block_size) + struct.pack(
+            "<HHHH",
+            (3 << 12) | 0x010,
+            (3 << 12) | 0x020,
+            0,  # padding entry (skipped)
+            0,  # padding entry (skipped)
+        )
+        body = block
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".reloc",
+                    "vsize": len(body),
+                    "vaddr": reloc_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": reloc_rptr,
+                    "chars": 0x42000040,
+                    "body": body,
+                },
+            ],
+            data_dirs=[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (reloc_rva, len(body))],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.pointer_rvas, {0x3010, 0x3020})
+
+
+class CreateConfigTailSegmentSortTest(unittest.TestCase):
+    def test_coff_symtab_and_signature_emit_in_file_order(self):
+        """When both a COFF symbol table and an Authenticode signature
+        are present, their YAML segments must appear in file-offset
+        order — splat rejects non-monotonic rom_start values."""
+        body = b"\x90" * FILE_ALIGN
+        # Lay out: header(0x200) + .text(0x200) + symtab(0x40) + sig(0x40)
+        symtab_off = FILE_ALIGN * 2
+        symtab_size = 0x40
+        sig_off = symtab_off + symtab_size
+        sig_size = 0x40
+
+        data_dirs = [(0, 0)] * 4 + [(sig_off, sig_size)] + [(0, 0)] * 11
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            symtab_off,
+            4,
+            0xE0,
+            0x010F,
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0x1000))
+        for i, (rva, size) in enumerate(data_dirs):
+            struct.pack_into("<II", opt, 96 + i * 8, rva, size)
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + body + b"\x00" * symtab_size + b"\x00" * sig_size
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-sort-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "both.exe"
+            exe.write_bytes(bytes(buf))
+            create_win32_config(exe, bytes(buf))
+            yaml_txt = (tmpdir / "both.exe.yaml").read_text()
+            # coff_symtab must appear before signature (file order).
+            symtab_idx = yaml_txt.index("name: coff_symtab")
+            sig_idx = yaml_txt.index("name: signature")
+            self.assertLess(symtab_idx, sig_idx)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigAuthenticodeSegmentTest(unittest.TestCase):
+    def test_certificate_table_emits_bin_segment(self):
+        """An Authenticode-signed PE has data directory 4 set to the
+        signature blob's FILE offset (unlike the RVA-based directories).
+        Auto-config should emit a `signature` bin segment so the bytes
+        are split into a known file."""
+        body = b"\x90" * FILE_ALIGN
+        sig_data = b"\x00" * 0x40  # placeholder signature blob
+        sig_off = FILE_ALIGN * 2
+        sig_size = len(sig_data)
+
+        # Data directory 4 = (FILE offset, size).
+        data_dirs = [(0, 0)] * 4 + [(sig_off, sig_size)] + [(0, 0)] * 11
+
+        sec_text = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0x1000))
+        # Patch in the data directories starting at offset 96.
+        for i, (rva, size) in enumerate(data_dirs):
+            struct.pack_into("<II", opt, 96 + i * 8, rva, size)
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec_text
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + body + sig_data
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-sig-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "signed.exe"
+            exe.write_bytes(bytes(buf))
+            create_win32_config(exe, bytes(buf))
+            yaml_txt = (tmpdir / "signed.exe.yaml").read_text()
+            self.assertIn("name: signature", yaml_txt)
+            self.assertIn(f"start: 0x{sig_off:X}", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigCoffSymtabSegmentTest(unittest.TestCase):
+    def test_coff_symtab_pointer_emits_bin_segment(self):
+        """Vintage MSVC 4-6 binaries can embed a deprecated COFF symbol
+        table past the last section. Modern MSVC uses PDB instead and
+        leaves PointerToSymbolTable / NumberOfSymbols zero. Emit a
+        `bin` segment for the legacy case so its bytes are extracted
+        rather than swept into the unknown-tail."""
+        body = b"\x90" * FILE_ALIGN
+        symtab_data = b"\x00" * 0x40  # 4 dummy COFF symbols
+        # PointerToSymbolTable will sit just after the .text raw bytes.
+        symtab_off = FILE_ALIGN * 2
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            symtab_off,  # PointerToSymbolTable
+            4,  # NumberOfSymbols
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + body + symtab_data
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-coff-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "vintage.exe"
+            exe.write_bytes(bytes(buf))
+            create_win32_config(exe, bytes(buf))
+            yaml_txt = (tmpdir / "vintage.exe.yaml").read_text()
+            self.assertIn("name: coff_symtab", yaml_txt)
+            self.assertIn(f"start: 0x{symtab_off:X}", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigAllForwardersTest(unittest.TestCase):
+    def test_dll_with_only_forwarder_exports_skips_exports_header(self):
+        """A DLL whose every export is a forwarder (e.g. apisetschema,
+        downlevel shims) should not emit a '// Exports from X' header
+        followed by zero rows. Only the '// Forwarded exports' block
+        below it should appear."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        funcs_rva = rdata_rva + 0x28
+        names_rva = funcs_rva + 0x4
+        ords_rva = names_rva + 0x4
+        name_str_rva = rdata_rva + 0x40
+        dll_name_rva = rdata_rva + 0x50
+        # The forwarder RVA must point INSIDE the export directory range
+        # for it to be classified as a forwarder. Export dir is at
+        # rdata_rva..rdata_rva+0x80; put the forwarder string there.
+        fwd_rva = rdata_rva + 0x60
+        body = bytearray(0x80)
+        struct.pack_into(
+            "<IIHHIIIIIII",
+            body,
+            0x00,
+            0,
+            0,
+            0,
+            0,
+            dll_name_rva,
+            1,
+            1,
+            1,
+            funcs_rva,
+            names_rva,
+            ords_rva,
+        )
+        struct.pack_into("<I", body, 0x28, fwd_rva)  # forwarder RVA
+        struct.pack_into("<I", body, 0x2C, name_str_rva)
+        struct.pack_into("<H", body, 0x30, 0)
+        body[0x40 : 0x40 + 8] = b"OldFunc\x00"
+        body[0x50 : 0x50 + 11] = b"shim.dll\x00\x00\x00"
+        body[0x60 : 0x60 + 16] = b"KERNEL32.NewName"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x20,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x20,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(rdata_rva, 0x80)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-shim-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            dll = tmpdir / "shim.dll"
+            dll.write_bytes(pe_bytes)
+            create_win32_config(dll, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            # No "// Exports from" header since no named non-forwarder exports.
+            self.assertNotIn("// Exports from", txt)
+            # Forwarder block should still appear.
+            self.assertIn("// Forwarded exports", txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class RvaToFileOffsetVirtualTailTest(unittest.TestCase):
+    def test_rva_in_virtual_only_tail_returns_none(self):
+        """A section with VirtualSize > SizeOfRawData has a virtual-only
+        tail that the loader zero-fills. RVAs in that range have no
+        backing bytes — `rva_to_file_offset` must return None instead
+        of mapping into the next section's raw bytes."""
+        # Two sections. .text: raw 0x200, virt 0x400 (0x200 tail).
+        # .rdata: raw 0x200, virt 0x10.
+        text_body = b"\x90" * FILE_ALIGN
+        rdata_body = b"\x11" * 0x10
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            2,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        # .text: virtual_size 0x400, raw_size FILE_ALIGN (0x200), raw_ptr 0x200.
+        sec_text = _section_header(
+            b".text", 0x400, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        sec_rdata = _section_header(
+            b".rdata", 0x10, 0x2000, FILE_ALIGN, FILE_ALIGN * 2, 0x40000040
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec_text + sec_rdata
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + text_body + rdata_body.ljust(FILE_ALIGN, b"\x00")
+
+        pe = win32_platform.parse_pe(bytes(buf))
+        # RVA 0x1100 — past .text raw_size (0x200) but within
+        # virtual_size (0x400). Should return None.
+        self.assertIsNone(pe.rva_to_file_offset(0x1100 + 0x100))
+        # RVA 0x1100 itself — still inside raw bytes (0x100 < 0x200) — valid.
+        self.assertEqual(pe.rva_to_file_offset(0x1100), FILE_ALIGN + 0x100)
+
+
+class Win32PublicSurfaceTest(unittest.TestCase):
+    """Smoke-test the public symbols of the win32 platform module so
+    refactors that accidentally make a constant private (`_NAME`) or
+    rename a dataclass break loudly instead of silently downstream."""
+
+    def test_public_names_present(self):
+        from src.splat.platforms import win32
+
+        expected = {
+            # Dataclasses
+            "PEInfo",
+            "PESection",
+            "PEExport",
+            "PEImport",
+            "PEBoundImport",
+            "PEResource",
+            "COFFSymbol",
+            "UnwindInfo",
+            "CLRHeader",
+            # Top-level entry points
+            "parse_pe",
+            "init",
+            "info",
+            "raw_image",
+            # Helpers used by segtypes + create_config
+            "sanitize_label",
+            "compute_iat_labels",
+            "compute_export_labels",
+            "ptr_layout",
+            "resolve_exact_encoding",
+            # Machine codes
+            "MACHINE_I386",
+            "MACHINE_AMD64",
+            "MACHINE_ARM32",
+            "MACHINE_ARM64",
+            # Magic values
+            "OPT_MAGIC_PE32",
+            "OPT_MAGIC_PE32_PLUS",
+            # Data directory indices
+            "DIR_EXPORT",
+            "DIR_IMPORT",
+            "DIR_RESOURCE",
+            "DIR_EXCEPTION",
+            "DIR_CERTIFICATE",
+            "DIR_BASERELOC",
+            "DIR_DEBUG",
+            "DIR_TLS",
+            "DIR_LOAD_CONFIG",
+            "DIR_BOUND_IMPORT",
+            "DIR_DELAY_IMPORT",
+        }
+        for name in expected:
+            self.assertTrue(
+                hasattr(win32, name),
+                f"public win32 platform surface missing '{name}'",
+            )
+
+
+class CreateConfigCLRSymbolsTest(unittest.TestCase):
+    def test_dotnet_binary_emits_clr_metadata_symbols(self):
+        """A .NET PE (data dir 14 populated) should emit `clr_metadata`,
+        `clr_strong_name_signature`, `clr_resources` symbols in
+        symbol_addrs.txt so disasm cross-references resolve."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        cor20 = struct.pack(
+            "<IHHIIIIIIII",
+            72,
+            2,
+            5,
+            0x2100,
+            0x400,
+            0x00000001,
+            0x06000001,
+            0x2500,
+            0x100,
+            0x4000,
+            0x80,
+        )
+        body = bytearray(cor20 + b"\x00" * (FILE_ALIGN - len(cor20)))
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x400,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0)] * 14 + [(rdata_rva, 72), (0, 0)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-clr-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "clr.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            self.assertIn("clr_metadata = 0x", txt)
+            self.assertIn("clr_strong_name_signature = 0x", txt)
+            self.assertIn("clr_resources = 0x", txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class ParseCLRHeaderTest(unittest.TestCase):
+    def test_dotnet_cor20_header_decoded(self):
+        """A PE with data dir 14 (CLR Runtime Header) populated has
+        an IMAGE_COR20_HEADER record that pe.clr_header decodes:
+        runtime version, metadata RVA/size, entry-point token,
+        strong-name signature pointer."""
+        # COR20 header lives in .rdata; data dir 14 RVA points at it.
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        cor20 = struct.pack(
+            "<IHHIIIIIIII",
+            72,  # cb (always 72)
+            2,
+            5,  # runtime major/minor (CLR 2.5)
+            0x2100,
+            0x400,  # metadata RVA/size
+            0x00000001,  # flags (COMIMAGE_FLAGS_ILONLY)
+            0x06000001,  # entry point token (MethodDef 0x1)
+            0,
+            0,  # resources RVA/size
+            0x4000,
+            0x80,  # strong name sig RVA/size
+        )
+        body = bytearray(cor20 + b"\x00" * (FILE_ALIGN - len(cor20)))
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x400,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0)] * 14 + [(rdata_rva, 72), (0, 0)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        assert pe.clr_header is not None
+        self.assertEqual(pe.clr_header.cb_size, 72)
+        self.assertEqual(pe.clr_header.runtime_major, 2)
+        self.assertEqual(pe.clr_header.runtime_minor, 5)
+        self.assertEqual(pe.clr_header.metadata_rva, 0x2100)
+        self.assertEqual(pe.clr_header.metadata_size, 0x400)
+        self.assertEqual(pe.clr_header.flags, 1)
+        self.assertEqual(pe.clr_header.entry_point_token_or_rva, 0x06000001)
+        self.assertEqual(pe.clr_header.strong_name_signature_rva, 0x4000)
+
+
+class ParseVersionTranslationTest(unittest.TestCase):
+    def test_var_file_info_translation_block_surfaced(self):
+        """The VarFileInfo 'Translation' child of a VERSION resource
+        carries an array of (LANGID, codepage) pairs as binary data.
+        Verify _walk_versioninfo_node decodes it into a comma-separated
+        '0xLLLL/0xCCCC' string stored under 'Translation'."""
+        from src.splat.platforms.win32 import _walk_versioninfo_node
+
+        # Build a synthetic VarFileInfo > Translation node with two
+        # locale pairs: en-US (0x0409, 0x04E4) and de-DE (0x0407, 0x04E4).
+        # Node layout:
+        #   wLength + wValueLength(8) + wType(0=binary) + key + value
+        def _wstr(s: str) -> bytes:
+            return (s + "\x00").encode("utf-16-le")
+
+        def _align4(off: int) -> int:
+            return (off + 3) & ~3
+
+        key = _wstr("Translation")
+        value = struct.pack("<HHHH", 0x0409, 0x04E4, 0x0407, 0x04E4)
+        header_size = 6  # wLength + wValueLength + wType
+        body_off = _align4(header_size + len(key))
+        total_size = body_off + len(value)
+        node = (
+            struct.pack("<HHH", total_size, len(value), 0)
+            + key
+            + b"\x00" * (body_off - header_size - len(key))
+            + value
+        )
+
+        out: dict = {}
+        _walk_versioninfo_node(node, 0, out, 0)
+        self.assertIn("Translation", out)
+        self.assertEqual(out["Translation"], "0x0409/0x04E4, 0x0407/0x04E4")
+
+
+class ParseUnwindInfoTest(unittest.TestCase):
+    def test_unwind_codes_decoded_from_runtime_function(self):
+        """A PE32+ binary with one RUNTIME_FUNCTION pointing at an
+        IMAGE_UNWIND_INFO record encoding a simple prologue
+        (`push rbp; sub rsp, 0x20`) must decode the codes."""
+        # Layout: text(0x1000) + pdata(0x2000, single RUNTIME_FUNCTION
+        # pointing at unwind at rdata 0x3000) + rdata(0x3000, unwind info).
+        text_body = b"\x90" * FILE_ALIGN
+        pdata_body = struct.pack("<III", 0x1000, 0x1010, 0x3000)
+        pdata_body = pdata_body.ljust(FILE_ALIGN, b"\x00")
+
+        # UNWIND_INFO:
+        #   version=1, flags=0, prolog_size=5, count=2, frame_reg=0/0
+        #   code[0]: offset=5,  op=ALLOC_SMALL(2) info=3 (3*8+8 = 32 bytes)
+        #   code[1]: offset=1,  op=PUSH_NONVOL(0) info=5 (RBP=reg 5)
+        unwind = struct.pack(
+            "<BBBB" + "BB" * 2,
+            0x01,  # version 1, flags 0
+            5,  # SizeOfProlog
+            2,  # CountOfUnwindCodes
+            0,  # FrameRegister 0
+            5,
+            0x32,  # code 0: offset 5, op ALLOC_SMALL (2), info 3
+            1,
+            0x50,  # code 1: offset 1, op PUSH_NONVOL (0), info 5 (RBP)
+        )
+        rdata_body = unwind + b"\x00" * (FILE_ALIGN - len(unwind))
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x100,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": text_body,
+                },
+                {
+                    "name": b".pdata",
+                    "vsize": 0x10,
+                    "vaddr": 0x2000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN * 2,
+                    "chars": 0x40000040,
+                    "body": pdata_body,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": len(unwind),
+                    "vaddr": 0x3000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN * 3,
+                    "chars": 0x40000040,
+                    "body": rdata_body,
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0x2000, 12),
+            ],
+        )
+
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertIn(0x3000, pe.unwind_info)
+        uw = pe.unwind_info[0x3000]
+        self.assertEqual(uw.version, 1)
+        self.assertEqual(uw.prolog_size, 5)
+        self.assertEqual(len(uw.codes), 2)
+        # First code: ALLOC_SMALL with info 3.
+        self.assertEqual(uw.codes[0][1], "ALLOC_SMALL")
+        self.assertEqual(uw.codes[0][2], 3)
+        # Second code: PUSH_NONVOL with info 5 (RBP).
+        self.assertEqual(uw.codes[1][1], "PUSH_NONVOL")
+        self.assertEqual(uw.codes[1][2], 5)
+
+
+class ParseCoffSymtabTest(unittest.TestCase):
+    def test_vintage_coff_symbols_decoded(self):
+        """A vintage MSVC 4-6 PE shipping an embedded COFF symbol table:
+        each 18-byte IMAGE_SYMBOL record decodes to a COFFSymbol entry
+        on pe.coff_symbols with name / value / section / class."""
+        # Build a tiny PE with PointerToSymbolTable + NumberOfSymbols
+        # populated. Two symbols:
+        #   "main"     value=0x401000, section=1, type=0x20, class=2
+        #   ".text"    value=0,        section=1, type=0x00, class=3
+        sym_a = b"main\x00\x00\x00\x00" + struct.pack(
+            "<IhHBB", 0x00401000, 1, 0x20, 2, 0
+        )
+        sym_b = b".text\x00\x00\x00" + struct.pack("<IhHBB", 0, 1, 0x00, 3, 0)
+        symtab = sym_a + sym_b
+        # Empty string table = just a 4-byte length field of value 4.
+        str_table = struct.pack("<I", 4)
+        # Place symtab at FILE_ALIGN * 2.
+        symtab_off = FILE_ALIGN * 2
+
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            symtab_off,
+            2,
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN + symtab + str_table
+
+        pe = win32_platform.parse_pe(bytes(buf))
+        self.assertEqual(len(pe.coff_symbols), 2)
+        self.assertEqual(pe.coff_symbols[0].name, "main")
+        self.assertEqual(pe.coff_symbols[0].value, 0x00401000)
+        self.assertEqual(pe.coff_symbols[0].section_number, 1)
+        self.assertEqual(pe.coff_symbols[0].storage_class, 2)
+        self.assertEqual(pe.coff_symbols[1].name, ".text")
+        self.assertEqual(pe.coff_symbols[1].section_number, 1)
+
+
+class Win32MachineTypeTableTest(unittest.TestCase):
+    def test_machine_types_cover_arch_id_landscape(self):
+        from src.splat.segtypes.win32.header import _MACHINE_TYPES
+
+        # x86/x86_64 — the ones splat actually disassembles.
+        self.assertEqual(_MACHINE_TYPES[0x014C], "i386")
+        self.assertEqual(_MACHINE_TYPES[0x8664], "amd64")
+        # ARM family — splat rejects but the header dump should still
+        # name them so the user understands why.
+        self.assertEqual(_MACHINE_TYPES[0xAA64], "ARM64")
+        self.assertEqual(_MACHINE_TYPES[0x01C4], "ARMNT (Thumb-2)")
+        # Modern architectures (RISC-V, LoongArch) for forward-compat.
+        self.assertEqual(_MACHINE_TYPES[0x5064], "RISC-V 64-bit")
+        self.assertEqual(_MACHINE_TYPES[0x6264], "LoongArch 64-bit")
+        # Vintage architectures occasionally seen in CE / NT-RISC builds.
+        self.assertEqual(_MACHINE_TYPES[0x0184], "Alpha AXP")
+        self.assertEqual(_MACHINE_TYPES[0x0200], "Itanium (IA-64)")
+
+
+class ParsePEFarPeOffsetTest(unittest.TestCase):
+    def test_e_lfanew_pointing_past_file_end_rejected(self):
+        """A fuzzed MZ header with e_lfanew = 0xFFFFFFFF (or any value
+        pointing past end-of-file) must reject cleanly rather than
+        crash on the slice read. parse_pe calls log.error which is
+        NoReturn (SystemExit)."""
+        buf = bytearray(0x40)
+        buf[0:2] = b"MZ"
+        # Set e_lfanew (file offset 0x3C) to a huge value.
+        struct.pack_into("<I", buf, 0x3C, 0xFFFFFFFF)
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(bytes(buf))
+
+
+class EscapeStringTest(unittest.TestCase):
+    """`_escape_string` produces the same GAS-compatible escape forms
+    consistently across the data segment's `.asciz` emission and the
+    wide-string `/* L"..." */` preview comments."""
+
+    def test_quote_and_backslash_escaped(self):
+        from src.splat.segtypes.win32.data import _escape_string
+
+        self.assertEqual(_escape_string(b'"\\'), r"\"\\")
+
+    def test_tab_newline_return_use_short_forms(self):
+        from src.splat.segtypes.win32.data import _escape_string
+
+        self.assertEqual(_escape_string(b"\t\n\r"), r"\t\n\r")
+
+    def test_non_printable_uses_hex_escape(self):
+        from src.splat.segtypes.win32.data import _escape_string
+
+        # 0x01 (SOH) — out of printable range, fall back to \xNN form.
+        self.assertEqual(_escape_string(b"\x01"), r"\x01")
+
+    def test_latin1_supplement_uses_hex_escape(self):
+        from src.splat.segtypes.win32.data import _escape_string
+
+        # 0xFC (ü) — _is_string_byte now accepts it for scanning, but
+        # _escape_string still emits it as \xfc so GAS interprets the
+        # byte literally instead of relying on locale codepage decoding.
+        self.assertEqual(_escape_string(b"\xfc"), r"\xfc")
+
+    def test_decode_wide_round_trips_basic_latin(self):
+        from src.splat.segtypes.win32.data import _decode_wide
+
+        # UTF-16LE bytes for "AB" → "AB" passthrough.
+        self.assertEqual(_decode_wide(b"A\x00B\x00"), "AB")
+
+    def test_decode_wide_uses_unicode_escape_for_non_ascii(self):
+        from src.splat.segtypes.win32.data import _decode_wide
+
+        # Polish 'ł' is U+0142 — outside the 0x20-0x7E printable subset
+        # used for the preview, so the comment falls back to \uNNNN
+        # form rather than the literal codepoint (keeps the generated
+        # .s ASCII-only).
+        self.assertEqual(_decode_wide(b"\x42\x01"), "\\u0142")
+
+
+class NarrowStringDetectionTest(unittest.TestCase):
+    """Narrow ANSI string scanner: NUL-terminated runs of printable bytes."""
+
+    def test_basic_ascii_run(self):
+        from src.splat.segtypes.win32.data import _scan_string
+
+        blob = b"hello\x00"
+        end = _scan_string(blob, 0)
+        self.assertEqual(end, 6)
+
+    def test_run_below_minimum_rejected(self):
+        from src.splat.segtypes.win32.data import _scan_string
+
+        # 3 chars + NUL < STRING_MIN_LEN (4).
+        self.assertIsNone(_scan_string(b"hi\x00\x00", 0))
+
+    def test_missing_terminator_rejected(self):
+        from src.splat.segtypes.win32.data import _scan_string
+
+        # Printable run with no NUL terminator before EOF.
+        self.assertIsNone(_scan_string(b"hello", 0))
+
+    def test_latin1_supplement_accepted(self):
+        from src.splat.segtypes.win32.data import _scan_string
+
+        # 'Müller' in Latin-1: M ü l l e r \0 — middle byte 0xFC is in
+        # the Latin-1 Supplement printable range (0xA0-0xFF). The
+        # scanner accepts it (symmetric with the wide-string scanner).
+        blob = b"M\xfcller\x00"
+        end = _scan_string(blob, 0)
+        self.assertEqual(end, 7)
+
+    def test_non_printable_byte_terminates(self):
+        from src.splat.segtypes.win32.data import _scan_string
+
+        # 0x01 (SOH) isn't in our printable set — scanner short-circuits
+        # without finding a NUL terminator.
+        self.assertIsNone(_scan_string(b"ab\x01cd\x00", 0))
+
+
+class WideStringDetectionTest(unittest.TestCase):
+    """Wide-string scanner returns the byte range past the WCHAR NUL."""
+
+    def test_simple_utf16_string(self):
+        from src.splat.segtypes.win32.data import _scan_wide_string, _decode_wide
+
+        # "Hello\0" in UTF-16LE
+        blob = b"H\x00e\x00l\x00l\x00o\x00\x00\x00"
+        end = _scan_wide_string(blob, 0)
+        self.assertEqual(end, 12)
+        assert end is not None
+        self.assertEqual(_decode_wide(blob[: end - 2]), "Hello")
+
+    def test_too_short_rejected(self):
+        from src.splat.segtypes.win32.data import _scan_wide_string
+
+        # "Hi\0" — only 2 WCHARs, below WIDE_STRING_MIN_LEN=4.
+        self.assertIsNone(_scan_wide_string(b"H\x00i\x00\x00\x00", 0))
+
+    def test_misaligned_offset(self):
+        from src.splat.segtypes.win32.data import _scan_wide_string
+
+        # Odd start offsets aren't valid WCHAR positions.
+        blob = b"\x00H\x00e\x00l\x00l\x00o\x00\x00\x00"
+        self.assertIsNone(_scan_wide_string(blob, 1))
+
+    def test_high_byte_nonzero_rejected(self):
+        from src.splat.segtypes.win32.data import _scan_wide_string
+
+        # If the WCHAR's high byte is nonzero (codepoint > U+00FF — i.e.
+        # outside Basic Latin + Latin-1 Supplement, the conservative
+        # Western-language coverage zone), our cautious detector skips
+        # the run to avoid false positives.
+        # Polish 'ł' is U+0142 -> UTF-16LE bytes 0x42 0x01.
+        blob = b"\x42\x01\x42\x01\x42\x01\x42\x01\x00\x00"
+        self.assertIsNone(_scan_wide_string(blob, 0))
+
+    def test_latin1_supplement_accepted(self):
+        from src.splat.segtypes.win32.data import _scan_wide_string
+
+        # Latin-1 Supplement (U+00A0 .. U+00FF) covers German umlauts,
+        # French accents, Spanish ñ — common in localised resources.
+        # 'Müller' is M ü l l e r -> 4D 00 FC 00 6C 00 6C 00 65 00 72 00.
+        blob = b"\x4d\x00\xfc\x00\x6c\x00\x6c\x00\x65\x00\x72\x00\x00\x00"
+        end = _scan_wide_string(blob, 0)
+        self.assertEqual(end, len(blob))
+
+
+class Win32SegBssTest(unittest.TestCase):
+    def test_reserved_size_resolution(self):
+        """Win32SegBss.reserved_size resolves: explicit yaml bss_size
+        wins over vram-end - vram-start arithmetic; zero when neither
+        is available."""
+        from src.splat.segtypes.win32.bss import Win32SegBss
+
+        obj = Win32SegBss.__new__(Win32SegBss)
+        # Upstream's bss_size rework requires every Segment to have a
+        # bss_size attribute. Provide it for the bypass-init path.
+        obj.bss_size = 0
+
+        # 1) Explicit bss_size in yaml wins.
+        obj.yaml = {"bss_size": 0x1000}
+        obj.vram_start = 0x10000000
+        # vram_end is a @property derived from vram_start + size; ignore
+        # via direct attribute override.
+        self.assertEqual(obj.reserved_size, 0x1000)
+
+        # 2) Without bss_size, fall back to vram_end - vram_start.
+        # vram_end on the Segment base is a property — emulate by giving
+        # obj.rom_start / rom_end so size resolves to a known value.
+        obj.yaml = {}
+        obj.rom_start = 0
+        obj.rom_end = 0x500
+        # vram_end = vram_start + (rom_end - rom_start) = 0x10000500
+        self.assertEqual(obj.reserved_size, 0x500)
+
+        # 3) Neither yaml nor a derivable vram_end → 0.
+        obj.yaml = {}
+        obj.rom_start = None
+        obj.rom_end = None
+        obj.vram_start = None
+        self.assertEqual(obj.reserved_size, 0)
+
+
+class ResolveExactEncodingTest(unittest.TestCase):
+    """Direct tests for the shared helper, complementing the segtype-
+    level inheritance test."""
+
+    def test_per_subsegment_yaml_wins(self):
+        self.assertTrue(
+            win32_platform.resolve_exact_encoding({"exact_encoding": True}, None)
+        )
+        self.assertFalse(
+            win32_platform.resolve_exact_encoding({"exact_encoding": False}, None)
+        )
+
+    def test_parent_yaml_used_when_subsegment_silent(self):
+        import types
+
+        parent = types.SimpleNamespace(yaml={"exact_encoding": True})
+        self.assertTrue(win32_platform.resolve_exact_encoding({}, parent))
+        # Subsegment 'False' beats parent 'True'.
+        self.assertFalse(
+            win32_platform.resolve_exact_encoding({"exact_encoding": False}, parent)
+        )
+
+    def test_default_returned_when_neither_speaks_up(self):
+        self.assertFalse(win32_platform.resolve_exact_encoding({}, None))
+        self.assertTrue(win32_platform.resolve_exact_encoding({}, None, default=True))
+
+    def test_non_dict_yaml_is_silent(self):
+        # If the subsegment YAML is a list (the bare-tuple shorthand
+        # form), `isinstance(yaml, dict)` falls through; helper must
+        # not crash, just return the default.
+        self.assertFalse(
+            win32_platform.resolve_exact_encoding([0x200, "text", "main"], None)
+        )
+
+    def test_parent_without_yaml_attribute_handled(self):
+        # The parent argument is sometimes None at runtime; sometimes
+        # a Segment-shaped object whose `.yaml` is a list. Both must
+        # fall through to the default cleanly.
+        import types
+
+        no_yaml = types.SimpleNamespace()
+        list_yaml = types.SimpleNamespace(yaml=[])
+        self.assertFalse(win32_platform.resolve_exact_encoding({}, no_yaml))
+        self.assertFalse(win32_platform.resolve_exact_encoding({}, list_yaml))
+
+
+class Win32ExactEncodingInheritanceTest(unittest.TestCase):
+    def test_inheritance_chain(self):
+        """exact_encoding resolves in priority order:
+        subsegment YAML > parent code-group YAML > class default."""
+        from src.splat.segtypes.win32.text import Win32SegText
+        from src.splat.segtypes.win32.data import Win32SegData
+        from src.splat.segtypes.win32.pdata import Win32SegPdata
+
+        for cls in (Win32SegText, Win32SegData, Win32SegPdata):
+            # Instantiate manually (bypass splat's segment factory).
+            obj = cls.__new__(cls)
+            obj.yaml = {}
+            obj.parent = None
+            self.assertFalse(obj.exact_encoding)  # type: ignore[attr-defined]
+
+            obj.yaml = {"exact_encoding": True}
+            self.assertTrue(obj.exact_encoding)  # type: ignore[attr-defined]
+
+            obj.yaml = {}
+            # Use a simple namespace as parent — it just needs a .yaml attr.
+            import types
+
+            obj.parent = types.SimpleNamespace(yaml={"exact_encoding": True})  # type: ignore[assignment]
+            self.assertTrue(obj.exact_encoding)  # type: ignore[attr-defined]
+
+            obj.yaml = {"exact_encoding": False}
+            self.assertFalse(obj.exact_encoding)  # type: ignore[attr-defined]
+
+
+class CreateConfigRejectionTest(unittest.TestCase):
+    def test_non_pe_file_falls_through_to_error(self):
+        """create_config.main() with a file that's neither N64 / PSX /
+        ELF / PE bytes must log.error → SystemExit, not crash silently
+        partway through."""
+        import tempfile
+        from src.splat.scripts.create_config import main as create_config_main
+        from pathlib import Path as _P
+
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            f.write(b"random non-PE garbage data, neither MZ nor ELF")
+            tmp = _P(f.name)
+        try:
+            with self.assertRaises(SystemExit):
+                create_config_main(tmp, None)
+        finally:
+            tmp.unlink()
+
+
+class CreateConfigExportLabelDedupTest(unittest.TestCase):
+    def test_clashing_sanitized_labels_get_ordinal_suffix(self):
+        """Two C++ exports with distinct mangled names that sanitize to the
+        same identifier (e.g. 'foo@bar' and 'foo$bar' both -> 'foo_bar')
+        must produce unique symbol_addrs labels — splat rejects
+        duplicates."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        funcs_rva = rdata_rva + 0x28
+        names_rva = funcs_rva + 0x8  # 2 funcs * 4
+        ords_rva = names_rva + 0x8  # 2 names * 4
+        # Strings laid out at +0x40, +0x48, +0x50.
+        s1_rva = rdata_rva + 0x40
+        s2_rva = rdata_rva + 0x48
+        dll_name_rva = rdata_rva + 0x50
+        body = bytearray(0x80)
+        struct.pack_into(
+            "<IIHHIIIIIII",
+            body,
+            0x00,
+            0,
+            0,
+            0,
+            0,
+            dll_name_rva,
+            1,  # ordinal base
+            2,  # num funcs
+            2,  # num names
+            funcs_rva,
+            names_rva,
+            ords_rva,
+        )
+        struct.pack_into("<II", body, 0x28, 0x1000, 0x1010)  # funcs
+        struct.pack_into("<II", body, 0x30, s1_rva, s2_rva)  # name ptrs
+        struct.pack_into("<HH", body, 0x38, 0, 1)  # ordinal idxs
+        body[0x40 : 0x40 + 8] = b"foo@bar\x00"
+        body[0x48 : 0x48 + 8] = b"foo$bar\x00"
+        body[0x50 : 0x50 + 11] = b"clash.dll\x00\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x20,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x20,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(rdata_rva, 0x80)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-dedup-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            dll = tmpdir / "clash.dll"
+            dll.write_bytes(pe_bytes)
+            create_win32_config(dll, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            # Both clash to 'foo_bar' after sanitize. First wins as-is;
+            # second gets ordinal suffix.
+            self.assertIn("foo_bar = 0x", txt)
+            self.assertIn("foo_bar__ord2", txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigEntrypointExportCollisionTest(unittest.TestCase):
+    def test_export_named_entrypoint_does_not_collide_with_entrypoint_symbol(self):
+        """A DLL whose entry point exists AND whose export table contains
+        a function literally named 'entrypoint' would otherwise emit two
+        symbol_addrs rows with the same label, which splat rejects.
+        Second occurrence must get the ordinal suffix."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        funcs_rva = rdata_rva + 0x28
+        names_rva = funcs_rva + 0x4
+        ords_rva = names_rva + 0x4
+        name_str_rva = rdata_rva + 0x40
+        dll_name_rva = rdata_rva + 0x50
+        body = bytearray(0x80)
+        struct.pack_into(
+            "<IIHHIIIIIII",
+            body,
+            0x00,
+            0,
+            0,
+            0,
+            0,
+            dll_name_rva,
+            1,
+            1,
+            1,
+            funcs_rva,
+            names_rva,
+            ords_rva,
+        )
+        struct.pack_into("<I", body, 0x28, 0x1010)
+        struct.pack_into("<I", body, 0x2C, name_str_rva)
+        struct.pack_into("<H", body, 0x30, 0)
+        body[0x40 : 0x40 + 11] = b"entrypoint\x00"
+        body[0x50 : 0x50 + 11] = b"collide.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x20,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x20,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(rdata_rva, 0x80)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-collide-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            dll = tmpdir / "collide.dll"
+            dll.write_bytes(pe_bytes)
+            create_win32_config(dll, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            # Built-in entrypoint kept; conflicting export suffixed.
+            self.assertIn("entrypoint = 0x", txt)
+            self.assertIn("entrypoint__ord1", txt)
+            # No duplicate bare "entrypoint =" lines.
+            label_count = sum(
+                1
+                for line in txt.splitlines()
+                if line.lstrip().startswith("entrypoint =")
+            )
+            self.assertEqual(label_count, 1)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigZeroSizeBssTest(unittest.TestCase):
+    def test_zero_virtual_size_bss_section_is_skipped(self):
+        """A section flagged as UNINITIALIZED_DATA with VirtualSize = 0
+        has no runtime footprint. Emitting `bss_size: 0x0` would create
+        a malformed splat segment — skip such sections in YAML
+        generation."""
+        # Build PE with .text + a degenerate empty .bss-flagged section.
+        text_body = b"\x90" * FILE_ALIGN
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": text_body,
+                },
+                # raw_size=0, virtual_size=0, characteristics has
+                # SCN_CNT_UNINITIALIZED_DATA (0x00000080).
+                {
+                    "name": b".bss",
+                    "vsize": 0,
+                    "vaddr": 0x2000,
+                    "rsize": 0,
+                    "rptr": 0,
+                    "chars": 0xC0000080,
+                    "body": b"",
+                },
+            ],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-emptybss-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "emptybss.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            yaml_txt = (tmpdir / "emptybss.exe.yaml").read_text()
+            self.assertNotIn("bss_size: 0x0", yaml_txt)
+            # Empty .bss section should not produce a segment line at all.
+            self.assertNotIn("name: bss,", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigSectionNameDedupTest(unittest.TestCase):
+    def test_duplicate_section_names_disambiguated_in_yaml(self):
+        """PE format doesn't require unique section names. A packed or
+        hand-crafted image with two '.text' sections must still produce
+        a valid splat YAML (no duplicate segment names)."""
+        # Two .text-named sections.
+        sec_body = b"\x90" * FILE_ALIGN
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": sec_body,
+                },
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x2000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN * 2,
+                    "chars": 0x60000020,
+                    "body": sec_body,
+                },
+            ],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-dupesec-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "dupe.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            yaml_path = tmpdir / "dupe.exe.yaml"
+            yaml_txt = yaml_path.read_text()
+            # First occurrence keeps the bare name; second gets a "_1" suffix.
+            self.assertIn("name: text", yaml_txt)
+            self.assertIn("name: text_1", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigLeadingDigitSanitizeTest(unittest.TestCase):
+    def test_export_starting_with_digit_gets_underscore_prefix(self):
+        """Exports named with a leading digit (legacy D3D / some Delphi
+        outputs) must not produce GAS-invalid labels like '7ZipOpen = ...'.
+        Prefix with underscore to keep the identifier valid."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        funcs_rva = rdata_rva + 0x28
+        names_rva = funcs_rva + 0x4
+        ords_rva = names_rva + 0x4
+        name_str_rva = rdata_rva + 0x40
+        dll_name_rva = rdata_rva + 0x50
+        body = bytearray(0x80)
+        struct.pack_into(
+            "<IIHHIIIIIII",
+            body,
+            0x00,
+            0,
+            0,
+            0,
+            0,
+            dll_name_rva,
+            1,
+            1,
+            1,
+            funcs_rva,
+            names_rva,
+            ords_rva,
+        )
+        struct.pack_into("<I", body, 0x28, 0x1000)
+        struct.pack_into("<I", body, 0x2C, name_str_rva)
+        struct.pack_into("<H", body, 0x30, 0)
+        body[0x40 : 0x40 + 9] = b"7ZipOpen\x00"
+        body[0x50 : 0x50 + 7] = b"7z.dll\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x20,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x20,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(rdata_rva, 0x80)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-digit-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            dll = tmpdir / "digit.dll"
+            dll.write_bytes(pe_bytes)
+            create_win32_config(dll, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            self.assertIn("_7ZipOpen = 0x", txt)
+            # No bare-digit-leading labels.
+            for line in txt.splitlines():
+                if "=" in line and "0x" in line:
+                    label = line.split("=")[0].strip()
+                    if label:
+                        self.assertFalse(
+                            label[0].isdigit(),
+                            f"label starts with digit: {label!r}",
+                        )
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigImportDllSanitizeTest(unittest.TestCase):
+    def test_dll_name_with_hyphen_is_fully_sanitized(self):
+        """A DLL name like 'api-ms-win-core-com-l1-1-0.dll' would, with
+        bare `.replace('.', '_')`, leave hyphens in the label — invalid
+        GAS identifiers. Sanitize the whole stem."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        body = bytearray(0x100)
+        ilt_rva = rdata_rva + 0x50
+        iat_rva = rdata_rva + 0x60
+        dll_rva = rdata_rva + 0x30
+        hn_rva = rdata_rva + 0x70
+        # Descriptor 1 + null terminator.
+        struct.pack_into("<IIIII", body, 0x00, ilt_rva, 0, 0, dll_rva, iat_rva)
+        struct.pack_into("<IIIII", body, 0x14, 0, 0, 0, 0, 0)
+        # Hyphenated DLL name.
+        dll_name = b"api-ms-win-core-com-l1-1-0.dll\x00"
+        body[0x30 : 0x30 + len(dll_name)] = dll_name
+        # Single thunk + NUL.
+        struct.pack_into("<II", body, 0x50, hn_rva, 0)
+        struct.pack_into("<II", body, 0x60, hn_rva, 0)
+        # Hint/name.
+        body[0x70 : 0x70 + 2] = b"\x00\x00"
+        body[0x72 : 0x72 + 9] = b"CoCreate\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x20,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x20,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 0x100)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-dllname-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "hyphen.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            self.assertIn("imp_api_ms_win_core_com_l1_1_0_dll_CoCreate", txt)
+            # No raw hyphens in any emitted symbol label (label part only,
+            # not the comment tail).
+            for line in txt.splitlines():
+                if "=" in line and "0x" in line:
+                    label = line.split("=")[0].strip()
+                    self.assertNotIn("-", label, f"hyphen leaked into label: {line}")
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigImportLabelDedupTest(unittest.TestCase):
+    def test_clashing_sanitized_import_labels_get_rva_suffix(self):
+        """Two imports from the same DLL with distinct mangled names
+        that sanitize to the same identifier must produce unique
+        symbol_addrs labels — splat rejects duplicates."""
+        # Layout: .text + .rdata where .rdata holds the import table:
+        #   0x00..0x28 : two IMAGE_IMPORT_DESCRIPTOR + null terminator
+        #   0x40..0x4F : DLL name "kernel32.dll\x00"
+        #   0x50..0x57 : ILT (two thunks + NUL)
+        #   0x60..0x67 : IAT  (two thunks + NUL, mirrors ILT)
+        #   0x70..    : hint/name records and the function names
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        body = bytearray(0x100)
+
+        ilt_rva = rdata_rva + 0x50
+        iat_rva = rdata_rva + 0x60
+        dll_rva = rdata_rva + 0x40
+        # Hint/name records.
+        hn1_rva = rdata_rva + 0x70
+        hn2_rva = rdata_rva + 0x80
+
+        # Descriptor 1: kernel32.dll with our two imports.
+        struct.pack_into(
+            "<IIIII",
+            body,
+            0x00,
+            ilt_rva,
+            0,
+            0,
+            dll_rva,
+            iat_rva,
+        )
+        # NULL terminator descriptor.
+        struct.pack_into("<IIIII", body, 0x14, 0, 0, 0, 0, 0)
+
+        body[0x40 : 0x40 + 13] = b"kernel32.dll\x00"
+
+        # ILT entries (32-bit thunks since this is PE32).
+        struct.pack_into("<III", body, 0x50, hn1_rva, hn2_rva, 0)
+        struct.pack_into("<III", body, 0x60, hn1_rva, hn2_rva, 0)
+
+        # Hint/name records: each is WORD hint + NUL-terminated name.
+        body[0x70 : 0x70 + 2] = b"\x00\x00"
+        body[0x72 : 0x72 + 8] = b"foo@bar\x00"
+        body[0x80 : 0x80 + 2] = b"\x00\x00"
+        body[0x82 : 0x82 + 8] = b"foo$bar\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x20,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x20,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 0x100)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-imp-dedup-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "imp_clash.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            # First import wins as-is; second collides and gets RVA suffix.
+            self.assertIn("imp_kernel32_dll_foo_bar = 0x", txt)
+            self.assertIn("imp_kernel32_dll_foo_bar__rva", txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigEntrypointOmissionTest(unittest.TestCase):
+    def test_dll_with_no_entrypoint_omits_entrypoint_symbol(self):
+        """A DLL built without DllMain has AddressOfEntryPoint = 0. The
+        generated symbol_addrs file must NOT include an 'entrypoint = ...'
+        line in that case, otherwise we'd label the PE header as a
+        function."""
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        # PE32 DLL with entry_point_rva = 0.
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        body = b"\x00" * 0x10
+        size_of_opt = 0xE0
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            2,
+            0x12345678,
+            0,
+            0,
+            size_of_opt,
+            0x010F | 0x2000,  # IMAGE_FILE_DLL
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0))
+        sec1 = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        sec2 = _section_header(
+            b".rdata", 0x10, rdata_rva, FILE_ALIGN, rdata_rptr, 0x40000040
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec1 + sec2
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN + body.ljust(FILE_ALIGN, b"\x00")
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-noentry-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            dll_path = tmpdir / "no_entry.dll"
+            dll_path.write_bytes(bytes(buf))
+            create_win32_config(dll_path, bytes(buf))
+            sym_path = tmpdir / "symbol_addrs.txt"
+            self.assertTrue(sym_path.exists(), "symbols file should be written")
+            self.assertNotIn(
+                "entrypoint =",
+                sym_path.read_text(),
+                "entrypoint must be omitted when AddressOfEntryPoint = 0",
+            )
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class Win32SegtypeAliasTest(unittest.TestCase):
+    def test_asm_resolves_to_text(self):
+        """`type: asm` in YAML must resolve to Win32SegAsm which inherits
+        from Win32SegText — same behaviour as `type: text`, matching the
+        conventional segtype name used by the other splat platforms."""
+        from src.splat.segtypes.win32.asm import Win32SegAsm
+        from src.splat.segtypes.win32.text import Win32SegText
+
+        self.assertTrue(issubclass(Win32SegAsm, Win32SegText))
+        # No fields overridden — pure alias.
+        self.assertEqual(Win32SegAsm.__bases__, (Win32SegText,))
+
+
+class Win32SanitizeLabelHelpersTest(unittest.TestCase):
+    def test_sanitize_label_passthrough_for_clean_id(self):
+        self.assertEqual(win32_platform.sanitize_label("CreateThread"), "CreateThread")
+
+    def test_sanitize_label_replaces_punctuation(self):
+        self.assertEqual(win32_platform.sanitize_label("foo@bar-baz?"), "foo_bar_baz_")
+
+    def test_sanitize_label_prefixes_leading_digit(self):
+        self.assertEqual(win32_platform.sanitize_label("7zopen"), "_7zopen")
+
+    def test_sanitize_label_leaves_underscore_leading_alone(self):
+        self.assertEqual(win32_platform.sanitize_label("_main"), "_main")
+
+    def test_sanitize_label_empty_string(self):
+        self.assertEqual(win32_platform.sanitize_label(""), "")
+
+    def test_compute_iat_labels_collision_dedup(self):
+        """Two imports from the same DLL that sanitize to the same label
+        get the second occurrence suffixed with the slot RVA."""
+        pe = win32_platform.PEInfo(image_base=0x400000)
+        pe.imports.append(
+            win32_platform.PEImport(
+                dll="user32.dll", name="foo@x", ordinal=None, iat_rva=0x2000
+            )
+        )
+        pe.imports.append(
+            win32_platform.PEImport(
+                dll="user32.dll", name="foo$x", ordinal=None, iat_rva=0x2004
+            )
+        )
+        labels = win32_platform.compute_iat_labels(pe)
+        # Two distinct VAs, two distinct labels — first bare, second suffixed.
+        self.assertEqual(labels[0x400000 + 0x2000], "imp_user32_dll_foo_x")
+        self.assertEqual(labels[0x400000 + 0x2004], "imp_user32_dll_foo_x__rva2004")
+
+    def test_compute_export_labels_reserves_seed_set(self):
+        """An export literally named 'entrypoint' must NOT clobber the
+        synthesized entry-point symbol — dedup adds the ordinal suffix."""
+        pe = win32_platform.PEInfo(image_base=0x10000000)
+        pe.exports.append(
+            win32_platform.PEExport(
+                name="entrypoint", ordinal=7, rva=0x1234, forwarder=None
+            )
+        )
+        labels = win32_platform.compute_export_labels(pe, reserved={"entrypoint"})
+        self.assertEqual(labels[0x10000000 + 0x1234], "entrypoint__ord7")
+
+    def test_compute_export_labels_skips_forwarders(self):
+        pe = win32_platform.PEInfo(image_base=0x10000000)
+        pe.exports.append(
+            win32_platform.PEExport(
+                name="GoesElsewhere",
+                ordinal=1,
+                rva=0x100,
+                forwarder="OTHER.dll.DoThing",
+            )
+        )
+        labels = win32_platform.compute_export_labels(pe)
+        self.assertEqual(labels, {})
+
+
+class ParseImportsHintFallbackTest(unittest.TestCase):
+    def test_empty_name_falls_back_to_hint_as_ordinal(self):
+        """When a hint/name record has a zero-byte name string (malformed
+        or stripped), parse_imports must capture the 16-bit hint as the
+        import's ordinal so the IAT slot still gets a meaningful
+        `imp_X_ordinal_N` label instead of `imp_X_ordinal_None`."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        body = bytearray(0x100)
+        ilt_rva = rdata_rva + 0x50
+        iat_rva = rdata_rva + 0x60
+        dll_rva = rdata_rva + 0x30
+        hn_rva = rdata_rva + 0x70
+        # Descriptor 1 + null terminator.
+        struct.pack_into("<IIIII", body, 0x00, ilt_rva, 0, 0, dll_rva, iat_rva)
+        struct.pack_into("<IIIII", body, 0x14, 0, 0, 0, 0, 0)
+        body[0x30 : 0x30 + 9] = b"empty.dll"
+        # ILT/IAT: single thunk → hint/name record at hn_rva, then NUL.
+        struct.pack_into("<II", body, 0x50, hn_rva, 0)
+        struct.pack_into("<II", body, 0x60, hn_rva, 0)
+        # Hint/name: hint = 0x1234, name string = "" (just a NUL).
+        struct.pack_into("<H", body, 0x70, 0x1234)
+        body[0x72] = 0  # name terminator
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x20,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x20,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 0x100)],
+        )
+
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(len(pe.imports), 1)
+        self.assertIsNone(pe.imports[0].name)
+        self.assertEqual(pe.imports[0].ordinal, 0x1234)
+
+
+class CreateConfigSecurityCookieTest(unittest.TestCase):
+    def test_security_cookie_va_emits_symbol(self):
+        """A binary with /GS enabled has a SecurityCookie VA in its
+        LoadConfig directory. Promote it as a `security_cookie` data
+        symbol so disasm cross-references to the cookie's storage slot
+        resolve to a meaningful label."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        lc_size = 0x48
+        body = bytearray(0x80)
+        struct.pack_into("<I", body, 0, lc_size)
+        struct.pack_into("<I", body, 0x3C, IMAGE_BASE + rdata_rva + 0x50)
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0)] * 10 + [(rdata_rva, lc_size)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-gs-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "gs.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            self.assertIn("security_cookie = 0x", txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigUnwindSymbolsTest(unittest.TestCase):
+    def test_runtime_function_unwind_rvas_emit_symbols(self):
+        """Each PE32+ RUNTIME_FUNCTION entry's UnwindInfoAddress should
+        produce an `unwind_<va>` symbol in symbol_addrs.txt so the
+        pdata `.long` rows can reference unwind info by label."""
+        # Build a PE32+ binary with .pdata containing 2 RUNTIME_FUNCTION
+        # entries: (1000, 100D, 4000) and (1100, 110D, 4020).
+        text_body = b"\x90" * FILE_ALIGN
+        # .pdata layout (PE32+): 12 bytes per record + 12-byte null terminator
+        pdata_body = struct.pack(
+            "<IIIIIIIII",
+            0x1000,
+            0x100D,
+            0x4000,
+            0x1100,
+            0x110D,
+            0x4020,
+            0,
+            0,
+            0,
+        )
+        pdata_body = pdata_body.ljust(FILE_ALIGN, b"\x00")
+
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x200,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": text_body,
+                },
+                {
+                    "name": b".pdata",
+                    "vsize": 0x100,
+                    "vaddr": 0x3000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN * 2,
+                    "chars": 0x40000040,
+                    "body": pdata_body,
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0x3000, 24),  # Exception Table → .pdata
+            ],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-unwind-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "uw.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            # Unwind RVAs 0x4000 / 0x4020 → VAs 0x140004000 / 0x140004020.
+            self.assertIn("unwind_140004000 = 0x", txt)
+            self.assertIn("unwind_140004020 = 0x", txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigMingwClangDetectionTest(unittest.TestCase):
+    def test_mingw_imports_trigger_mingw_compiler_tag(self):
+        """A PE importing libgcc_s / libstdc++ / libwinpthread is GCC-
+        linked MinGW. Generated YAML should say `compiler: MINGW`."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        body = bytearray(0x100)
+        ilt_rva = rdata_rva + 0x50
+        iat_rva = rdata_rva + 0x60
+        dll_rva = rdata_rva + 0x30
+        hn_rva = rdata_rva + 0x70
+        struct.pack_into("<IIIII", body, 0x00, ilt_rva, 0, 0, dll_rva, iat_rva)
+        struct.pack_into("<IIIII", body, 0x14, 0, 0, 0, 0, 0)
+        body[0x30 : 0x30 + 23] = b"libgcc_s_dw2-1.dll\x00\x00\x00\x00\x00"
+        struct.pack_into("<II", body, 0x50, hn_rva, 0)
+        struct.pack_into("<II", body, 0x60, hn_rva, 0)
+        body[0x70 : 0x70 + 14] = b"\x00\x00__main\x00\x00\x00\x00\x00\x00"
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x20,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x20,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0), (rdata_rva, 0x100)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-mingw-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "mingw.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            yaml_txt = (tmpdir / "mingw.exe.yaml").read_text()
+            self.assertIn("compiler: MINGW", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigCfgSymbolsTest(unittest.TestCase):
+    def test_cfg_function_rvas_emit_cfg_target_rows(self):
+        """A /guard:cf-enabled PE lists every valid indirect-call target
+        in the GuardCFFunctionTable. Promote each to a `cfg_target_N`
+        symbol so the disassembly references them by label."""
+        # PE32 with LoadConfig pointing to a small CFG function table.
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        # LoadConfig at +0x00 (size 0x60), CFG table at +0x70.
+        cfg_table_off = 0x70
+        cfg_table_va = IMAGE_BASE + rdata_rva + cfg_table_off
+        lc_size = 0x60
+        body = bytearray(0x100)
+        struct.pack_into("<I", body, 0, lc_size)
+        struct.pack_into("<I", body, 0x54, cfg_table_va)  # GuardCFFunctionTable
+        struct.pack_into("<I", body, 0x58, 3)  # GuardCFFunctionCount
+        struct.pack_into("<I", body, 0x5C, 0)  # GuardFlags = 0 (no extra bytes)
+        struct.pack_into("<III", body, cfg_table_off, 0x1000, 0x1010, 0x1020)
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x100,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x100,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0)] * 10 + [(rdata_rva, lc_size)],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-cfg-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "cfg.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            self.assertIn("cfg_target_0 = 0x", txt)
+            self.assertIn("cfg_target_1 = 0x", txt)
+            self.assertIn("cfg_target_2 = 0x", txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigReadOnlyTailBssTest(unittest.TestCase):
+    def test_readonly_section_with_tail_gets_bss_subsegment(self):
+        """A .rdata section with VirtualSize > SizeOfRawData has a tail
+        that the loader zero-fills at map time. Even though it's not
+        writable, we still need a BSS subsegment so splat models the
+        linker layout correctly."""
+        text_body = b"\x90" * FILE_ALIGN
+        rdata_body = b"\x11" * 0x10  # only 0x10 raw bytes
+        sec_text = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        # .rdata with virtual_size = 0x100 but raw_size = 0x200 (FILE_ALIGN).
+        # Actually rdata raw is FILE_ALIGN (0x200 bytes), virtual = 0x400 →
+        # tail = 0x200 zero-fill.
+        sec_rdata = _section_header(
+            b".rdata", 0x400, 0x2000, FILE_ALIGN, FILE_ALIGN * 2, 0x40000040
+        )
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            2,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec_text + sec_rdata
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + text_body + rdata_body.ljust(FILE_ALIGN, b"\x00")
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-rotail-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "rotail.exe"
+            exe.write_bytes(bytes(buf))
+            create_win32_config(exe, bytes(buf))
+            yaml_txt = (tmpdir / "rotail.exe.yaml").read_text()
+            # rdata's tail section should be BSS.
+            self.assertIn("rdata_bss", yaml_txt)
+            self.assertIn("bss_size: 0x200", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigTlsAndSafeSehSymbolsTest(unittest.TestCase):
+    def test_tls_callbacks_and_safeseh_emit_symbol_addrs_rows(self):
+        """The PE optional header's TLS directory enumerates callbacks
+        run by the loader before DllMain. SafeSEH handlers (from the
+        Load Config directory) name every legal exception filter. Both
+        should appear in the generated symbol_addrs.txt so the analyst
+        can navigate to them by label."""
+        # Build a PE32 with TLS directory + LoadConfig + SafeSEH table.
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        # TLS directory layout: StartAddrOfRawData(4) + EndAddr(4) +
+        # AddressOfIndex(4) + AddressOfCallBacks(4) + SizeOfZeroFill(4)
+        # + Characteristics(4) = 0x18 bytes.
+        tls_dir_rva = rdata_rva + 0x00
+        cb_array_rva = rdata_rva + 0x18
+        # LoadConfig at rdata + 0x40 (size 0x48), SEH table at rdata + 0x90.
+        lc_off = 0x40
+        lc_rva = rdata_rva + lc_off
+        seh_table_off = 0x90
+        seh_table_va = IMAGE_BASE + rdata_rva + seh_table_off
+
+        body = bytearray(0x100)
+        # TLS dir.
+        struct.pack_into(
+            "<IIIIII",
+            body,
+            0,
+            0,
+            0,
+            0,  # raw start/end/index VAs
+            IMAGE_BASE + cb_array_rva,  # AddressOfCallBacks
+            0,
+            0,
+        )
+        # Callback array: two VAs then NUL.
+        struct.pack_into(
+            "<III",
+            body,
+            0x18,
+            IMAGE_BASE + 0x1010,
+            IMAGE_BASE + 0x1020,
+            0,
+        )
+        # LoadConfig (size 0x48).
+        struct.pack_into("<I", body, lc_off + 0x00, 0x48)
+        struct.pack_into("<I", body, lc_off + 0x3C, IMAGE_BASE + 0x2400)  # cookie
+        struct.pack_into("<I", body, lc_off + 0x40, seh_table_va)
+        struct.pack_into("<I", body, lc_off + 0x44, 2)
+        # SEH handler RVAs.
+        struct.pack_into("<II", body, seh_table_off, 0x1010, 0x1020)
+
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x100,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x100,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (0, 0),
+                (tls_dir_rva, 0x18),
+                (lc_rva, 0x48),
+            ],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-tls-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "tls.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            txt = (tmpdir / "symbol_addrs.txt").read_text()
+            self.assertIn("tls_callback_0 = 0x", txt)
+            self.assertIn("tls_callback_1 = 0x", txt)
+            self.assertIn("safeseh_0 = 0x", txt)
+            self.assertIn("safeseh_1 = 0x", txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigNoFileBackedSectionsTest(unittest.TestCase):
+    def test_pe_with_only_bss_sections_still_produces_valid_yaml(self):
+        """A PE with no file-backed sections (pure BSS, e.g. some
+        kernel-mode stubs or hand-crafted images) must still produce a
+        valid splat YAML — the header segment + a footer offset for
+        the file's total size is the only well-formed minimum."""
+        # Build PE with header + a single BSS section (no raw bytes).
+        sec_bss = struct.pack(
+            "<8sIIIIIIHHI",
+            b".bss\x00\x00\x00\x00",
+            0x100,  # VirtualSize
+            0x1000,  # VirtualAddress
+            0,  # SizeOfRawData
+            0,  # PointerToRawData
+            0,
+            0,
+            0,
+            0,
+            0xC0000080,  # READ | WRITE | UNINITIALIZED_DATA
+        )
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F | 0x2000,  # IMAGE_FILE_DLL
+        )
+        opt = _opt_header_pe32(entry_rva=0)
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec_bss
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header)
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+        import yaml
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-bssonly-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            dll = tmpdir / "bssonly.dll"
+            dll.write_bytes(bytes(buf))
+            create_win32_config(dll, bytes(buf))
+            doc = yaml.safe_load((tmpdir / "bssonly.dll.yaml").read_text())
+            # Should have a header segment and the BSS segment.
+            segs = doc["segments"]
+            self.assertTrue(
+                any(isinstance(s, dict) and s.get("type") == "header" for s in segs)
+            )
+            self.assertTrue(
+                any(isinstance(s, dict) and s.get("type") == "bss" for s in segs)
+            )
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigPhantomSectionTest(unittest.TestCase):
+    def test_section_with_zero_raw_pointer_treated_as_bss(self):
+        """A section whose SizeOfRawData > 0 but PointerToRawData == 0
+        is loader-classified as uninitialised data — the file simply
+        doesn't back its bytes. Auto-config must not emit a file-backed
+        segment pointing at offset 0 (which is the DOS header) for it."""
+        text_body = b"\x90" * FILE_ALIGN
+        # Build PE with .text + a "phantom" section claiming raw_size
+        # but raw_pointer = 0.
+        sec_text = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        # Hand-craft .bss: raw_size=0x40, raw_pointer=0, virtual_size=0x80.
+        sec_bss = struct.pack(
+            "<8sIIIIIIHHI",
+            b".bss\x00\x00\x00\x00",
+            0x80,  # VirtualSize
+            0x2000,  # VirtualAddress
+            0x40,  # SizeOfRawData (nonzero!)
+            0,  # PointerToRawData (zero — phantom)
+            0,
+            0,
+            0,
+            0,
+            0xC0000040,  # READ | WRITE | INITIALIZED_DATA (no UNINIT flag)
+        )
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            2,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec_text + sec_bss
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + text_body
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-phantom-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "phantom.exe"
+            exe.write_bytes(bytes(buf))
+            create_win32_config(exe, bytes(buf))
+            yaml_txt = (tmpdir / "phantom.exe.yaml").read_text()
+            # The phantom .bss should NOT be emitted as a file-backed
+            # `type: code` subsegment (which would point at offset 0).
+            self.assertNotIn("[0x0, ", yaml_txt)
+            # It SHOULD appear as a bss segment.
+            self.assertIn("name: bss,", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigResourceOnlyDllTest(unittest.TestCase):
+    def test_pure_resource_dll_produces_loadable_yaml(self):
+        """A resource-only DLL has no entrypoint, no exports, no imports
+        — just a .rsrc section. create_win32_config should produce a
+        symbol_addrs.txt that contains no broken symbol entries (could
+        be empty) and a YAML that splat can load without errors."""
+        rsrc_body = b"\x00" * 0x40  # placeholder; not a real .rsrc tree
+        body = b"\x90" * FILE_ALIGN
+        # PE32 DLL: no exports/imports/relocs/etc., one .rsrc section.
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F | 0x2000,  # IMAGE_FILE_DLL
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0))
+        sec_rsrc = _section_header(
+            b".rsrc", 0x40, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x40000040
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec_rsrc
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + body[: len(rsrc_body)].ljust(FILE_ALIGN, b"\x00")
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+        import yaml
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-rsrc-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            dll = tmpdir / "resonly.dll"
+            dll.write_bytes(bytes(buf))
+            create_win32_config(dll, bytes(buf))
+            sym = (tmpdir / "symbol_addrs.txt").read_text()
+            # No entrypoint = (entry was 0); no exports / imports.
+            self.assertNotIn("entrypoint =", sym)
+            # symbol_addrs has the splat preamble comment but no actual
+            # symbol bindings (no `name = 0x... ;` lines).
+            content_lines = [
+                ln
+                for ln in sym.splitlines()
+                if ln.strip() and not ln.lstrip().startswith("//")
+            ]
+            self.assertEqual(content_lines, [])
+            # YAML must be loadable.
+            yaml_doc = yaml.safe_load((tmpdir / "resonly.dll.yaml").read_text())
+            self.assertEqual(yaml_doc["options"]["platform"], "win32")
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigBinClassificationTest(unittest.TestCase):
+    def test_reloc_and_rsrc_sections_classified_as_bin(self):
+        """`.reloc` and `.rsrc` are structured loader-time data — their
+        bytes aren't meaningful as pointers / strings to disassembly.
+        Auto-config should mark them as opaque `bin` subsegments to
+        skip the string/pointer heuristics that would otherwise emit
+        misleading `.long`/`.asciz` directives over their content."""
+        body = b"\x90" * FILE_ALIGN
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": body,
+                },
+                {
+                    "name": b".rsrc",
+                    "vsize": 0x10,
+                    "vaddr": 0x2000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN * 2,
+                    "chars": 0x40000040,
+                    "body": body,
+                },
+                {
+                    "name": b".reloc",
+                    "vsize": 0x10,
+                    "vaddr": 0x3000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN * 3,
+                    "chars": 0x42000040,
+                    "body": body,
+                },
+            ],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-bin-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "bn.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            yaml_txt = (tmpdir / "bn.exe.yaml").read_text()
+            self.assertIn(", bin, rsrc_main", yaml_txt)
+            self.assertIn(", bin, reloc_main", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigPdataDetectTest(unittest.TestCase):
+    def test_pe32_plus_pdata_section_gets_pdata_subtype(self):
+        """A PE32+ `.pdata` section should produce a `type: pdata`
+        subsegment so RUNTIME_FUNCTION rows render structured by
+        Win32SegPdata instead of as opaque bytes."""
+        # Minimal PE32+ with .text + .pdata.
+        body = b"\x90" * FILE_ALIGN
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": body,
+                },
+                {
+                    "name": b".pdata",
+                    "vsize": 0x10,
+                    "vaddr": 0x2000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN * 2,
+                    "chars": 0x40000040,
+                    "body": body,
+                },
+            ],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-pdata-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "pd.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            yaml_txt = (tmpdir / "pd.exe.yaml").read_text()
+            # The .pdata section's subsegment should be type pdata.
+            self.assertIn(", pdata,", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_pe32_pdata_section_not_recognised(self):
+        """PE32 doesn't have a meaningful .pdata (RUNTIME_FUNCTION layout
+        only exists for x64 SEH). Even a section named .pdata in PE32
+        should fall through to rodata classification."""
+        body = b"\x00" * FILE_ALIGN
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * FILE_ALIGN,
+                },
+                {
+                    "name": b".pdata",
+                    "vsize": 0x10,
+                    "vaddr": 0x2000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN * 2,
+                    "chars": 0x40000040,
+                    "body": body,
+                },
+            ],
+        )
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-pdata32-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "pd32.exe"
+            exe.write_bytes(pe_bytes)
+            create_win32_config(exe, pe_bytes)
+            yaml_txt = (tmpdir / "pd32.exe.yaml").read_text()
+            self.assertNotIn(", pdata,", yaml_txt)
+            self.assertIn(", rodata,", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigMsvcAutoDetectTest(unittest.TestCase):
+    def test_linker_major_picks_matching_compiler_tag(self):
+        """The PE optional header's MajorLinkerVersion identifies which
+        MSVC linker produced the binary. The generated YAML's `compiler:`
+        line should reflect that so consumers don't have to override."""
+        # Force linker_major = 14 (MSVC14, VS 2015-2022).
+        size_of_opt = 0xE0
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            size_of_opt,
+            0x010F,
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0x1000))
+        # MajorLinkerVersion at offset 2, MinorLinkerVersion at 3.
+        struct.pack_into("<BB", opt, 2, 14, 30)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-msvc-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "vs2019.exe"
+            exe.write_bytes(bytes(buf))
+            create_win32_config(exe, bytes(buf))
+            yaml_txt = (tmpdir / "vs2019.exe.yaml").read_text()
+            self.assertIn("compiler: MSVC14", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_unknown_linker_major_falls_back_to_msvc6(self):
+        """A binary whose linker version isn't in the lookup table (e.g.
+        a custom packer's value) should still produce a valid splat
+        config — default to MSVC6 since it's the lowest registered."""
+        size_of_opt = 0xE0
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            size_of_opt,
+            0x010F,
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0x1000))
+        struct.pack_into("<BB", opt, 2, 99, 0)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-msvc-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            exe = tmpdir / "weird.exe"
+            exe.write_bytes(bytes(buf))
+            create_win32_config(exe, bytes(buf))
+            yaml_txt = (tmpdir / "weird.exe.yaml").read_text()
+            self.assertIn("compiler: MSVC6", yaml_txt)
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigPathologicalBasenameTest(unittest.TestCase):
+    def test_empty_basename_after_sanitization_falls_back(self):
+        """A filename composed entirely of characters that
+        remove_invalid_path_characters strips would otherwise produce a
+        bare '.yaml' / '.ld' output. Fall back to a synthetic basename."""
+        # Build a minimal PE.
+        size_of_opt = 0xE0
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            size_of_opt,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-empty-"))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            # All-spaces filename — `basename.replace(" ", "")` returns ''
+            # (Path treats the path as a child of tmpdir even when the
+            # name is whitespace).
+            sp_path = tmpdir / "  "
+            sp_path.write_bytes(bytes(buf))
+            create_win32_config(sp_path, bytes(buf))
+            # Synthetic fallback basename used since cleaned_basename = ''.
+            self.assertTrue((tmpdir / "pe_target.yaml").exists())
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class CreateConfigYamlQuotedPathTest(unittest.TestCase):
+    def test_target_path_with_spaces_is_quoted_in_yaml(self):
+        """A binary located at a path with spaces (or colons / hashes —
+        all YAML-significant unquoted) must not corrupt the generated
+        splat YAML. The header writes `target_path` as a quoted string."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        body = b"\x00" * 0x10
+        size_of_opt = 0xE0
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            2,
+            0x12345678,
+            0,
+            0,
+            size_of_opt,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        sec1 = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        sec2 = _section_header(
+            b".rdata", 0x10, rdata_rva, FILE_ALIGN, rdata_rptr, 0x40000040
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec1 + sec2
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN + body.ljust(FILE_ALIGN, b"\x00")
+
+        import os
+        import tempfile
+        import shutil
+        from src.splat.scripts.create_config import create_win32_config
+        from pathlib import Path as _P
+        import yaml
+
+        tmpdir = _P(tempfile.mkdtemp(prefix="splat-create-spaces "))
+        cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            sp_path = tmpdir / "exe with spaces.dll"
+            sp_path.write_bytes(bytes(buf))
+            create_win32_config(sp_path, bytes(buf))
+            yaml_path = tmpdir / "exewithspaces.dll.yaml"
+            text = yaml_path.read_text()
+            # Quoted: surrounded by double quotes.
+            self.assertRegex(text, r'target_path:\s*"[^"]+exe with spaces\.dll"')
+            # And the YAML must still parse cleanly.
+            doc = yaml.safe_load(text)
+            self.assertIn("exe with spaces.dll", doc["options"]["target_path"])
+        finally:
+            os.chdir(cwd)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class Win32DecodeFlagsLeftoverTest(unittest.TestCase):
+    def test_unknown_bits_surface_in_decoded_flags(self):
+        """When a binary sets a Characteristics or DllCharacteristics
+        bit that isn't in our static lookup table, the decoder must
+        surface it rather than dropping it silently (which would hide
+        a feature flag the analyst needs to know about)."""
+        from src.splat.segtypes.win32.header import _decode_flags
+
+        table = [
+            (0x0001, "EXECUTABLE_IMAGE"),
+            (0x2000, "DLL"),
+        ]
+        # Mixed: one known bit + one unknown bit (0x4000).
+        decoded = _decode_flags(0x6001, table)
+        self.assertIn("EXECUTABLE_IMAGE", decoded)
+        self.assertIn("DLL", decoded)
+        self.assertIn("unknown 0x4000", decoded)
+
+    def test_no_leftover_when_all_bits_known(self):
+        from src.splat.segtypes.win32.header import _decode_flags
+
+        table = [(0x0001, "A"), (0x0002, "B")]
+        self.assertEqual(_decode_flags(0x0003, table), "A | B")
+
+    def test_none_when_value_is_zero(self):
+        from src.splat.segtypes.win32.header import _decode_flags
+
+        table = [(0x0001, "A")]
+        self.assertEqual(_decode_flags(0, table), "(none)")
+
+
+class Win32HeaderTruncatedOptionalHeaderTest(unittest.TestCase):
+    def test_truncated_optional_header_falls_back_to_byte_block(self):
+        """A malformed PE with SizeOfOptionalHeader smaller than the
+        standard 0xE0 layout used to emit zero-width
+        `.short ''`/`.long ''` directives that GAS rejects.
+        After truncation is detected the remainder is dumped as a raw
+        byte block once — subsequent emit_* calls return silently."""
+        from src.splat.segtypes.win32.header import Win32SegHeader
+
+        # Declare a runt optional header — fits Magic + linker bytes +
+        # SizeOfCode + SizeOfInitData (16 bytes total), then truncated
+        # mid-SizeOfUninitData. The bounds check fires there.
+        size_of_opt = 0x10
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            size_of_opt,
+            0x010F,
+        )
+        opt_runt = bytearray(size_of_opt)
+        opt_runt[0:2] = struct.pack("<H", 0x010B)  # Magic = PE32
+        opt_runt[2] = 6  # MajorLinkerVersion
+        # Fields past offset 4 are unspecified zeros — that's the runt.
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt_runt) + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+
+        seg = object.__new__(Win32SegHeader)
+        # opt_off = DOS(0x40) + PE\0\0(4) + COFF(0x14) = 0x58
+        opt_off = 0x40 + 4 + 0x14
+        opt_end = opt_off + size_of_opt
+        lines = seg._dump_optional_header(bytes(buf), opt_off, opt_end)
+        # No empty `.short`/`.long` directives.
+        for ln in lines:
+            self.assertNotRegex(ln, r'\.(short|long|quad)\s+""\s*$')
+        # Truncation must short-circuit subsequent emits — fields past
+        # SizeOfUninitializedData (i.e. AddressOfEntryPoint onward)
+        # should NOT appear in output.
+        joined = "\n".join(lines)
+        self.assertNotIn("AddressOfEntryPoint", joined)
+        self.assertNotIn("ImageBase", joined)
+
+
+class Win32HeaderDataDirCountCapTest(unittest.TestCase):
+    def test_oversize_num_dirs_in_header_dump_is_capped(self):
+        """The structured header dump iterates NumberOfRvaAndSizes
+        directory entries. A fuzzed value (e.g. 0xFFFFFFFF) must not
+        trigger a 4-billion-iteration loop in `_dump_optional_header`.
+        Real iteration is bounded by both the declared count cap (256)
+        and the per-iteration opt_end check."""
+        from src.splat.segtypes.win32.header import Win32SegHeader
+
+        # Minimal PE32 buffer where SizeOfOptionalHeader covers exactly
+        # the standard header + 16 data directories. Force NumberOfRvaAndSizes
+        # to 0xFFFFFFFF.
+        size_of_opt = 0xE0
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            size_of_opt,
+            0x010F,
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0x1000))
+        # NumberOfRvaAndSizes field at offset 92.
+        struct.pack_into("<I", opt, 92, 0xFFFFFFFF)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+
+        seg = object.__new__(Win32SegHeader)
+        opt_off = 0x40 + 4 + 0x14  # DOS+PE+sig+COFF header
+        opt_end = opt_off + size_of_opt
+        # No exception, no infinite loop. Returns within milliseconds.
+        lines = seg._dump_optional_header(bytes(buf), opt_off, opt_end)
+        # Should have emitted at most opt_end // 8 directory entries,
+        # well below 256, and definitely below 4 billion.
+        self.assertLess(len(lines), 256)
+
+
+class Win32HeaderWidthTest(unittest.TestCase):
+    def test_pe32_plus_imagebase_padded_to_16_hex(self):
+        """For PE32+ the ImageBase is 64-bit (e.g. 0x140000000 — 9 hex
+        digits). The header summary should pad to 16 hex digits so VAs
+        line up consistently with EntryPoint and section VA columns."""
+        from src.splat.segtypes.win32.header import Win32SegHeader
+        from src.splat.platforms import win32 as win32_platform
+
+        old_info = win32_platform.info
+        try:
+            pe = win32_platform.PEInfo(
+                is_pe32_plus=True,
+                image_base=0x140000000,
+                entry_point_rva=0x1000,
+                machine=0x8664,
+                subsystem=3,
+            )
+            win32_platform.info = pe
+            seg = object.__new__(Win32SegHeader)
+            lines = seg._summary_block(pe)
+            block = "\n".join(lines)
+            self.assertIn("0x0000000140000000", block)
+            self.assertIn("0x0000000140001000", block)
+        finally:
+            win32_platform.info = old_info
+
+    def test_pe32_imagebase_padded_to_8_hex(self):
+        """PE32 keeps the legacy 8-digit width since ImageBase fits in 32
+        bits — verifies we didn't regress the common case while adding
+        PE32+ support."""
+        from src.splat.segtypes.win32.header import Win32SegHeader
+        from src.splat.platforms import win32 as win32_platform
+
+        old_info = win32_platform.info
+        try:
+            pe = win32_platform.PEInfo(
+                is_pe32_plus=False,
+                image_base=0x00400000,
+                entry_point_rva=0x1000,
+                machine=0x014C,
+                subsystem=3,
+            )
+            win32_platform.info = pe
+            seg = object.__new__(Win32SegHeader)
+            lines = seg._summary_block(pe)
+            block = "\n".join(lines)
+            self.assertIn("0x00400000", block)
+            self.assertIn("0x00401000", block)
+            self.assertNotIn("0x0000000000400000", block)
+        finally:
+            win32_platform.info = old_info
+
+
+class CapstoneDisassemblerTest(unittest.TestCase):
+    def test_known_types_includes_primitive_set(self):
+        """CapstoneDisassembler.known_types returns the spimdisasm-mirror
+        primitive vocabulary so symbol_addrs.txt `type:u32` /
+        `type:asciz` entries get accepted by splat's check_valid_type."""
+        from src.splat.disassembler.capstone_disassembler import (
+            CapstoneDisassembler,
+        )
+
+        d = CapstoneDisassembler()
+        kt = d.known_types()
+        for t in ("u8", "u32", "s32", "f32", "char", "asciz"):
+            self.assertIn(t, kt)
+
+    def test_engine_lazy_creation_uses_pe_bitness(self):
+        """get_engine() picks CS_MODE_32 / CS_MODE_64 from the parsed
+        PE's is_pe32_plus flag — checks deferred-init path."""
+        import capstone
+        from src.splat.disassembler.capstone_disassembler import (
+            CapstoneDisassembler,
+        )
+
+        d = CapstoneDisassembler()
+
+        # Save / restore the global PE info so we don't bleed state.
+        old_info = win32_platform.info
+        try:
+            # PE32 — should get 32-bit mode.
+            win32_platform.info = win32_platform.PEInfo(is_pe32_plus=False)
+            d._md = None  # force recreation
+            md = d.get_engine()
+            self.assertEqual(md.mode, capstone.CS_MODE_32)
+
+            # PE32+ — should get 64-bit mode.
+            win32_platform.info = win32_platform.PEInfo(is_pe32_plus=True)
+            d._md = None
+            md = d.get_engine()
+            self.assertEqual(md.mode, capstone.CS_MODE_64)
+        finally:
+            win32_platform.info = old_info
+
+
+class Win32SubsystemNamesTest(unittest.TestCase):
+    def test_subsystem_table_coverage(self):
+        """Subsystem ID → name mapping must cover the common values
+        emitted by Windows linkers."""
+        from src.splat.segtypes.win32.header import _SUBSYSTEMS
+
+        expected = {
+            1: "NATIVE",
+            2: "WINDOWS_GUI",
+            3: "WINDOWS_CUI",
+            5: "OS2_CUI",
+            7: "POSIX_CUI",
+            10: "EFI_APPLICATION",
+            14: "XBOX",
+        }
+        for sid, name in expected.items():
+            self.assertEqual(_SUBSYSTEMS.get(sid), name, f"Subsystem {sid}")
+
+
+class Win32ResourceTypeNamesTest(unittest.TestCase):
+    def test_standard_resource_type_names(self):
+        """RESOURCE_TYPE_NAMES covers all standard Win32 resource IDs
+        (winuser.h RT_*) so the header summary renders them by name
+        instead of as `TYPE_<n>`."""
+        from src.splat.platforms.win32 import RESOURCE_TYPE_NAMES
+
+        # Spot-check a representative slice from the standard set.
+        expected = {
+            1: "CURSOR",
+            2: "BITMAP",
+            3: "ICON",
+            4: "MENU",
+            5: "DIALOG",
+            6: "STRING",
+            14: "GROUP_ICON",
+            16: "VERSION",
+            24: "MANIFEST",
+        }
+        for rid, name in expected.items():
+            self.assertEqual(RESOURCE_TYPE_NAMES.get(rid), name, f"RT id {rid}")
+
+
+class Win32LinkerVersionTest(unittest.TestCase):
+    def test_linker_version_label_known_majors(self):
+        """linker_version_label maps each MSVC major version to a
+        recognizable product name; unknown majors fall back to a
+        plain `linker vN.NN` rendering."""
+        from src.splat.platforms.win32 import linker_version_label
+
+        self.assertEqual(linker_version_label(6, 0), "MSVC 6.0")
+        self.assertEqual(linker_version_label(14, 34), "MSVC 14.x / VS 2015-2022")
+        self.assertEqual(linker_version_label(99, 5), "linker v99.05")
+
+
+class Win32SegHeaderUtilsTest(unittest.TestCase):
+    """Targeted tests for the small helpers in header.py — these wrap
+    `_decode_flags` and the Subsystem table."""
+
+    def test_decode_characteristics_flags(self):
+        from src.splat.segtypes.win32.header import (
+            _decode_flags,
+            _FILE_CHARACTERISTICS,
+            _DLL_CHARACTERISTICS,
+        )
+
+        self.assertEqual(_decode_flags(0, _FILE_CHARACTERISTICS), "(none)")
+        # DLL | EXECUTABLE_IMAGE | 32BIT_MACHINE
+        out = _decode_flags(0x2102, _FILE_CHARACTERISTICS)
+        self.assertIn("EXECUTABLE_IMAGE", out)
+        self.assertIn("32BIT_MACHINE", out)
+        self.assertIn("DLL", out)
+        # DllCharacteristics
+        out = _decode_flags(0x0140, _DLL_CHARACTERISTICS)
+        self.assertIn("DYNAMIC_BASE", out)
+        self.assertIn("NX_COMPAT", out)
+
+
+class PEEdgeCasesTest(unittest.TestCase):
+    """Defensive parse tests for malformed inputs and unusual but valid PEs."""
+
+    def test_read_cstr_truncation(self):
+        """Strings with no NUL terminator get capped at _MAX_CSTR_LEN."""
+        big_blob = b"A" * (win32_platform._MAX_CSTR_LEN + 100)
+        result = win32_platform._read_cstr(big_blob, 0)
+        self.assertEqual(len(result), win32_platform._MAX_CSTR_LEN)
+
+    def test_read_cstr_past_end(self):
+        """Out-of-bounds offsets return an empty string, not a crash."""
+        self.assertEqual(win32_platform._read_cstr(b"", 0), "")
+        self.assertEqual(win32_platform._read_cstr(b"ABC", 99), "")
+        self.assertEqual(win32_platform._read_cstr(b"ABC", -1), "")
+
+    def test_resolve_va_to_file_offset_out_of_range(self):
+        """VA outside any section returns None."""
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        # VA way outside the loaded image.
+        self.assertIsNone(pe.va_to_file_offset(0x7FFFFFFF))
+        self.assertIsNone(pe.rva_to_file_offset(0x7FFFFFFF))
+
+    def test_truncated_dos_header(self):
+        """File shorter than the 64-byte DOS header must be rejected
+        explicitly (not raise a struct.error in the parser)."""
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(b"MZ" + b"\x00" * 10)
+
+    def test_generic_unsupported_machine_rejected(self):
+        """Non-x86 / non-ARM machine codes (MIPS, PowerPC, Alpha, etc.)
+        also hit the win32 init() rejection — generic message rather
+        than the ARM-specific one."""
+        # Machine = 0x0166 (R3000-LE) → not i386/amd64/arm64/arm32.
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x0166,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        blob = bytes(header).ljust(FILE_ALIGN * 2, b"\x00")
+        # parse_pe still works (architecture-neutral).
+        pe = win32_platform.parse_pe(blob)
+        self.assertEqual(pe.machine, 0x0166)
+        with self.assertRaises(SystemExit):
+            win32_platform.init(blob)
+
+    def test_arm64_machine_rejected_at_init(self):
+        """ARM64 PE binaries parse cleanly through parse_pe (structures
+        are architecture-neutral) but init() must reject them with an
+        explicit message — the disassembler is x86-only."""
+        # Hand-build a tiny PE32+ with Machine = 0xAA64 (ARM64).
+        coff = struct.pack(
+            "<HHIIIHH",
+            0xAA64,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xF0,
+            0x002F,
+        )
+        opt = _opt_header_pe32_plus(entry_rva=0x1000)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        blob = bytes(header).ljust(FILE_ALIGN * 2, b"\x00")
+        # parse_pe alone is OK — structures are arch-neutral.
+        pe = win32_platform.parse_pe(blob)
+        self.assertEqual(pe.machine, 0xAA64)
+        # init() escalates to log.error → SystemExit because the
+        # downstream x86 capstone backend can't disassemble ARM64.
+        with self.assertRaises(SystemExit):
+            win32_platform.init(blob)
+
+    def test_section_table_truncated(self):
+        """NumberOfSections * 40 must fit between the end of the optional
+        header and EOF. Declaring 10 sections but only providing 2 must
+        be caught explicitly."""
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            10,  # claim 10 sections
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        # Only enough space for 2 section headers — half of the declared.
+        sec = b"\x00" * (40 * 2)
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        # Truncate so the remaining 8 sections can't fit.
+        header = header[: 0x40 + 4 + 20 + 0xE0 + 40 * 2]
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(header)
+
+    def test_zero_optional_header_size(self):
+        """SizeOfOptionalHeader == 0 means there's no optional header
+        at all (technically valid only for COFF object files, not PE
+        images). Parser must reject before attempting to read the
+        2-byte magic field."""
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            0,
+            0x12345678,
+            0,
+            0,
+            0,
+            0x010F,
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(bytes(header))
+
+    def test_sub_minimum_optional_header_size(self):
+        """Optional header smaller than the per-format minimum (96 for
+        PE32, 112 for PE32+) can't fit the data-directory offsets we
+        expect to read. Parser must reject."""
+        # PE32 optional header but declared size 32 (less than the 96
+        # required for windows-specific + data-directories).
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            0,
+            0x12345678,
+            0,
+            0,
+            32,
+            0x010F,
+        )
+        # First 32 bytes of a PE32 optional header (just standard fields).
+        opt = struct.pack(
+            "<HBBIIIIIII",
+            0x010B,
+            6,
+            0,
+            0x200,
+            0x200,
+            0,
+            0x1000,
+            0x1000,
+            0x2000,
+            0x400000,
+        )
+        assert len(opt) == 32
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(bytes(header))
+
+    def test_unknown_optional_header_magic(self):
+        """Optional header magic must be 0x10B (PE32) or 0x20B (PE32+).
+        Older formats like ROM image (0x107) or future values must be
+        rejected before we try to read PE32-specific offsets."""
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        # Build a valid-looking PE32 optional header then overwrite the
+        # magic to 0x107 (ROM image, unsupported).
+        opt = bytearray(_opt_header_pe32(entry_rva=0x1000))
+        struct.pack_into("<H", opt, 0, 0x0107)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(bytes(header).ljust(FILE_ALIGN * 2, b"\x00"))
+
+    def test_truncated_after_pe_signature(self):
+        """File large enough to hold MZ + e_lfanew + 'PE\\0\\0' but not
+        the trailing 20-byte COFF file header must be rejected before
+        struct.unpack_from would walk past EOF."""
+        dos = bytearray(64)
+        dos[0:2] = b"MZ"
+        dos[0x3C:0x40] = struct.pack("<I", 0x40)
+        # 'PE\0\0' present but only ~8 trailing bytes — not enough for the
+        # 20-byte IMAGE_FILE_HEADER that follows.
+        blob = bytes(dos) + b"PE\x00\x00" + b"\x00" * 8
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(blob)
+
+    def test_pe32_plus_amd64_magic_combination_accepted(self):
+        """AMD64 Machine + PE32+ Magic 0x20B must be accepted. Pairs
+        with the mismatch test — confirms the cross-validation only
+        rejects genuine mismatches, not all PE32+ pairings."""
+        pe_bytes = _build_pe_plus(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+            ],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertEqual(pe.machine, 0x8664)
+        self.assertTrue(pe.is_pe32_plus)
+        self.assertEqual(pe.image_base, 0x140000000)
+
+    def test_machine_magic_mismatch(self):
+        """A PE file with i386 Machine but PE32+ optional header magic
+        (or vice-versa) must be rejected — both fields encode pointer
+        size and they must agree."""
+        # i386 Machine + PE32+ magic.
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xF0,
+            0x010F,
+        )
+        # PE32+ optional header (240 bytes) but Machine claims i386.
+        opt = _opt_header_pe32_plus(entry_rva=0x1000)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(bytes(header).ljust(FILE_ALIGN * 2, b"\x00"))
+
+    def test_missing_pe_signature(self):
+        """An MZ header pointing at non-PE bytes must be rejected."""
+        # Valid DOS header pointing e_lfanew at 0x40, but the bytes there
+        # are not "PE\0\0".
+        dos = bytearray(64)
+        dos[0:2] = b"MZ"
+        dos[0x3C:0x40] = struct.pack("<I", 0x40)
+        blob = bytes(dos) + b"NE\x00\x00" + b"\x00" * 100
+        with self.assertRaises(SystemExit):
+            win32_platform.parse_pe(blob)
+
+    def test_coff_symbol_table_recorded(self):
+        """PE binaries essentially never set PointerToSymbolTable
+        + NumberOfSymbols (debug info goes in the external .pdb), but
+        when they do, those fields surface on PEInfo for the header
+        summary to flag."""
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0x12345,
+            42,  # PointerToSymbolTable + NumberOfSymbols
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+        pe = win32_platform.parse_pe(bytes(buf))
+        self.assertEqual(pe.coff_symtab_ptr, 0x12345)
+        self.assertEqual(pe.coff_num_symbols, 42)
+
+    def test_zero_sections(self):
+        """Synthetic PE with NumberOfSections=0 — parser should accept
+        and downstream rva_to_file_offset should return None for any
+        non-trivial RVA without crashing."""
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            0,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0, data_dirs=())
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        pe = win32_platform.parse_pe(bytes(header))
+        self.assertEqual(pe.num_sections, 0)
+        self.assertEqual(len(pe.sections), 0)
+        self.assertIsNone(pe.rva_to_file_offset(0x1000))
+
+    def test_no_entrypoint(self):
+        """Resource-only DLL has entry_point_rva == 0."""
+        # We need to override the standard `_build_pe` entry_rva=0x1000
+        # default. Bypass by constructing fields directly.
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F | 0x2000,  # add DLL flag
+        )
+        opt = _opt_header_pe32(entry_rva=0, data_dirs=())
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+        pe = win32_platform.parse_pe(bytes(buf))
+        self.assertEqual(pe.entry_point_rva, 0)
+        # Should not crash later when consumers check entry_point_va.
+
+    def test_safeseh_count_capped(self):
+        """Fuzzed LoadConfig with SEHandlerCount = 0xFFFFFFFF must not
+        loop 4 billion times. Parser caps at 1M; loop body's per-entry
+        bounds check then terminates early on EOF."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        seh_table_va = IMAGE_BASE + rdata_rva + 0x50
+        lc_size = 0x48
+        body = bytearray(0x100)
+        struct.pack_into("<I", body, 0, lc_size)
+        struct.pack_into("<I", body, 0x3C, IMAGE_BASE + rdata_rva + 0x60)
+        struct.pack_into("<I", body, 0x40, seh_table_va)
+        struct.pack_into("<I", body, 0x44, 0xFFFFFFFF)
+        # Two real RVAs followed by garbage / unmapped territory.
+        struct.pack_into("<II", body, 0x50, 0x1010, 0x1020)
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x100,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x100,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x100,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(0, 0)] * 10 + [(rdata_rva, lc_size)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        # Bounded reads stop once EOF is hit; we still cap iteration.
+        self.assertLessEqual(len(pe.safe_seh_handlers), 1_000_000)
+        # First two genuine RVAs survive.
+        self.assertEqual(pe.safe_seh_handlers[:2], [0x1010, 0x1020])
+
+    def test_export_count_capped(self):
+        """Fuzzed export directory with NumberOfFunctions = 0xFFFFFFFF.
+        Parser must cap iteration to a sane limit so it doesn't scan
+        gigabytes of file looking for non-existent function RVAs."""
+        rdata_rva = 0x2000
+        rdata_rptr = FILE_ALIGN * 2
+        funcs_rva = rdata_rva + 0x28
+        dll_name_rva = rdata_rva + 0x30
+        body = bytearray(0x80)
+        struct.pack_into(
+            "<IIHHIIIIIII",
+            body,
+            0x00,
+            0,
+            0,
+            0,
+            0,
+            dll_name_rva,
+            1,
+            0xFFFFFFFF,  # num funcs — absurd
+            0,
+            funcs_rva,
+            0,
+            0,
+        )
+        struct.pack_into("<I", body, 0x28, 0x1000)
+        body[0x30 : 0x30 + 11] = b"FuzzDll\x00\x00\x00\x00"
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+                {
+                    "name": b".rdata",
+                    "vsize": 0x80,
+                    "vaddr": rdata_rva,
+                    "rsize": FILE_ALIGN,
+                    "rptr": rdata_rptr,
+                    "chars": 0x40000040,
+                    "body": bytes(body),
+                },
+            ],
+            data_dirs=[(rdata_rva, 0x80)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        # Cap = 65536. Loop exits early once funcs_off + i*4 + 4 > len(data).
+        # Should not loop 4 billion times.
+        self.assertLessEqual(len(pe.exports), 65536)
+
+    def test_number_of_sections_capped_at_96(self):
+        """PE spec caps NumberOfSections at 96. Parser must not iterate
+        a fuzzed huge value (e.g. 0xFFFF) past the cap. Provide enough
+        bytes for the cap (96 * 40 = 3840) but declare 0xFFFF sections."""
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            0xFFFF,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = _opt_header_pe32(entry_rva=0x1000)
+        # Exactly 96 zeroed section headers — enough to hit the cap
+        # without going past EOF.
+        sec = b"\x00" * (40 * 96)
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        pe = win32_platform.parse_pe(bytes(header))
+        # Capped at 96 even though field claimed 65535.
+        self.assertEqual(len(pe.sections), 96)
+        self.assertEqual(pe.num_sections, 0xFFFF)  # raw field preserved
+
+    def test_small_image_base(self):
+        """Some embedded/specialty PEs use a tiny ImageBase (e.g. 0x10000
+        for early drivers) instead of the standard 0x400000. Parser must
+        not assume any specific value, and entry_point_va must compose
+        correctly."""
+        # Standard PE32 with ImageBase overridden to 0x10000.
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            0xE0,
+            0x010F,
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0x1000))
+        # ImageBase field is at offset 28 in PE32 optional header.
+        struct.pack_into("<I", opt, 28, 0x10000)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+        pe = win32_platform.parse_pe(bytes(buf))
+        self.assertEqual(pe.image_base, 0x10000)
+        self.assertEqual(pe.entry_point_va, 0x11000)  # base + entry RVA
+
+    def test_excessive_data_directory_count_capped(self):
+        """Some malformed/fuzzed PEs declare NumberOfRvaAndSizes far
+        beyond the canonical 16. Parser must cap iteration at 16 rather
+        than walking past the optional header into section territory."""
+        # Standard PE32 optional header (96 bytes) plus 16 data dir
+        # slots — total 224 = 0xE0 — but NumberOfRvaAndSizes claims 9999.
+        size_of_opt = 0xE0
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            size_of_opt,
+            0x010F,
+        )
+        opt = bytearray(_opt_header_pe32(entry_rva=0x1000))
+        # NumberOfRvaAndSizes is the LAST DWORD before the data
+        # directory array — at offset 92 within the optional header.
+        struct.pack_into("<I", opt, 92, 9999)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + bytes(opt) + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+        pe = win32_platform.parse_pe(bytes(buf))
+        # Iteration capped at 16 even though field says 9999.
+        self.assertEqual(len(pe.data_directories), 16)
+
+    def test_sparse_data_directory_count(self):
+        """An optional header can declare NumberOfRvaAndSizes < 16
+        (rare, older MSVC). Parser should walk only the declared count
+        and leave higher-index lookups defensive."""
+        # We construct an optional header by hand with NumberOfRvaAndSizes=2
+        # — only Export + Import slots are real; the parser must not try
+        # to read Resource / Exception / etc.
+        size_of_opt_hdr = 96 + 16  # standard PE32 fields + 2 data dirs
+        coff = struct.pack(
+            "<HHIIIHH",
+            0x014C,
+            1,
+            0x12345678,
+            0,
+            0,
+            size_of_opt_hdr,
+            0x010F,
+        )
+        # Standard fields (28) + windows (68) — last field is
+        # NumberOfRvaAndSizes which we set to 2.
+        opt = struct.pack(
+            "<HBBIIIIIIIIIHHHHHHIIIIHHIIIIII",
+            0x010B,
+            6,
+            0,
+            0x200,
+            0x200,
+            0,
+            0x1000,
+            0x1000,
+            0x2000,
+            0x400000,
+            0x1000,
+            0x200,
+            4,
+            0,
+            0,
+            0,
+            4,
+            0,
+            0,
+            0x4000,
+            0x200,
+            0,
+            3,
+            0,
+            0x100000,
+            0x1000,
+            0x100000,
+            0x1000,
+            0,
+            2,  # NumberOfRvaAndSizes = 2
+        )
+        # Just two directory entries (Export + Import, both empty).
+        opt += struct.pack("<IIII", 0, 0, 0, 0)
+        assert len(opt) == size_of_opt_hdr, (len(opt), size_of_opt_hdr)
+        sec = _section_header(
+            b".text", 0x10, 0x1000, FILE_ALIGN, FILE_ALIGN, 0x60000020
+        )
+        header = DOS_STUB + b"PE\x00\x00" + coff + opt + sec
+        header = header.ljust(FILE_ALIGN, b"\x00")
+        buf = bytearray(header) + b"\x90" * FILE_ALIGN
+        pe = win32_platform.parse_pe(bytes(buf))
+        # Only 2 data directories populated.
+        self.assertEqual(len(pe.data_directories), 2)
+        # All sub-parsers that need higher indices should no-op.
+        self.assertFalse(pe.resources)
+        self.assertFalse(pe.runtime_functions)
+        self.assertFalse(pe.safe_seh_handlers)
+        self.assertFalse(pe.cfg_function_rvas)
+        self.assertFalse(pe.bound_imports)
+        self.assertFalse(pe.delay_imports)
+
+    def test_empty_data_directory_index(self):
+        """Optional header listing fewer than 16 data dirs still parses."""
+        # _build_pe always provides space for 16 entries; pass only one
+        # (zeros) and verify the others come out empty.
+        pe_bytes = _build_pe(
+            sections=[
+                {
+                    "name": b".text",
+                    "vsize": 0x10,
+                    "vaddr": 0x1000,
+                    "rsize": FILE_ALIGN,
+                    "rptr": FILE_ALIGN,
+                    "chars": 0x60000020,
+                    "body": b"\x90" * 0x10,
+                },
+            ],
+            data_dirs=[(0, 0)],
+        )
+        pe = win32_platform.parse_pe(pe_bytes)
+        self.assertFalse(pe.exports)
+        self.assertFalse(pe.imports)
+        self.assertFalse(pe.resources)
+        self.assertFalse(pe.runtime_functions)
+
+
+if __name__ == "__main__":
+    unittest.main()