diff --git a/extradocx/src/extradocx/__init__.py b/extradocx/src/extradocx/__init__.py
index 4756a411..be2df86e 100644
--- a/extradocx/src/extradocx/__init__.py
+++ b/extradocx/src/extradocx/__init__.py
@@ -16,9 +16,22 @@
 
     json_str = to_json(doc)        # full-fidelity JSON with XPath pointers
     md_str = to_markdown(doc)      # GFM markdown
+
+Markdown round-trip::
+
+    from extradocx import DocxParser, to_markdown, parse_markdown, diff
+
+    doc = DocxParser("report.docx").parse()
+    md = to_markdown(doc)
+    edited_md = md.replace("Draft", "Final")  # stand-in for the user's edits
+    edited_doc = parse_markdown(edited_md)
+    ops = diff(doc, edited_doc)    # list of DiffOp
 """
 
+from extradocx.docx_apply import apply_ops
+from extradocx.md_diff import diff
+from extradocx.md_parser import parse_markdown
 from extradocx.parser import DocxParser
 from extradocx.serializers import to_json, to_markdown
 
-__all__ = ["DocxParser", "to_json", "to_markdown"]
+__all__ = ["DocxParser", "to_json", "to_markdown", "parse_markdown", "diff", "apply_ops"]
diff --git a/extradocx/src/extradocx/diff_ops.py b/extradocx/src/extradocx/diff_ops.py
new file mode 100644
index 00000000..8a5420dc
--- /dev/null
+++ b/extradocx/src/extradocx/diff_ops.py
@@ -0,0 +1,201 @@
+"""
+Diff operation types for markdown AST diffing.
+
+Each operation references a node in the **base** AST (via its xpath or index
+path) and describes how the user intended to edit the markdown.
+
+``extradocx.docx_apply.apply_ops`` projects these operations back onto the
+original DOCX document.
+
+Operation types:
+
+  Block-level:
+    ReplaceHeading    — heading level or text changed
+    ReplaceParagraph  — paragraph text/formatting changed
+    ReplaceCodeBlock  — code block content or language changed
+    InsertBlock       — a new block was added (no base counterpart)
+    DeleteBlock       — a base block was removed
+    ReplaceTable      — table content changed
+    ReplaceListItem   — list item content changed
+    ReplaceList       — list structure changed (items added/removed/reordered)
+    ReplaceBlockQuote — block quote content changed
+
+  Inline-level (planned; no classes for these are defined in this module yet):
+    ModifyText        — text content of a run changed
+    ModifyFormatting  — formatting flags of a run changed (bold, italic, …)
+
+Public API:
+    DiffOp = Union of the block-level operation types (see also ListItemOp)
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Union
+
+from extradocx.ast_nodes import BlockNode, InlineNode
+
+# ---------------------------------------------------------------------------
+# Block-level operations
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class InsertBlock:
+    """A new block was inserted at a given position.
+
+    ``position`` is the index in the derived children list where the block
+    appears.  ``block`` is the full derived AST node.
+
+    ``after_xpath`` is the xpath of the **last base block that precedes this
+    insertion point** — i.e. the new block should be inserted immediately
+    after the DOCX element identified by that xpath.  Empty string means
+    insert at the very beginning of the parent container.
+    """
+
+    position: int  # index in the derived children list
+    block: BlockNode  # derived AST node to materialise in the DOCX
+    after_xpath: str = ""  # "" means: insert before the first child
+
+    def __repr__(self) -> str:
+        btype = type(self.block).__name__  # class name only; keeps the repr short
+        return f"InsertBlock(position={self.position}, block_type={btype})"
+
+
+@dataclass
+class DeleteBlock:
+    """A block from the base AST was deleted.
+
+    ``base_index`` is the index of the deleted block in the base document's
+    children list.  ``base_xpath`` is the xpath of the deleted node (for
+    traceability back to the DOCX).
+    """
+
+    base_index: int  # position in the base document's children list
+    base_xpath: str  # docx_apply removes the element this resolves to; "" is a no-op
+
+    def __repr__(self) -> str:
+        return f"DeleteBlock(base_index={self.base_index}, xpath={self.base_xpath!r})"
+
+
+@dataclass
+class ReplaceHeading:
+    """A heading's level or inline content changed.
+
+    ``base_index``/``base_xpath`` identify the base node.
+    ``new_level`` and ``new_children`` carry the desired state.
+    """
+
+    base_index: int
+    base_xpath: str  # resolved by docx_apply to the heading's w:p
+    old_level: int
+    new_level: int  # docx_apply restyles to "Heading<new_level>" if it differs and is > 0
+    old_text: str
+    new_text: str
+    new_children: list[InlineNode] = field(default_factory=list)  # replaces the runs
+
+
+@dataclass
+class ReplaceParagraph:
+    """A paragraph's inline content changed (base node identified by index/xpath)."""
+
+    base_index: int
+    base_xpath: str  # resolved by docx_apply to the paragraph's w:p
+    old_text: str
+    new_text: str
+    new_children: list[InlineNode] = field(default_factory=list)  # replaces the runs
+
+
+@dataclass
+class ReplaceCodeBlock:
+    """A code block's content or language changed (old/new values side by side)."""
+
+    base_index: int
+    base_xpath: str  # resolved by docx_apply to the code paragraph
+    old_code: str
+    new_code: str  # docx_apply writes this back as a single run
+    old_language: str
+    new_language: str  # NOTE(review): not consulted by docx_apply's code-block handler
+
+
+@dataclass
+class ReplaceTable:
+    """Table content changed.  Carries the rows of the full derived table node."""
+
+    base_index: int
+    base_xpath: str  # resolved by docx_apply to the w:tbl element
+    new_rows: list  # list of TableRow from the derived AST (rows expose .cells)
+
+
+@dataclass
+class ReplaceList:
+    """A list (bullet or ordered) changed — items added, removed, or edited.
+
+    ``item_ops`` describes per-item changes within the list.
+    ``new_items`` is the full derived items list.
+    """
+
+    base_index: int
+    base_xpath: str  # docx_apply clones the paragraph at this xpath for inserted items
+    list_type: str  # "bullet" or "ordered"
+    item_ops: list[ListItemOp] = field(default_factory=list)  # applied by docx_apply
+    new_items: list = field(default_factory=list)  # list of ListItem
+
+
+@dataclass
+class ReplaceBlockQuote:
+    """Block quote content changed; the edits themselves are in ``inner_ops``."""
+
+    base_index: int
+    base_xpath: str
+    inner_ops: list[DiffOp] = field(default_factory=list)  # dispatched one by one
+
+
+# ---------------------------------------------------------------------------
+# List-item level operations (nested within ReplaceList)
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class InsertListItem:
+    """A new list item was inserted (nested inside ``ReplaceList.item_ops``)."""
+
+    position: int
+    item: object  # ListItem; docx_apply skips the op when it is anything else
+
+
+@dataclass
+class DeleteListItem:
+    """A list item was removed (nested inside ``ReplaceList.item_ops``)."""
+
+    base_item_index: int
+    base_xpath: str  # xpath of the item's w:p; "" makes docx_apply skip the op
+
+
+@dataclass
+class ReplaceListItem:
+    """A list item's content changed (nested inside ``ReplaceList.item_ops``)."""
+
+    base_item_index: int
+    base_xpath: str  # xpath of the item's w:p
+    old_text: str
+    new_text: str  # docx_apply writes this back as one unformatted run
+
+
+ListItemOp = Union[InsertListItem, DeleteListItem, ReplaceListItem]  # ReplaceList.item_ops
+
+
+# ---------------------------------------------------------------------------
+# Union of all diff operations
+# ---------------------------------------------------------------------------
+
+DiffOp = Union[  # block-level ops only; list-item ops are covered by ListItemOp
+    InsertBlock,
+    DeleteBlock,
+    ReplaceHeading,
+    ReplaceParagraph,
+    ReplaceCodeBlock,
+    ReplaceTable,
+    ReplaceList,
+    ReplaceBlockQuote,
+]
diff --git a/extradocx/src/extradocx/docx_apply.py b/extradocx/src/extradocx/docx_apply.py
new file mode 100644
index 00000000..e21d1563
--- /dev/null
+++ b/extradocx/src/extradocx/docx_apply.py
@@ -0,0 +1,693 @@
+"""
+Apply DiffOp operations back to a DOCX file.
+
+Reads the DOCX, manipulates word/document.xml using the xpath references
+carried by each op, then writes the modified DOCX to a new path.
+
+Supported operations:
+  ReplaceParagraph  — update inline content of a paragraph
+  ReplaceHeading    — update style + inline content of a heading
+  ReplaceCodeBlock  — update text content of a code block paragraph
+  DeleteBlock       — remove a w:p or w:tbl element
+  InsertBlock       — insert a new w:p (or list of w:p) after a reference element
+  ReplaceTable      — update table cell text content
+  ReplaceList       — apply per-item ops (insert/delete/replace) to list paragraphs
+  ReplaceBlockQuote — recursively apply inner ops to block-quote paragraphs
+
+Public API:
+    apply_ops(
+        docx_path:     Path | str,
+        ops:           list[DiffOp],
+        output_path:   Path | str,
+        base_children: list[BlockNode] | None = None,
+    ) -> None
+"""
+
+from __future__ import annotations
+
+import copy
+import io
+import re
+import xml.etree.ElementTree as ET
+import zipfile
+from pathlib import Path
+from typing import Union
+
+from extradocx.ast_nodes import (
+    BlockNode,
+    BlockQuote,
+    BulletList,
+    CodeBlock,
+    Heading,
+    Image,
+    InlineNode,
+    LineBreak,
+    Link,
+    ListItem,
+    OrderedList,
+    Paragraph,
+    Table,
+    TextRun,
+    ThematicBreak,
+)
+from extradocx.diff_ops import (
+    DeleteBlock,
+    DeleteListItem,
+    DiffOp,
+    InsertBlock,
+    InsertListItem,
+    ReplaceBlockQuote,
+    ReplaceCodeBlock,
+    ReplaceHeading,
+    ReplaceList,
+    ReplaceListItem,
+    ReplaceParagraph,
+    ReplaceTable,
+)
+
+# ---------------------------------------------------------------------------
+# XML namespace constants
+# ---------------------------------------------------------------------------
+
+_W_URI = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+_XML_URI = "http://www.w3.org/XML/1998/namespace"
+W = f"{{{_W_URI}}}"  # "{<uri>}" prefix, so that W + "p" is the qualified tag of w:p
+XML = f"{{{_XML_URI}}}"  # same for xml:* attributes such as xml:space
+
+_NS = {  # prefix -> namespace URI; also used to expand prefixes in xpaths
+    "w": _W_URI,
+    "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
+    "wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
+    "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
+    "mc": "http://schemas.openxmlformats.org/markup-compatibility/2006",
+    "xml": _XML_URI,
+}
+
+# Register prefixes so ElementTree serialises with them instead of ns0, ns1, ...
+for _pfx, _uri in _NS.items():
+    ET.register_namespace(_pfx, _uri)
+ET.register_namespace("w14", "http://schemas.microsoft.com/office/word/2010/wordml")
+ET.register_namespace("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships")
+ET.register_namespace("wpc", "http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas")
+ET.register_namespace("ct", "http://schemas.openxmlformats.org/package/2006/content-types")
+ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
+ET.register_namespace("dc", "http://purl.org/dc/elements/1.1/")
+ET.register_namespace(
+    "cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
+)
+ET.register_namespace(
+    "ep",
+    "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties",
+)
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def apply_ops(
+    docx_path: Union[Path, str],
+    ops: list[DiffOp],
+    output_path: Union[Path, str],
+    base_children: list[BlockNode] | None = None,
+) -> None:
+    """Apply *ops* to the DOCX at *docx_path*, writing the result to *output_path*.
+
+    Parameters
+    ----------
+    docx_path:
+        Source .docx file.
+    ops:
+        List of DiffOp from ``md_diff.diff()``.
+    output_path:
+        Destination .docx file (can equal docx_path for in-place edit: the
+        source archive is read completely before anything is written).
+    base_children:
+        Optional list of BlockNodes from the base Document.  Currently
+        unused; accepted for diagnostics to be added later.
+
+    Raises
+    ------
+    ValueError
+        If ``word/document.xml`` is missing or empty, or has no ``w:body``.
+    """
+    docx_path = Path(docx_path)
+    output_path = Path(output_path)
+
+    # Read all files from the zip
+    with zipfile.ZipFile(docx_path, "r") as zf:
+        file_map: dict[str, bytes] = {name: zf.read(name) for name in zf.namelist()}
+        zip_info_map: dict[str, zipfile.ZipInfo] = {info.filename: info for info in zf.infolist()}
+
+    doc_xml = file_map.get("word/document.xml", b"")
+    if not doc_xml:
+        raise ValueError("No word/document.xml found in the DOCX archive")
+
+    # Parse XML — preserve namespace declarations via ET.register_namespace above
+    root = ET.fromstring(doc_xml)
+
+    # Find body element for use in InsertBlock
+    body = root.find(f"{W}body")
+    if body is None:
+        raise ValueError("No w:body element found in word/document.xml")
+
+    # Xpaths count siblings per tag (w:p[3] is the third w:p), so removing or
+    # adding an element renumbers everything that FOLLOWS it and nothing
+    # that precedes it.  Ops are therefore applied from the end of the
+    # document towards its start: whenever an op runs, every mutation so far
+    # happened behind its anchor, and its xpath still resolves to the base
+    # element it was computed for.
+    #
+    # Anchors are ranked by their position in the pristine tree.  sorted()
+    # evaluates every key before it reorders anything, so all lookups in
+    # _sort_key see the unmodified document.
+    doc_order = {id(el): pos for pos, el in enumerate(root.iter())}
+
+    def _sort_key(op: DiffOp) -> tuple[int, int, int]:
+        """Return (anchor rank, insert-first flag, derived position) for *op*."""
+        is_insert = isinstance(op, InsertBlock)
+        xpath = op.after_xpath if is_insert else getattr(op, "base_xpath", "")
+        anchor = _find_by_xpath(root, xpath)
+        if anchor is not None:
+            rank = doc_order[id(anchor)]
+        else:
+            # Inserts without an anchor go to the start of the body: run them
+            # last.  Other ops without one run first, on the pristine tree.
+            rank = -1 if is_insert else len(doc_order)
+        # On a shared anchor (descending order): inserts run before a
+        # replace/delete of the anchor itself, while it still exists, and
+        # inserts with a higher derived position run first, because each
+        # one lands directly behind the anchor, in front of earlier ones.
+        return (rank, int(is_insert), op.position if is_insert else 0)
+
+    for op in sorted(ops, key=_sort_key, reverse=True):
+        _apply_op(root, body, op)
+
+    # Serialise back to bytes
+    new_doc_xml = ET.tostring(root, encoding="unicode", xml_declaration=False)
+    # Prepend XML declaration (ET strips it when encoding='unicode')
+    xml_decl = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
+    new_doc_bytes = (xml_decl + new_doc_xml).encode("utf-8")
+
+    # Write new DOCX
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as out_zf:
+        for name, data in file_map.items():
+            info = zip_info_map[name]
+            new_info = zipfile.ZipInfo(filename=info.filename, date_time=info.date_time)
+            new_info.compress_type = zipfile.ZIP_DEFLATED
+            if name == "word/document.xml":
+                out_zf.writestr(new_info, new_doc_bytes)
+            else:
+                out_zf.writestr(new_info, data)
+
+    output_path.write_bytes(buf.getvalue())
+
+
+# ---------------------------------------------------------------------------
+# Operation dispatch
+# ---------------------------------------------------------------------------
+
+
+def _apply_op(root: ET.Element, body: ET.Element, op: DiffOp) -> None:
+    """Dispatch a single DiffOp to the handler for its type; unknown types are ignored."""
+    # Only block insertion needs *body*; every other handler takes (root, op).
+    if isinstance(op, InsertBlock):
+        _apply_insert_block(root, body, op)
+        return
+    handlers = (
+        (ReplaceParagraph, _apply_replace_paragraph),
+        (ReplaceHeading, _apply_replace_heading),
+        (ReplaceCodeBlock, _apply_replace_codeblock),
+        (DeleteBlock, _apply_delete_block),
+        (ReplaceTable, _apply_replace_table),
+        (ReplaceList, _apply_replace_list),
+        (ReplaceBlockQuote, _apply_replace_blockquote),
+    )
+    for op_type, handler in handlers:
+        if isinstance(op, op_type):
+            handler(root, op)
+            return
+
+
+# ---------------------------------------------------------------------------
+# XPath resolution
+# ---------------------------------------------------------------------------
+
+_XPATH_PART_RE = re.compile(r"(\w+):(\w+)\[(\d+)\]")
+
+
+def _find_by_xpath(root: ET.Element, xpath: str) -> ET.Element | None:
+    """Resolve a /w:document[1]/... XPath starting from the document root element.
+
+    Every step has the form ``prefix:name[n]``, where n counts only siblings
+    with that tag: w:p[3] is the 3rd <w:p> child, not the 3rd child overall.
+    The first step names the root itself and is skipped without checking.
+
+    Returns None for an empty xpath, for a step that does not have that
+    form, and when a step's index has no matching child.
+    """
+    if not xpath:
+        return None
+
+    node = root
+    for step in xpath.strip("/").split("/")[1:]:
+        parsed = _XPATH_PART_RE.match(step)
+        if parsed is None:
+            return None
+        prefix, local, raw_index = parsed.groups()
+        wanted = int(raw_index)
+        # A prefix missing from _NS yields "{}name"; no child is expected
+        # to carry such a tag, so the lookup then fails.
+        qualified = f"{{{_NS.get(prefix, '')}}}{local}"
+        same_tag = [kid for kid in node if kid.tag == qualified]
+        if not 1 <= wanted <= len(same_tag):
+            return None
+        node = same_tag[wanted - 1]
+
+    return node
+
+
+def _find_parent(root: ET.Element, target: ET.Element) -> ET.Element | None:
+    """Return the element at or below *root* that has *target* as a direct child."""
+    return next(
+        (node for node in root.iter() if any(kid is target for kid in node)),
+        None,
+    )
+
+
+def _body_child_index(body: ET.Element, element: ET.Element) -> int:
+    """Return the position of *element* among the direct children of *body*.
+
+    Children are compared by identity; -1 means *element* is not a child.
+    """
+    hits = (pos for pos, kid in enumerate(body) if kid is element)
+    return next(hits, -1)
+
+
+# ---------------------------------------------------------------------------
+# Paragraph / heading content replacement
+# ---------------------------------------------------------------------------
+
+
+def _apply_replace_paragraph(root: ET.Element, op: ReplaceParagraph) -> None:
+    """Swap the runs of the paragraph at op.base_xpath for op.new_children."""
+    target = _find_by_xpath(root, op.base_xpath)
+    if target is not None:  # an unresolvable xpath makes this op a no-op
+        _replace_inline_content(target, op.new_children)
+
+
+def _apply_replace_heading(root: ET.Element, op: ReplaceHeading) -> None:
+    """Restyle (if the level changed) and re-fill the heading at op.base_xpath.
+
+    The style id is "Heading<new_level>"; new_level <= 0 keeps the current style.
+    """
+    heading = _find_by_xpath(root, op.base_xpath)
+    if heading is None:
+        return
+    if op.new_level > 0 and op.new_level != op.old_level:
+        _set_para_style(heading, f"Heading{op.new_level}")
+    _replace_inline_content(heading, op.new_children)
+
+
+def _apply_replace_codeblock(root: ET.Element, op: ReplaceCodeBlock) -> None:
+    """Put op.new_code into the code paragraph at op.base_xpath as a single run."""
+    target = _find_by_xpath(root, op.base_xpath)
+    if target is not None:
+        # w:pPr is left untouched, so the paragraph keeps its existing code style.
+        _replace_inline_content(target, [TextRun(text=op.new_code, xpath="")])
+
+
+def _set_para_style(para: ET.Element, style_id: str) -> None:
+    """Point w:pPr/w:pStyle of *para* at *style_id*, creating either element if absent."""
+    props = para.find(f"{W}pPr")
+    if props is None:
+        # A new properties element goes in front of the paragraph's other children.
+        props = ET.Element(f"{W}pPr")
+        para.insert(0, props)
+
+    style_el = props.find(f"{W}pStyle")
+    if style_el is None:
+        # Built detached and inserted at index 0, so that pStyle is the first
+        # child of pPr.
+        style_el = ET.Element(f"{W}pStyle")
+        props.insert(0, style_el)
+    style_el.set(f"{W}val", style_id)
+
+
+def _replace_inline_content(para: ET.Element, inlines: list[InlineNode]) -> None:
+    """Remove all run-type children of *para* and replace them with *inlines*.
+
+    Runs, hyperlinks, tracked insertions/deletions and bookmark markers are
+    removed; every other child (notably w:pPr) is preserved.  The new
+    elements are appended after the children that remain.
+    """
+    removable = {
+        f"{W}r",
+        f"{W}hyperlink",
+        f"{W}ins",
+        f"{W}del",
+        f"{W}bookmarkStart",
+        f"{W}bookmarkEnd",
+    }
+    # Collect the matches before removing anything: dropping children while
+    # iterating *para* itself would skip siblings.
+    stale = [kid for kid in para if kid.tag in removable]
+    for kid in stale:
+        para.remove(kid)
+
+    # New content goes after whatever survived (e.g. w:pPr).
+    para.extend(_inlines_to_xml(inlines))
+
+
+# ---------------------------------------------------------------------------
+# Delete block
+# ---------------------------------------------------------------------------
+
+
+def _apply_delete_block(root: ET.Element, op: DeleteBlock) -> None:
+    """Detach the element at op.base_xpath from its parent.
+
+    No-op when the xpath is empty or unresolvable, or no parent is found.
+    (_find_by_xpath itself returns None for an empty xpath.)
+    """
+    doomed = _find_by_xpath(root, op.base_xpath)
+    owner = _find_parent(root, doomed) if doomed is not None else None
+    if owner is not None:
+        owner.remove(doomed)
+
+
+# ---------------------------------------------------------------------------
+# Insert block
+# ---------------------------------------------------------------------------
+
+
+def _apply_insert_block(root: ET.Element, body: ET.Element, op: InsertBlock) -> None:
+    """Insert the XML for op.block directly after the element at op.after_xpath.
+
+    An empty after_xpath puts the new elements at the very start of *body*.
+    If the anchor cannot be resolved, has no parent, or cannot be located in
+    its parent, the elements go to the end of *body*, before any w:sectPr.
+    Blocks that produce no XML are skipped.
+    """
+    fresh = _block_to_xml_elements(op.block)
+    if not fresh:
+        return
+
+    if not op.after_xpath:
+        # No anchor: the new elements become the first children of the body.
+        for offset, el in enumerate(fresh):
+            body.insert(offset, el)
+        return
+
+    # Resolve anchor -> parent -> index; a miss at any step leaves
+    # anchor_pos at -1 and triggers the end-of-body fallback.
+    anchor = _find_by_xpath(root, op.after_xpath)
+    owner = _find_parent(root, anchor) if anchor is not None else None
+    anchor_pos = _body_child_index(owner, anchor) if owner is not None else -1
+    if anchor_pos == -1:
+        _insert_before_sectpr(body, fresh)
+        return
+
+    # Consecutive slots right behind the anchor keep the elements in order.
+    for offset, el in enumerate(fresh, start=1):
+        owner.insert(anchor_pos + offset, el)
+
+
+def _insert_before_sectpr(body: ET.Element, elements: list[ET.Element]) -> None:
+    """Add *elements* at the end of *body*, ahead of its last w:sectPr child if any."""
+    kids = list(body)
+    # Section properties must stay the last child of the body, so when one
+    # exists the new elements go in front of the last one; else they append.
+    sect_positions = [pos for pos, kid in enumerate(kids) if kid.tag == f"{W}sectPr"]
+    start = sect_positions[-1] if sect_positions else len(kids)
+
+    # Consecutive indices keep *elements* in their given order.
+    for offset, el in enumerate(elements):
+        body.insert(start + offset, el)
+
+
+# ---------------------------------------------------------------------------
+# Replace table
+# ---------------------------------------------------------------------------
+
+
+def _apply_replace_table(root: ET.Element, op: ReplaceTable) -> None:
+    """Copy cell text from op.new_rows into the table at op.base_xpath.
+
+    Rows and cells are paired by position.  Only cells present on both
+    sides are touched — no rows or cells are added or removed — and within
+    a cell only its first paragraph is rewritten, with the inline content
+    of all Paragraph/Heading blocks of the new cell concatenated.
+    """
+    table = _find_by_xpath(root, op.base_xpath)
+    if table is None:
+        return
+
+    old_rows = [kid for kid in table if kid.tag == f"{W}tr"]
+    # zip() stops at the shorter side, which ignores surplus derived rows
+    # (and, below, surplus derived cells).
+    for old_tr, new_row in zip(old_rows, op.new_rows):
+        old_cells = [kid for kid in old_tr if kid.tag == f"{W}tc"]
+        for old_tc, new_cell in zip(old_cells, new_row.cells):
+            first_para = old_tc.find(f"{W}p")
+            if first_para is None:
+                continue  # cell without a paragraph: nothing to rewrite
+            merged: list[InlineNode] = [
+                inline
+                for blk in new_cell.children
+                if isinstance(blk, (Paragraph, Heading))
+                for inline in blk.children
+            ]
+            _replace_inline_content(first_para, merged)
+
+
+# ---------------------------------------------------------------------------
+# Replace list
+# ---------------------------------------------------------------------------
+
+
+def _apply_replace_list(root: ET.Element, op: ReplaceList) -> None:
+    """Apply per-item ops within a list.
+
+    List items in DOCX are individual w:p elements, each carrying a numPr.
+    The base_xpath on each list item op points to the specific w:p.  All
+    of them are resolved before anything is changed, because adding or
+    removing a w:p renumbers the per-tag indices of the siblings after it.
+
+    New items are cloned from the paragraph at the list's own base_xpath and
+    placed directly after it; InsertListItem ops whose item is not a
+    ListItem are skipped, as are ops whose xpath does not resolve.
+    """
+    resolved = [
+        (item_op, _find_by_xpath(root, getattr(item_op, "base_xpath", "")))
+        for item_op in op.item_ops
+    ]
+    # Inserts run first, while the paragraph they are cloned from is still
+    # in the tree, and highest position first: each clone lands directly
+    # after that paragraph, so this leaves the clones in derived order.
+    additions = [o for o, _ in resolved if isinstance(o, InsertListItem)]
+    for item_op in sorted(additions, key=lambda o: o.position, reverse=True):
+        if isinstance(item_op.item, ListItem):
+            _insert_list_item(root, op.base_xpath, item_op.item)
+
+    for item_op, para in resolved:
+        if para is None:
+            continue
+        if isinstance(item_op, ReplaceListItem):
+            _replace_inline_content(para, [TextRun(text=item_op.new_text, xpath="")])
+        elif isinstance(item_op, DeleteListItem):
+            parent = _find_parent(root, para)
+            if parent is not None:
+                parent.remove(para)
+
+
+def _insert_list_item(
+    root: ET.Element,
+    list_xpath: str,
+    new_item: ListItem,
+) -> None:
+    """Insert a new list item w:p by cloning the paragraph at *list_xpath*.
+
+    The clone keeps everything _replace_inline_content leaves alone (notably
+    w:pPr, hence the paragraph's list settings) and is placed directly after
+    the reference.  No-op when the xpath or its parent cannot be resolved.
+    """
+    reference = _find_by_xpath(root, list_xpath)
+    if reference is None:
+        return
+    parent = _find_parent(root, reference)
+    if parent is None:
+        return
+
+    # Hand the item's inline nodes over as they are, the same way
+    # _make_list_elements does: joining run texts with " " would put spaces
+    # between adjacent runs ("Hello ", "world", "!" -> "Hello  world !") and
+    # drop their bold/italic/... flags.
+    inlines = [n for blk in new_item.children if isinstance(blk, Paragraph) for n in blk.children]
+
+    clone = copy.deepcopy(reference)
+    _replace_inline_content(clone, inlines)
+    ref_idx = _body_child_index(parent, reference)
+    if ref_idx >= 0:
+        parent.insert(ref_idx + 1, clone)
+
+
+# ---------------------------------------------------------------------------
+# Replace block quote
+# ---------------------------------------------------------------------------
+
+
+def _apply_replace_blockquote(root: ET.Element, op: ReplaceBlockQuote) -> None:
+    """Run each of op.inner_ops through _apply_op; no-op if there is no w:body."""
+    # The body is looked up again here because this handler only gets the root.
+    container = root.find(f"{W}body")
+    if container is not None:
+        for nested in op.inner_ops:
+            _apply_op(root, container, nested)
+
+
+# ---------------------------------------------------------------------------
+# XML element creation helpers
+# ---------------------------------------------------------------------------
+
+
+def _block_to_xml_elements(block: BlockNode) -> list[ET.Element]:
+    """Convert an AST block node to the w:p elements that represent it (possibly none)."""
+    if isinstance(block, Paragraph):
+        return [_make_para_element(block.children, style_id="")]
+    if isinstance(block, Heading):
+        return [_make_para_element(block.children, style_id=f"Heading{block.level}")]
+    if isinstance(block, CodeBlock):
+        return [_make_code_para_element(block)]
+    if isinstance(block, ThematicBreak):
+        # No dedicated rule style is applied: the break becomes an empty,
+        # unstyled paragraph.
+        return [_make_para_element([], style_id="")]
+    if isinstance(block, BulletList):
+        return _make_list_elements(block.items, ordered=False)
+    if isinstance(block, OrderedList):
+        return _make_list_elements(block.items, ordered=True)
+    if isinstance(block, Table):
+        return []  # table insertion is not supported yet
+    if isinstance(block, BlockQuote):
+        # Only direct Paragraph children are carried over, one "Quote"-styled
+        # w:p each; any other nested block is dropped.
+        return [
+            _make_para_element(inner.children, style_id="Quote")
+            for inner in block.children
+            if isinstance(inner, Paragraph)
+        ]
+    # Unrecognised block types produce no XML.
+    return []
+
+
+def _make_para_element(inlines: list[InlineNode], style_id: str) -> ET.Element:
+    """Create a <w:p> element with the given inline content and style.
+
+    An empty *style_id* produces a paragraph without w:pPr.
+    """
+    paragraph = ET.Element(f"{W}p")
+    if style_id:
+        # Properties are added first, so the runs appended below follow w:pPr.
+        props = ET.SubElement(paragraph, f"{W}pPr")
+        ET.SubElement(props, f"{W}pStyle").set(f"{W}val", style_id)
+
+    paragraph.extend(_inlines_to_xml(inlines))
+    return paragraph
+
+
+def _make_code_para_element(block: CodeBlock) -> ET.Element:
+    """Create a <w:p> for a code block: one Courier New run per line of code."""
+    para = ET.Element(f"{W}p")
+    ppr = ET.SubElement(para, f"{W}pPr")
+    pstyle = ET.SubElement(ppr, f"{W}pStyle")
+    pstyle.set(f"{W}val", "Code")
+
+    for index, line in enumerate(block.code.split("\n")):
+        run = ET.SubElement(para, f"{W}r")
+        rpr = ET.SubElement(run, f"{W}rPr")
+        fonts = ET.SubElement(rpr, f"{W}rFonts")
+        fonts.set(f"{W}ascii", "Courier New")
+        fonts.set(f"{W}hAnsi", "Courier New")
+        if index:  # consecutive w:t runs flow together; w:br keeps the line break
+            ET.SubElement(run, f"{W}br")
+        t = ET.SubElement(run, f"{W}t")
+        t.text = line
+        if line != line.strip():  # indentation (spaces or tabs) must survive
+            t.set(f"{XML}space", "preserve")
+    return para
+
+
+def _make_list_elements(items: list[ListItem], *, ordered: bool) -> list[ET.Element]:
+    """Create one styled w:p element per list item.
+
+    Paragraphs get the "ListNumber" (ordered) or "ListBullet" style; no
+    w:numPr is emitted.  Only the inline content of an item's direct
+    Paragraph children is used, concatenated into the item's single w:p.
+    Other child blocks of an item are dropped.
+    """
+    style_id = "ListNumber" if ordered else "ListBullet"
+    built: list[ET.Element] = []
+    for item in items:
+        # style_id is never empty here, so every paragraph gets a w:pPr.
+        paragraphs = (blk for blk in item.children if isinstance(blk, Paragraph))
+        inlines = [node for blk in paragraphs for node in blk.children]
+        built.append(_make_para_element(inlines, style_id=style_id))
+
+    return built
+
+
+def _inlines_to_xml(inlines: list[InlineNode]) -> list[ET.Element]:
+    """Convert inline AST nodes to a list of w:r elements, in order.
+
+    Links become one plain run of their TextRun children's text; images and
+    unrecognised nodes produce nothing.
+    """
+    out: list[ET.Element] = []
+    for node in inlines:
+        if isinstance(node, TextRun):
+            out.append(_make_run_element(node))
+        elif isinstance(node, Link):
+            # No relationship entry can be created here, so the hyperlink
+            # itself is lost and only its visible text is kept.
+            label = "".join(kid.text for kid in node.children if isinstance(kid, TextRun))
+            if label:
+                out.append(_make_run_element(TextRun(text=label, xpath="")))
+        elif isinstance(node, Image):
+            continue  # images cannot be recreated from markdown
+        elif isinstance(node, LineBreak):
+            br_run = ET.Element(f"{W}r")
+            ET.SubElement(br_run, f"{W}br").set(f"{W}type", "textWrapping")
+            out.append(br_run)
+    return out
+
+
+def _make_run_element(run: TextRun) -> ET.Element:
+    """Convert a TextRun to a <w:r>; w:rPr is added only if a format flag is set."""
+    r = ET.Element(f"{W}r")
+
+    if run.bold or run.italic or run.underline or run.strikethrough or run.code:
+        rpr = ET.SubElement(r, f"{W}rPr")
+        if run.bold:
+            ET.SubElement(rpr, f"{W}b")
+        if run.italic:
+            ET.SubElement(rpr, f"{W}i")
+        if run.underline:
+            u = ET.SubElement(rpr, f"{W}u")
+            u.set(f"{W}val", "single")
+        if run.strikethrough:
+            ET.SubElement(rpr, f"{W}strike")
+        if run.code:
+            fonts = ET.SubElement(rpr, f"{W}rFonts")
+            fonts.set(f"{W}ascii", "Courier New")
+            fonts.set(f"{W}hAnsi", "Courier New")
+
+    t = ET.SubElement(r, f"{W}t")
+    t.text = run.text
+    # xml:space="preserve" is needed whenever the text starts or ends with
+    # whitespace of any kind (tabs and newlines too, not only " ").
+    if run.text and run.text != run.text.strip():
+        t.set(f"{XML}space", "preserve")
+
+    return r
diff --git a/extradocx/src/extradocx/md_diff.py b/extradocx/src/extradocx/md_diff.py
new file mode 100644
index 00000000..bf762575
--- /dev/null
+++ b/extradocx/src/extradocx/md_diff.py
@@ -0,0 +1,690 @@
+"""
+Markdown AST diff algorithm.
+
+Compares a **base** AST (produced by the DOCX parser, carrying xpaths) against
+a **derived** AST (produced by parsing the user-edited markdown, no xpaths)
+and emits a list of ``DiffOp`` describing the edits.
+
+Each operation reads: "Take this node in the base AST and perform this edit."
+
+The algorithm has two layers:
+
+1. **Block-level alignment** — a DP (dynamic programming) sequence alignment
+   that matches base blocks to derived blocks, detecting insertions, deletions,
+   and modifications.  Inspired by ``extradoc/diffmerge/content_align.py``.
+
+2. **Per-block diffing** — for each matched pair, compare the block content
+   and emit the appropriate operation type (ReplaceHeading, ReplaceParagraph,
+   etc.) only if content actually changed.
+
+Public API:
+
+    diff(base: Document, derived: Document) -> list[DiffOp]
+"""
+
+from __future__ import annotations
+
+import math
+from dataclasses import dataclass
+
+from extradocx.ast_nodes import (
+    BlockNode,
+    BlockQuote,
+    BulletList,
+    CodeBlock,
+    Document,
+    Heading,
+    InlineNode,
+    ListItem,
+    OrderedList,
+    Paragraph,
+    Table,
+    TextRun,
+    ThematicBreak,
+)
+from extradocx.diff_ops import (
+    DeleteBlock,
+    DeleteListItem,
+    DiffOp,
+    InsertBlock,
+    InsertListItem,
+    ListItemOp,
+    ReplaceBlockQuote,
+    ReplaceCodeBlock,
+    ReplaceHeading,
+    ReplaceList,
+    ReplaceListItem,
+    ReplaceParagraph,
+    ReplaceTable,
+)
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def diff(base: Document, derived: Document) -> list[DiffOp]:
+    """Compute the edit operations that turn ``base`` into ``derived``.
+
+    ``base`` is the original AST (from DOCX, with xpaths).
+    ``derived`` is the AST parsed from the user-edited markdown.
+
+    The top-level blocks of both documents are aligned first; the alignment
+    is then translated into a list of ``DiffOp``.
+    """
+    base_blocks = base.children
+    derived_blocks = derived.children
+    return _alignment_to_ops(
+        base_blocks,
+        derived_blocks,
+        _align_blocks(base_blocks, derived_blocks),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Block alignment (DP)
+# ---------------------------------------------------------------------------
+
+# Cost constants
+# Insert/delete cost per character of a block's plain text.
+_PARA_COST_PER_CHAR = 2.0
+# Insert/delete cost per table cell.
+_TABLE_CELL_COST = 10.0
+# Insert/delete cost of a thematic break or a list item; also the minimum
+# insert/delete cost of any other non-table block.
+_FIXED_COST = 20.0
+# Minimum word-level Jaccard similarity for two blocks (or list items) to be
+# eligible for matching.
+_MIN_SIMILARITY = 0.3
+
+
+@dataclass
+class _BlockAlignment:
+    """Outcome of aligning a base sequence against a derived sequence."""
+
+    # Matched (base_idx, derived_idx) pairs.
+    matches: list[tuple[int, int]]
+    # Base indices that have no counterpart in the derived sequence.
+    base_deletes: list[int]
+    # Derived indices that have no counterpart in the base sequence.
+    derived_inserts: list[int]
+
+
+def _align_blocks(base: list[BlockNode], derived: list[BlockNode]) -> _BlockAlignment:
+    """DP-based alignment of two block sequences.
+
+    Every base block is either matched to a derived block (only when
+    ``_matchable`` allows the pair) or deleted; every derived block is either
+    matched or inserted.  The alignment with the lowest total cost wins, and
+    ties prefer a match, then a delete, then an insert.
+
+    Returns a ``_BlockAlignment`` whose three lists are in ascending index
+    order.
+    """
+    m = len(base)
+    n = len(derived)
+
+    # Insert/delete costs depend on a single block only, so compute them once
+    # up front instead of once per DP cell.
+    del_costs = [_delete_cost(block) for block in base]
+    ins_costs = [_insert_cost(block) for block in derived]
+
+    # dp[i][j] = min cost to align base[0..i-1] with derived[0..j-1]
+    INF = math.inf
+    dp = [[INF] * (n + 1) for _ in range(m + 1)]
+    # choice[i][j]: 0 = match, 1 = delete base[i-1], 2 = insert derived[j-1]
+    choice = [[0] * (n + 1) for _ in range(m + 1)]
+
+    # Borders: aligning against an empty prefix is all deletes / all inserts.
+    dp[0][0] = 0.0
+    for i in range(1, m + 1):
+        dp[i][0] = dp[i - 1][0] + del_costs[i - 1]
+        choice[i][0] = 1
+    for j in range(1, n + 1):
+        dp[0][j] = dp[0][j - 1] + ins_costs[j - 1]
+        choice[0][j] = 2
+
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            # Option 1: delete base[i-1]
+            del_cost = dp[i - 1][j] + del_costs[i - 1]
+            # Option 2: insert derived[j-1]
+            ins_cost = dp[i][j - 1] + ins_costs[j - 1]
+            # Option 3: match (stays infinite when the pair is not matchable)
+            match_cost = INF
+            if _matchable(base[i - 1], derived[j - 1]):
+                match_cost = dp[i - 1][j - 1] + _edit_cost(base[i - 1], derived[j - 1])
+
+            best = min(match_cost, del_cost, ins_cost)
+            dp[i][j] = best
+            if best == match_cost:
+                choice[i][j] = 0
+            elif best == del_cost:
+                choice[i][j] = 1
+            else:
+                choice[i][j] = 2
+
+    # Traceback from the bottom-right corner; results come out back to front.
+    matches: list[tuple[int, int]] = []
+    base_deletes: list[int] = []
+    derived_inserts: list[int] = []
+
+    i, j = m, n
+    while i > 0 or j > 0:
+        if i > 0 and j > 0 and choice[i][j] == 0:
+            matches.append((i - 1, j - 1))
+            i -= 1
+            j -= 1
+        elif i > 0 and choice[i][j] == 1:
+            base_deletes.append(i - 1)
+            i -= 1
+        else:
+            derived_inserts.append(j - 1)
+            j -= 1
+
+    matches.reverse()
+    base_deletes.reverse()
+    derived_inserts.reverse()
+
+    return _BlockAlignment(
+        matches=matches,
+        base_deletes=base_deletes,
+        derived_inserts=derived_inserts,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Cost functions
+# ---------------------------------------------------------------------------
+
+
+def _block_text(block: BlockNode) -> str:
+    """Return the plain text of ``block`` for similarity comparison.
+
+    Container blocks (lists, tables, block quotes) contribute the text of
+    their nested blocks joined by single spaces; a thematic break is
+    represented as ``"---"``; unknown block types yield an empty string.
+    """
+    if isinstance(block, (Paragraph, Heading)):
+        return _inlines_text(block.children)
+    if isinstance(block, CodeBlock):
+        return block.code
+    if isinstance(block, (BulletList, OrderedList)):
+        return " ".join(
+            _block_text(child) for item in block.items for child in item.children
+        )
+    if isinstance(block, Table):
+        return " ".join(
+            _block_text(child)
+            for row in block.rows
+            for cell in row.cells
+            for child in cell.children
+        )
+    if isinstance(block, BlockQuote):
+        return " ".join(_block_text(child) for child in block.children)
+    if isinstance(block, ThematicBreak):
+        return "---"
+    return ""
+
+
+def _inlines_text(inlines: list[InlineNode]) -> str:
+    """Concatenate the text of all runs in ``inlines``.
+
+    Nodes that expose ``children`` are descended into; any other non-run
+    node contributes nothing.
+    """
+    return "".join(
+        node.text if isinstance(node, TextRun) else _inlines_text(node.children)
+        for node in inlines
+        if isinstance(node, TextRun) or hasattr(node, "children")
+    )
+
+
+def _word_jaccard(a: str, b: str) -> float:
+    """Jaccard similarity of the lower-cased, whitespace-separated word sets.
+
+    Two inputs without any words (empty or whitespace only) count as
+    identical (1.0); if exactly one of them has no words the result is 0.0.
+    """
+    words_a = set(a.lower().split())
+    words_b = set(b.lower().split())
+    if not words_a and not words_b:
+        return 1.0
+    if not (words_a and words_b):
+        return 0.0
+    shared = len(words_a & words_b)
+    total = len(words_a | words_b)
+    return shared / total
+
+
+def _block_kind(block: BlockNode) -> str:
+    """Map ``block`` to a coarse kind label used for matchability gating.
+
+    The first matching entry wins; anything unrecognised is ``"other"``.
+    """
+    kinds = (
+        (Heading, "heading"),
+        (Paragraph, "paragraph"),
+        (CodeBlock, "code_block"),
+        (BulletList, "bullet_list"),
+        (OrderedList, "ordered_list"),
+        (Table, "table"),
+        (BlockQuote, "block_quote"),
+        (ThematicBreak, "thematic_break"),
+    )
+    for node_type, label in kinds:
+        if isinstance(block, node_type):
+            return label
+    return "other"
+
+
+def _matchable(base: BlockNode, derived: BlockNode) -> bool:
+    """Decide whether two blocks may be paired by the aligner.
+
+    Headings and paragraphs may pair with each other; every other block must
+    pair with a block of the same kind.  Thematic breaks always pair.  In all
+    remaining cases the word-level similarity of the block texts must reach
+    ``_MIN_SIMILARITY``.
+    """
+    base_kind = _block_kind(base)
+    derived_kind = _block_kind(derived)
+
+    # A heading can become a paragraph and vice versa, so these two kinds are
+    # exempt from the same-kind requirement.
+    textual = ("heading", "paragraph")
+    cross_text = base_kind in textual and derived_kind in textual
+    if not cross_text:
+        if base_kind != derived_kind:
+            return False
+        if base_kind == "thematic_break":
+            return True
+
+    similarity = _word_jaccard(_block_text(base), _block_text(derived))
+    return similarity >= _MIN_SIMILARITY
+
+
+def _delete_cost(block: BlockNode) -> float:
+    """Cost of removing ``block`` from the base sequence.
+
+    Tables are charged per cell, thematic breaks a fixed amount, and every
+    other block per character of plain text with ``_FIXED_COST`` as a floor.
+    """
+    if isinstance(block, Table):
+        cell_count = sum(len(row.cells) for row in block.rows)
+        return cell_count * _TABLE_CELL_COST
+    if isinstance(block, ThematicBreak):
+        return _FIXED_COST
+    char_cost = len(_block_text(block)) * _PARA_COST_PER_CHAR
+    return max(char_cost, _FIXED_COST)
+
+
+def _insert_cost(block: BlockNode) -> float:
+    """Cost of adding ``block`` to the derived sequence (same as deleting it)."""
+    cost = _delete_cost(block)
+    return cost
+
+
+def _edit_cost(base: BlockNode, derived: BlockNode) -> float:
+    """Estimate the cost of turning ``base`` into ``derived``.
+
+    Differing texts cost ``(1 - similarity) * longest_text_length``.  Equal
+    texts cost nothing unless the heading level or the node type changed, in
+    which case a token cost of 1.0 is charged.
+    """
+    base_text = _block_text(base)
+    derived_text = _block_text(derived)
+
+    if base_text != derived_text:
+        similarity = _word_jaccard(base_text, derived_text)
+        longest = max(len(base_text), len(derived_text), 1)
+        return (1.0 - similarity) * longest
+
+    # Identical text: only structural differences carry a (small) cost.
+    if isinstance(base, Heading) and isinstance(derived, Heading):
+        return 0.0 if base.level == derived.level else 1.0
+    # Same type -> free; kind change with same text (e.g. paragraph ↔ heading).
+    return 0.0 if type(base) is type(derived) else 1.0
+
+
+# ---------------------------------------------------------------------------
+# Convert alignment to operations
+# ---------------------------------------------------------------------------
+
+
+def _alignment_to_ops(
+    base: list[BlockNode],
+    derived: list[BlockNode],
+    alignment: _BlockAlignment,
+) -> list[DiffOp]:
+    """Convert a block alignment into a list of DiffOp.
+
+    The returned list is ordered as follows:
+
+    1. ``DeleteBlock`` ops, highest ``base_index`` first, so that applying
+       them in order never shifts the position of a block still to be deleted;
+    2. replace ops, ascending by ``base_index``;
+    3. ``InsertBlock`` ops, ascending by ``position`` (the derived index).
+
+    Each ``InsertBlock.after_xpath`` is the xpath of the closest matched base
+    block that precedes the insertion point in ``derived`` (``""`` if none).
+    """
+    ops: list[DiffOp] = []
+
+    # Deletions (their final order is fixed by the sort below)
+    for bi in alignment.base_deletes:
+        ops.append(
+            DeleteBlock(
+                base_index=bi,
+                base_xpath=getattr(base[bi], "xpath", ""),
+            )
+        )
+
+    # Insertions.  Walk matches and insertions together in derived order so
+    # the anchor xpath is found in a single pass instead of rescanning the
+    # matches for every inserted block.
+    matches_by_derived = sorted(alignment.matches, key=lambda pair: pair[1])
+    cursor = 0
+    anchor_xpath = ""
+    for di in sorted(alignment.derived_inserts):
+        while cursor < len(matches_by_derived) and matches_by_derived[cursor][1] < di:
+            anchor_xpath = getattr(base[matches_by_derived[cursor][0]], "xpath", "")
+            cursor += 1
+        ops.append(InsertBlock(position=di, block=derived[di], after_xpath=anchor_xpath))
+
+    # Matched pairs — emit replace ops only if content changed
+    for bi, di in alignment.matches:
+        ops.extend(_diff_matched_blocks(base[bi], derived[di], bi))
+
+    # Sort: deletes first (reversed), then replaces, then inserts by position.
+    # This gives a predictable ordering for consumers.
+    def _sort_key(op: DiffOp) -> tuple[int, int]:
+        if isinstance(op, DeleteBlock):
+            # Negated so that deletes come out in descending base_index order.
+            return (0, -op.base_index)
+        if isinstance(op, InsertBlock):
+            return (2, op.position)
+        # Replace ops
+        return (1, getattr(op, "base_index", 0))
+
+    ops.sort(key=_sort_key)
+    return ops
+
+
+def _diff_matched_blocks(base: BlockNode, derived: BlockNode, base_index: int) -> list[DiffOp]:
+    """Diff a matched pair of blocks.
+
+    Dispatches on the (base, derived) node types and returns the replace ops
+    needed to turn ``base`` into ``derived``.  Returns an empty list when the
+    pair is identical or when the type combination is not handled.
+
+    A heading ↔ paragraph kind change always yields an op, even when the text
+    is unchanged, because the change of kind is itself an edit.
+    """
+    # Heading
+    if isinstance(base, Heading) and isinstance(derived, Heading):
+        return _diff_heading(base, derived, base_index)
+
+    # Heading → Paragraph (kind change): the heading was demoted.
+    if isinstance(base, Heading) and isinstance(derived, Paragraph):
+        return [
+            ReplaceParagraph(
+                base_index=base_index,
+                base_xpath=base.xpath,
+                old_text=_inlines_text(base.children),
+                new_text=_inlines_text(derived.children),
+                new_children=derived.children,
+            )
+        ]
+
+    # Paragraph → Heading (kind change): old_level=0 marks "was not a heading".
+    if isinstance(base, Paragraph) and isinstance(derived, Heading):
+        return [
+            ReplaceHeading(
+                base_index=base_index,
+                base_xpath=base.xpath,
+                old_level=0,
+                new_level=derived.level,
+                old_text=_inlines_text(base.children),
+                new_text=_inlines_text(derived.children),
+                new_children=derived.children,
+            )
+        ]
+
+    # Paragraph
+    if isinstance(base, Paragraph) and isinstance(derived, Paragraph):
+        return _diff_paragraph(base, derived, base_index)
+
+    # CodeBlock
+    if isinstance(base, CodeBlock) and isinstance(derived, CodeBlock):
+        return _diff_codeblock(base, derived, base_index)
+
+    # Table
+    if isinstance(base, Table) and isinstance(derived, Table):
+        return _diff_table(base, derived, base_index)
+
+    # Lists
+    if isinstance(base, BulletList) and isinstance(derived, BulletList):
+        return _diff_list(base.items, derived.items, base_index, base.xpath, "bullet")
+    if isinstance(base, OrderedList) and isinstance(derived, OrderedList):
+        return _diff_list(base.items, derived.items, base_index, base.xpath, "ordered")
+
+    # BlockQuote
+    if isinstance(base, BlockQuote) and isinstance(derived, BlockQuote):
+        return _diff_blockquote(base, derived, base_index)
+
+    # ThematicBreak has no content to diff; unknown combinations yield nothing.
+    return []
+
+
+# ---------------------------------------------------------------------------
+# Per-block diff helpers
+# ---------------------------------------------------------------------------
+
+
+def _diff_heading(base: Heading, derived: Heading, base_index: int) -> list[DiffOp]:
+    """Emit a ``ReplaceHeading`` unless level, text and inline formatting all agree."""
+    old_text = _inlines_text(base.children)
+    new_text = _inlines_text(derived.children)
+    unchanged = (
+        base.level == derived.level
+        and old_text == new_text
+        and _inlines_equal(base.children, derived.children)
+    )
+    if unchanged:
+        return []
+    replacement = ReplaceHeading(
+        base_index=base_index,
+        base_xpath=base.xpath,
+        old_level=base.level,
+        new_level=derived.level,
+        old_text=old_text,
+        new_text=new_text,
+        new_children=derived.children,
+    )
+    return [replacement]
+
+
+def _diff_paragraph(base: Paragraph, derived: Paragraph, base_index: int) -> list[DiffOp]:
+    """Emit a ``ReplaceParagraph`` unless text and inline formatting both agree."""
+    old_text = _inlines_text(base.children)
+    new_text = _inlines_text(derived.children)
+    changed = old_text != new_text or not _inlines_equal(base.children, derived.children)
+    if not changed:
+        return []
+    replacement = ReplaceParagraph(
+        base_index=base_index,
+        base_xpath=base.xpath,
+        old_text=old_text,
+        new_text=new_text,
+        new_children=derived.children,
+    )
+    return [replacement]
+
+
+def _diff_codeblock(base: CodeBlock, derived: CodeBlock, base_index: int) -> list[DiffOp]:
+    """Emit a ``ReplaceCodeBlock`` when the code or its language tag differs."""
+    changed = base.code != derived.code or base.language != derived.language
+    if not changed:
+        return []
+    replacement = ReplaceCodeBlock(
+        base_index=base_index,
+        base_xpath=base.xpath,
+        old_code=base.code,
+        new_code=derived.code,
+        old_language=base.language,
+        new_language=derived.language,
+    )
+    return [replacement]
+
+
+def _diff_table(base: Table, derived: Table, base_index: int) -> list[DiffOp]:
+    """Emit a ``ReplaceTable`` when the grids of cell texts differ.
+
+    Only the plain text of each cell is compared.
+    """
+
+    def _text_grid(tbl: Table) -> list[list[str]]:
+        # One string per cell: the texts of its blocks joined by spaces.
+        return [
+            [" ".join(_block_text(child) for child in cell.children) for cell in row.cells]
+            for row in tbl.rows
+        ]
+
+    if _text_grid(base) == _text_grid(derived):
+        return []
+
+    replacement = ReplaceTable(
+        base_index=base_index,
+        base_xpath=base.xpath,
+        new_rows=derived.rows,
+    )
+    return [replacement]
+
+
+def _diff_list(
+    base_items: list[ListItem],
+    derived_items: list[ListItem],
+    base_index: int,
+    base_xpath: str,
+    list_type: str,
+) -> list[DiffOp]:
+    """Diff two lists item by item.
+
+    Items are aligned with ``_align_list_items`` and compared by plain text
+    only.  Returns a single ``ReplaceList`` wrapping the per-item ops (deletes
+    in descending index order, then inserts, then replacements), or an empty
+    list when no item was inserted, deleted or textually changed.
+    """
+    alignment = _align_list_items(base_items, derived_items)
+
+    removals: list[ListItemOp] = [
+        DeleteListItem(base_item_index=bi, base_xpath=base_items[bi].xpath)
+        for bi in reversed(alignment.base_deletes)
+    ]
+    additions: list[ListItemOp] = [
+        InsertListItem(position=di, item=derived_items[di])
+        for di in alignment.derived_inserts
+    ]
+    rewrites: list[ListItemOp] = []
+    for bi, di in alignment.matches:
+        before = _item_text(base_items[bi])
+        after = _item_text(derived_items[di])
+        if before == after:
+            continue
+        rewrites.append(
+            ReplaceListItem(
+                base_item_index=bi,
+                base_xpath=base_items[bi].xpath,
+                old_text=before,
+                new_text=after,
+            )
+        )
+
+    item_ops = removals + additions + rewrites
+    # No per-item op means nothing changed.
+    if not item_ops:
+        return []
+
+    return [
+        ReplaceList(
+            base_index=base_index,
+            base_xpath=base_xpath,
+            list_type=list_type,
+            item_ops=item_ops,
+            new_items=derived_items,
+        )
+    ]
+
+
+def _item_text(item: ListItem) -> str:
+    """Return the plain text of a list item: its blocks' texts joined by spaces."""
+    return " ".join(_block_text(child) for child in item.children)
+
+
+def _align_list_items(base: list[ListItem], derived: list[ListItem]) -> _BlockAlignment:
+    """Simple DP alignment for list items (same algorithm as blocks).
+
+    Inserting or deleting an item costs ``_FIXED_COST``.  Two items may be
+    matched only when the word-level similarity of their texts reaches
+    ``_MIN_SIMILARITY``; identical texts match for free, otherwise the cost is
+    ``(1 - similarity) * longest_text_length``.  Ties prefer a match, then a
+    delete, then an insert.
+
+    Returns a ``_BlockAlignment`` whose three lists are in ascending index
+    order.
+    """
+    m = len(base)
+    n = len(derived)
+    INF = math.inf
+
+    # Item texts are needed for every (i, j) cell; extract them once.
+    base_texts = [_item_text(item) for item in base]
+    derived_texts = [_item_text(item) for item in derived]
+
+    # dp[i][j] = min cost to align base[0..i-1] with derived[0..j-1]
+    dp = [[INF] * (n + 1) for _ in range(m + 1)]
+    # choice[i][j]: 0 = match, 1 = delete base[i-1], 2 = insert derived[j-1]
+    choice = [[0] * (n + 1) for _ in range(m + 1)]
+    dp[0][0] = 0.0
+
+    for i in range(1, m + 1):
+        dp[i][0] = dp[i - 1][0] + _FIXED_COST
+        choice[i][0] = 1
+    for j in range(1, n + 1):
+        dp[0][j] = dp[0][j - 1] + _FIXED_COST
+        choice[0][j] = 2
+
+    for i in range(1, m + 1):
+        bt = base_texts[i - 1]
+        for j in range(1, n + 1):
+            dt = derived_texts[j - 1]
+            sim = _word_jaccard(bt, dt)
+
+            del_cost = dp[i - 1][j] + _FIXED_COST
+            ins_cost = dp[i][j - 1] + _FIXED_COST
+            match_cost = INF
+            if sim >= _MIN_SIMILARITY:
+                if bt == dt:
+                    match_cost = dp[i - 1][j - 1]
+                else:
+                    match_cost = dp[i - 1][j - 1] + (1.0 - sim) * max(len(bt), len(dt), 1)
+
+            best = min(match_cost, del_cost, ins_cost)
+            dp[i][j] = best
+            if best == match_cost:
+                choice[i][j] = 0
+            elif best == del_cost:
+                choice[i][j] = 1
+            else:
+                choice[i][j] = 2
+
+    # Traceback from the bottom-right corner; results come out back to front.
+    matches: list[tuple[int, int]] = []
+    base_deletes: list[int] = []
+    derived_inserts: list[int] = []
+    i, j = m, n
+    while i > 0 or j > 0:
+        if i > 0 and j > 0 and choice[i][j] == 0:
+            matches.append((i - 1, j - 1))
+            i -= 1
+            j -= 1
+        elif i > 0 and choice[i][j] == 1:
+            base_deletes.append(i - 1)
+            i -= 1
+        else:
+            derived_inserts.append(j - 1)
+            j -= 1
+
+    matches.reverse()
+    base_deletes.reverse()
+    derived_inserts.reverse()
+
+    return _BlockAlignment(
+        matches=matches,
+        base_deletes=base_deletes,
+        derived_inserts=derived_inserts,
+    )
+
+
+def _diff_blockquote(base: BlockQuote, derived: BlockQuote, base_index: int) -> list[DiffOp]:
+    """Diff the contents of two block quotes with the block-level algorithm.
+
+    The nested ops are wrapped in one ``ReplaceBlockQuote``; an empty list is
+    returned when the contents produce no ops.
+    """
+    nested_ops = _alignment_to_ops(
+        base.children,
+        derived.children,
+        _align_blocks(base.children, derived.children),
+    )
+    if nested_ops:
+        return [
+            ReplaceBlockQuote(
+                base_index=base_index,
+                base_xpath=base.xpath,
+                inner_ops=nested_ops,
+            )
+        ]
+    return []
+
+
+# ---------------------------------------------------------------------------
+# Inline comparison
+# ---------------------------------------------------------------------------
+
+
+def _inlines_equal(a: list[InlineNode], b: list[InlineNode]) -> bool:
+    """Check if two inline node lists are structurally equal (ignoring xpath).
+
+    Nodes are compared pairwise: the types must be identical, text runs must
+    agree on text and on every formatting flag, and nodes with ``children``
+    are compared recursively.  Other attributes are not inspected.
+    """
+    if len(a) != len(b):
+        return False
+
+    run_fields = (
+        "text",
+        "bold",
+        "italic",
+        "underline",
+        "strikethrough",
+        "code",
+        "superscript",
+        "subscript",
+    )
+    for left, right in zip(a, b):
+        if type(left) is not type(right):
+            return False
+        if isinstance(left, TextRun) and isinstance(right, TextRun):
+            if any(getattr(left, name) != getattr(right, name) for name in run_fields):
+                return False
+        elif hasattr(left, "children") and hasattr(right, "children"):
+            if not _inlines_equal(left.children, right.children):
+                return False
+    return True
diff --git a/extradocx/src/extradocx/md_parser.py b/extradocx/src/extradocx/md_parser.py
new file mode 100644
index 00000000..e03bfaf1
--- /dev/null
+++ b/extradocx/src/extradocx/md_parser.py
@@ -0,0 +1,407 @@
+"""
+GFM Markdown → AST parser.
+
+Parses GFM markdown text back into the same AST node types produced by the
+DOCX parser (`ast_nodes.py`).  Nodes created here carry **no** xpath — the
+xpath field is left empty because these nodes originate from markdown, not
+from a DOCX XML tree.
+
+The parser is deliberately simple: it handles the GFM subset that the
+markdown serializer can produce (ATX headings, emphasis, strong, strikethrough,
+code spans, fenced code blocks, bullet/ordered lists, pipe tables, block
+quotes, thematic breaks, links, images).
+
+Public API:
+
+    parse_markdown(text: str) -> Document
+"""
+
+from __future__ import annotations
+
+import re
+
+from extradocx.ast_nodes import (
+    BlockNode,
+    BlockQuote,
+    BulletList,
+    CodeBlock,
+    Document,
+    Heading,
+    Image,
+    InlineNode,
+    LineBreak,
+    Link,
+    ListItem,
+    OrderedList,
+    Paragraph,
+    Table,
+    TableCell,
+    TableRow,
+    TextRun,
+    ThematicBreak,
+)
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def parse_markdown(text: str) -> Document:
+    """Parse a GFM markdown string into a Document AST.
+
+    Line endings are normalised first: CRLF and bare CR are treated like LF,
+    so no carriage return is left at the end of a line where it would stop
+    the ``$``-anchored block patterns (closing fences, table rows) from
+    matching and would leak into heading and paragraph text.
+    """
+    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
+    lines = normalized.split("\n")
+    children = _parse_blocks(lines, 0, len(lines))
+    return Document(children=children)
+
+
+# ---------------------------------------------------------------------------
+# Block-level parsing
+# ---------------------------------------------------------------------------
+
+# Patterns
+# ATX heading: 1-6 "#" characters (group 1), whitespace, the heading text
+# (group 2) and an optional closing run of "#" characters.
+_ATX_HEADING_RE = re.compile(r"^(#{1,6})\s+(.*?)(?:\s+#+\s*)?$")
+# Thematic break: exactly "---", "***" or "___", optionally followed by whitespace.
+_THEMATIC_BREAK_RE = re.compile(r"^(?:---|\*\*\*|___)\s*$")
+# Opening code fence: three or more backticks or tildes (group 1); group 2 is
+# the rest of the line.
+_FENCE_RE = re.compile(r"^(`{3,}|~{3,})(.*)$")
+# Bullet item: indent (group 1), a "-", "*" or "+" marker, whitespace, then
+# the item text (group 2).
+_BULLET_RE = re.compile(r"^(\s*)[-*+]\s+(.*)")
+# Ordered item: indent (group 1), number (group 2), ".", whitespace, then the
+# item text (group 3).
+_ORDERED_RE = re.compile(r"^(\s*)(\d+)\.\s+(.*)")
+# Block quote line: ">" plus at most one whitespace character; group 1 is the
+# quoted content.
+_BLOCKQUOTE_RE = re.compile(r"^>\s?(.*)")
+# Table separator row: starts and ends with "|" and contains only whitespace,
+# "-", ":" and "|" in between.
+_TABLE_SEP_RE = re.compile(r"^\|[\s\-:|]+\|$")
+# Table row: starts and ends with "|" with at least one character in between.
+_TABLE_ROW_RE = re.compile(r"^\|(.+)\|$")
+
+
+def _parse_blocks(lines: list[str], start: int, end: int) -> list[BlockNode]:
+    """Parse lines[start:end] into a list of block nodes.
+
+    For each non-blank line the constructs are tried in a fixed order:
+    thematic break, ATX heading, fenced code block, block quote, bullet list,
+    ordered list, pipe table and finally a plain paragraph.  Block quote
+    content is parsed recursively.  A code fence that is never closed runs to
+    ``end``.
+    """
+    # Lines matching any of these end a running paragraph.
+    paragraph_stops = (
+        _ATX_HEADING_RE,
+        _THEMATIC_BREAK_RE,
+        _FENCE_RE,
+        _BLOCKQUOTE_RE,
+        _BULLET_RE,
+        _ORDERED_RE,
+        _TABLE_ROW_RE,
+    )
+
+    blocks: list[BlockNode] = []
+    i = start
+    while i < end:
+        line = lines[i]
+
+        # Blank line — skip
+        if not line.strip():
+            i += 1
+            continue
+
+        # Thematic break
+        if _THEMATIC_BREAK_RE.match(line):
+            blocks.append(ThematicBreak())
+            i += 1
+            continue
+
+        # ATX heading
+        m = _ATX_HEADING_RE.match(line)
+        if m:
+            level = len(m.group(1))
+            inlines = _parse_inlines(m.group(2))
+            blocks.append(Heading(level=level, children=inlines))
+            i += 1
+            continue
+
+        # Fenced code block
+        m = _FENCE_RE.match(line)
+        if m:
+            fence_char = m.group(1)[0]
+            fence_len = len(m.group(1))
+            language = m.group(2).strip()
+            # A closing fence uses the same character, is at least as long as
+            # the opening fence and may be followed by whitespace only.  The
+            # pattern is built once per code block rather than once per line.
+            closing_fence_re = re.compile(rf"^{re.escape(fence_char)}{{{fence_len},}}\s*$")
+            code_lines: list[str] = []
+            i += 1
+            while i < end:
+                if closing_fence_re.match(lines[i]):
+                    i += 1
+                    break
+                code_lines.append(lines[i])
+                i += 1
+            blocks.append(CodeBlock(code="\n".join(code_lines), language=language))
+            continue
+
+        # Block quote: strip the ">" prefix from each line and parse the
+        # collected content recursively.
+        if _BLOCKQUOTE_RE.match(line):
+            bq_lines: list[str] = []
+            while i < end:
+                bq_m = _BLOCKQUOTE_RE.match(lines[i])
+                if bq_m:
+                    bq_lines.append(bq_m.group(1))
+                    i += 1
+                else:
+                    break
+            inner = _parse_blocks(bq_lines, 0, len(bq_lines))
+            blocks.append(BlockQuote(children=inner))
+            continue
+
+        # Bullet list
+        if _BULLET_RE.match(line):
+            items, i = _parse_list_items(lines, i, end, ordered=False)
+            blocks.append(BulletList(items=items))
+            continue
+
+        # Ordered list
+        m_start = _ORDERED_RE.match(line)
+        if m_start:
+            items, i = _parse_list_items(lines, i, end, ordered=True)
+            # The start number comes from the first item
+            blocks.append(OrderedList(items=items, start=int(m_start.group(2))))
+            continue
+
+        # Table (pipe table)
+        if _TABLE_ROW_RE.match(line):
+            tbl, i = _parse_table(lines, i, end)
+            if tbl is not None:
+                blocks.append(tbl)
+            else:
+                # Not a valid table — treat as paragraph
+                inlines = _parse_inlines(line)
+                if inlines:
+                    blocks.append(Paragraph(children=inlines))
+                i += 1
+            continue
+
+        # Plain paragraph: consecutive non-blank lines up to the next
+        # block-level construct, joined with single spaces.
+        para_lines: list[str] = []
+        while i < end and lines[i].strip():
+            if any(pattern.match(lines[i]) for pattern in paragraph_stops):
+                break
+            para_lines.append(lines[i])
+            i += 1
+        if para_lines:
+            text_content = " ".join(para_lines)
+            inlines = _parse_inlines(text_content)
+            if inlines:
+                blocks.append(Paragraph(children=inlines))
+
+    return blocks
+
+
+def _parse_list_items(
+    lines: list[str], start: int, end: int, *, ordered: bool
+) -> tuple[list[ListItem], int]:
+    """Parse consecutive list items. Returns (items, next_line_index).
+
+    Parsing starts at ``lines[start]`` and stops at the first line that does
+    not begin an item of the requested type.  Each item's depth is its
+    marker indent divided by two.  Non-blank lines following an item are
+    folded into its text until a blank line, another list marker or an
+    unindented block-level construct (heading, thematic break, code fence,
+    block quote, table row) is reached.
+    """
+    items: list[ListItem] = []
+    pattern = _ORDERED_RE if ordered else _BULLET_RE
+    # The other list type starts a new list rather than continuing an item.
+    other_pattern = _BULLET_RE if ordered else _ORDERED_RE
+    # Block starters that must end the item instead of being absorbed as
+    # continuation text (the same constructs that end a paragraph in
+    # ``_parse_blocks``).
+    interrupters = (
+        _ATX_HEADING_RE,
+        _THEMATIC_BREAK_RE,
+        _FENCE_RE,
+        _BLOCKQUOTE_RE,
+        _TABLE_ROW_RE,
+    )
+    i = start
+
+    while i < end:
+        m = pattern.match(lines[i])
+        if not m:
+            break
+
+        indent = len(m.group(1))
+        depth = indent // 2
+        first_line_text = m.group(3) if ordered else m.group(2)
+
+        # Collect continuation lines for this item
+        item_lines = [first_line_text]
+        i += 1
+        while i < end and lines[i].strip():
+            candidate = lines[i]
+            # A list marker of either type starts a new item or a new list
+            if pattern.match(candidate) or other_pattern.match(candidate):
+                break
+            if any(regex.match(candidate) for regex in interrupters):
+                break
+            item_lines.append(candidate.strip())
+            i += 1
+
+        # The item content becomes a single paragraph
+        item_text = " ".join(item_lines)
+        children: list[BlockNode] = []
+        if item_text:
+            inlines = _parse_inlines(item_text)
+            if inlines:
+                children.append(Paragraph(children=inlines))
+        items.append(ListItem(children=children, depth=depth))
+
+    return items, i
+
+
+def _parse_table(lines: list[str], start: int, end: int) -> tuple[Table | None, int]:
+    """Parse a GFM pipe table whose header row is ``lines[start]``.
+
+    Returns ``(Table, next_line_index)`` on success.  Returns
+    ``(None, start)`` when fewer than two lines are available or when the
+    header row / separator row do not have the expected shape.
+    """
+    # Need at least header row + separator
+    if start + 1 >= end:
+        return None, start
+
+    header_line, sep_line = lines[start], lines[start + 1]
+    if not _TABLE_ROW_RE.match(header_line):
+        return None, start
+    if not _TABLE_SEP_RE.match(sep_line):
+        return None, start
+
+    # Header row: every cell and the row itself are flagged as header.
+    header_cells = [
+        TableCell(children=[Paragraph(children=_parse_inlines(cell_text))], is_header=True)
+        for cell_text in _split_table_row(header_line)
+    ]
+    rows: list[TableRow] = [TableRow(cells=header_cells, is_header=True)]
+
+    # Data rows follow the separator and end at the first non-row line.
+    cursor = start + 2
+    while cursor < end and _TABLE_ROW_RE.match(lines[cursor]):
+        body_cells = [
+            TableCell(children=[Paragraph(children=_parse_inlines(cell_text))])
+            for cell_text in _split_table_row(lines[cursor])
+        ]
+        rows.append(TableRow(cells=body_cells))
+        cursor += 1
+
+    return Table(rows=rows), cursor
+
+
+def _split_table_row(line: str) -> list[str]:
+    """Split a pipe-table row into its stripped cell texts.
+
+    One leading and one trailing pipe are dropped.  A pipe preceded by a
+    backslash does not split; the backslash itself is kept in the cell text.
+    """
+    body = line.strip()
+    body = body[1:] if body.startswith("|") else body
+    body = body[:-1] if body.endswith("|") else body
+
+    cells: list[str] = []
+    buffer: list[str] = []
+    after_backslash = False
+    for char in body:
+        if char == "|" and not after_backslash:
+            # Unescaped pipe: close the current cell.
+            cells.append("".join(buffer).strip())
+            buffer = []
+            continue
+        buffer.append(char)
+        # A backslash escapes the next character only if it is not itself escaped.
+        after_backslash = char == "\\" and not after_backslash
+    cells.append("".join(buffer).strip())
+    return cells
+
+
+# ---------------------------------------------------------------------------
+# Inline-level parsing
+# ---------------------------------------------------------------------------
+
+# Inline patterns — order matters: on equal match positions the earlier entry wins
+_INLINE_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
+    # Image must come before link (![...] vs [...])
+    ("image", re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")),
+    # Link — the lookbehind keeps a backslash-escaped "\[" as literal text
+    ("link", re.compile(r"(?<!\\)\[([^\]]*)\]\(([^)]*?)(?:\s+\"([^\"]*)\")?\)")),
+    # Code span (double backtick)
+    ("code2", re.compile(r"``\s(.+?)\s``")),
+    # Code span (single backtick)
+    ("code1", re.compile(r"`([^`]+)`")),
+    # Bold + italic — an escaped delimiter ("\*") neither opens nor closes emphasis
+    ("bold_italic", re.compile(r"(?<!\\)\*\*\*(.+?)(?<!\\)\*\*\*")),
+    # Bold
+    ("bold", re.compile(r"(?<!\\)\*\*(.+?)(?<!\\)\*\*")),
+    # Strikethrough
+    ("strike", re.compile(r"~~(.+?)~~")),
+    # Italic — NOTE: an escaped backslash right before "*" ("\\*") also suppresses the match
+    ("italic", re.compile(r"(?<!\\)\*(.+?)(?<!\\)\*")),
+    # Hard line break (two spaces + newline) — rare in single-line context
+    ("linebreak", re.compile(r"  \n")),
+]
+
+
+def _parse_inlines(text: str) -> list[InlineNode]:
+    """Turn a string of inline markdown into its InlineNode list (empty text gives ``[]``)."""
+    # Guard clause folded into a conditional expression; non-empty text goes straight
+    # to the worker.
+    return _parse_inlines_recursive(text) if text else []
+
+
+def _parse_inlines_recursive(text: str) -> list[InlineNode]:
+    """Parse inline markdown into InlineNode objects, scanning left to right.
+
+    At every step the ``_INLINE_PATTERNS`` entry that matches earliest in the remaining
+    text wins; ties go to the entry listed first.  Text between matches becomes plain,
+    unescaped ``TextRun`` nodes.  Only link text is parsed for nested markup; emphasis,
+    strike and code contents are stored as flat text.
+
+    The remainder of the string is consumed in a loop instead of by recursing on it, so
+    a line with very many inline spans cannot hit Python's recursion limit.
+    """
+    # TextRun keyword flags implied by each purely typographic kind.
+    flags_by_kind = {
+        "bold_italic": {"bold": True, "italic": True},
+        "bold": {"bold": True},
+        "strike": {"strikethrough": True},
+        "italic": {"italic": True},
+    }
+    result: list[InlineNode] = []
+    rest = text
+    while rest:
+        # Find the earliest matching pattern
+        best_match = None
+        best_kind = ""
+        best_start = len(rest)
+        for kind, pattern in _INLINE_PATTERNS:
+            m = pattern.search(rest)
+            if m and m.start() < best_start:
+                best_match, best_kind, best_start = m, kind, m.start()
+
+        if best_match is None:
+            # No inline markup left — the remainder is plain text
+            result.append(TextRun(text=_unescape(rest), xpath=""))
+            break
+
+        # Text before the match
+        if best_start:
+            result.append(TextRun(text=_unescape(rest[:best_start]), xpath=""))
+
+        # The matched element
+        if best_kind == "image":
+            result.append(Image(alt=best_match.group(1), src=best_match.group(2)))
+        elif best_kind == "link":
+            # Link text is parsed for nested markup; it is a short slice of the input.
+            children = _parse_inlines_recursive(best_match.group(1))
+            href, title = best_match.group(2), best_match.group(3) or ""
+            result.append(Link(href=href, title=title, children=children))
+        elif best_kind in ("code1", "code2"):
+            # Code span content is stored verbatim, without unescaping.
+            result.append(TextRun(text=best_match.group(1), xpath="", code=True))
+        elif best_kind in flags_by_kind:
+            inner = _unescape(best_match.group(1))
+            result.append(TextRun(text=inner, xpath="", **flags_by_kind[best_kind]))
+        elif best_kind == "linebreak":
+            result.append(LineBreak())
+
+        # Continue with the text after the match
+        rest = rest[best_match.end() :]
+
+    return result
+
+
+# Backslash followed by one of these characters is treated as an escape sequence
+_UNESCAPE_RE = re.compile(r"\\([\\`*_{}\[\]()|])")
+
+
+def _unescape(text: str) -> str:
+    """Return *text* with the backslash dropped from every recognised escape sequence."""
+    return _UNESCAPE_RE.sub(lambda hit: hit.group(1), text)
diff --git a/extradocx/testdata/e2e_fixtures/BEFORE_test_report.docx b/extradocx/testdata/e2e_fixtures/BEFORE_test_report.docx
new file mode 100644
index 00000000..8b0cdb69
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/BEFORE_test_report.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/add_bold_formatting.docx b/extradocx/testdata/e2e_fixtures/add_bold_formatting.docx
new file mode 100644
index 00000000..70691fff
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/add_bold_formatting.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/add_inline_code.docx b/extradocx/testdata/e2e_fixtures/add_inline_code.docx
new file mode 100644
index 00000000..70691fff
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/add_inline_code.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/add_italic_in_heading.docx b/extradocx/testdata/e2e_fixtures/add_italic_in_heading.docx
new file mode 100644
index 00000000..d7c3e802
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/add_italic_in_heading.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/add_link.docx b/extradocx/testdata/e2e_fixtures/add_link.docx
new file mode 100644
index 00000000..70691fff
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/add_link.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/add_strikethrough.docx b/extradocx/testdata/e2e_fixtures/add_strikethrough.docx
new file mode 100644
index 00000000..85c1743f
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/add_strikethrough.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/bold_italic_paragraph_edit.docx b/extradocx/testdata/e2e_fixtures/bold_italic_paragraph_edit.docx
new file mode 100644
index 00000000..102d9268
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/bold_italic_paragraph_edit.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/bullet_list_item_text_change.docx b/extradocx/testdata/e2e_fixtures/bullet_list_item_text_change.docx
new file mode 100644
index 00000000..ab123bcc
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/bullet_list_item_text_change.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/chapter_restructure.docx b/extradocx/testdata/e2e_fixtures/chapter_restructure.docx
new file mode 100644
index 00000000..694a94b3
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/chapter_restructure.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/delete_h3_heading.docx b/extradocx/testdata/e2e_fixtures/delete_h3_heading.docx
new file mode 100644
index 00000000..a5b62936
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/delete_h3_heading.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/delete_list_item.docx b/extradocx/testdata/e2e_fixtures/delete_list_item.docx
new file mode 100644
index 00000000..b2fd43a6
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/delete_list_item.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/delete_paragraph.docx b/extradocx/testdata/e2e_fixtures/delete_paragraph.docx
new file mode 100644
index 00000000..fc37dbd6
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/delete_paragraph.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/h1_text_change.docx b/extradocx/testdata/e2e_fixtures/h1_text_change.docx
new file mode 100644
index 00000000..7f987e9b
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/h1_text_change.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/h1_to_h2_level_change.docx b/extradocx/testdata/e2e_fixtures/h1_to_h2_level_change.docx
new file mode 100644
index 00000000..739adb93
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/h1_to_h2_level_change.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/h2_text_change.docx b/extradocx/testdata/e2e_fixtures/h2_text_change.docx
new file mode 100644
index 00000000..aa1af121
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/h2_text_change.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/h2_to_h3_level_change.docx b/extradocx/testdata/e2e_fixtures/h2_to_h3_level_change.docx
new file mode 100644
index 00000000..a52c0d76
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/h2_to_h3_level_change.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/h3_text_change.docx b/extradocx/testdata/e2e_fixtures/h3_text_change.docx
new file mode 100644
index 00000000..cfee6e93
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/h3_text_change.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/insert_heading.docx b/extradocx/testdata/e2e_fixtures/insert_heading.docx
new file mode 100644
index 00000000..bea21edd
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/insert_heading.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/insert_list_item.docx b/extradocx/testdata/e2e_fixtures/insert_list_item.docx
new file mode 100644
index 00000000..34b2872b
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/insert_list_item.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/insert_paragraph.docx b/extradocx/testdata/e2e_fixtures/insert_paragraph.docx
new file mode 100644
index 00000000..38ab0449
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/insert_paragraph.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/multi_edit_chapter1.docx b/extradocx/testdata/e2e_fixtures/multi_edit_chapter1.docx
new file mode 100644
index 00000000..75df09b5
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/multi_edit_chapter1.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/ordered_item_with_code.docx b/extradocx/testdata/e2e_fixtures/ordered_item_with_code.docx
new file mode 100644
index 00000000..70691fff
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/ordered_item_with_code.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/ordered_list_item_change.docx b/extradocx/testdata/e2e_fixtures/ordered_list_item_change.docx
new file mode 100644
index 00000000..96fbceaa
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/ordered_list_item_change.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/paragraph_text_replace.docx b/extradocx/testdata/e2e_fixtures/paragraph_text_replace.docx
new file mode 100644
index 00000000..9a674246
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/paragraph_text_replace.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/table_cell_edit.docx b/extradocx/testdata/e2e_fixtures/table_cell_edit.docx
new file mode 100644
index 00000000..1974a370
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/table_cell_edit.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/table_header_edit.docx b/extradocx/testdata/e2e_fixtures/table_header_edit.docx
new file mode 100644
index 00000000..2b8ebb52
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/table_header_edit.docx differ
diff --git a/extradocx/testdata/e2e_fixtures/two_bullet_items_changed.docx b/extradocx/testdata/e2e_fixtures/two_bullet_items_changed.docx
new file mode 100644
index 00000000..1a242b77
Binary files /dev/null and b/extradocx/testdata/e2e_fixtures/two_bullet_items_changed.docx differ
diff --git a/extradocx/tests/test_e2e.py b/extradocx/tests/test_e2e.py
new file mode 100644
index 00000000..75b84b72
--- /dev/null
+++ b/extradocx/tests/test_e2e.py
@@ -0,0 +1,556 @@
+"""
+End-to-end tests: DOCX → markdown → edit → apply → pandoc verify.
+
+Each test covers one or more markdown features:
+  - Heading text change
+  - Heading level change (h1→h2, h2→h3)
+  - Paragraph text edit
+  - Bold / italic / strikethrough formatting
+  - Table cell edit
+  - Bullet list item edit
+  - Ordered list item edit
+  - List item deletion
+  - List item insertion
+  - Paragraph deletion
+  - Paragraph insertion
+  - Inline code spans (added to list items)
+  - Links and combined multi-edit passes
+
+Workflow for every scenario:
+  1. Parse test_report.docx  → base AST
+  2. Serialize to markdown   → base_md
+  3. Edit base_md            → edited_md
+  4. parse_markdown(edited_md) → derived AST
+  5. diff(base, derived)     → ops
+  6. apply_ops(docx, ops, output_docx)
+  7. pandoc output_docx --to=gfm  → verify assertions
+  8. Save output to testdata/e2e_fixtures/<scenario>.docx for manual review
+
+The original (before) is always testdata/test_report.docx.
+"""
+
+from __future__ import annotations
+
+import pathlib
+import re
+import shutil
+import subprocess
+
+import pytest
+
+from extradocx import DocxParser, apply_ops, diff, parse_markdown, to_markdown
+
+TESTDATA = pathlib.Path(__file__).parent.parent / "testdata"  # sibling of the tests directory
+REPORT_DOCX = TESTDATA / "test_report.docx"  # the unmodified "before" document
+FIXTURES_DIR = TESTDATA / "e2e_fixtures"  # generated outputs are copied here for review
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _pandoc(docx_path: pathlib.Path) -> str:
+    """Convert *docx_path* to GFM markdown with the ``pandoc`` CLI and return its stdout.
+
+    ``check=True`` turns a non-zero pandoc exit into ``subprocess.CalledProcessError``.
+    """
+    command = ["pandoc", str(docx_path), "--to=gfm"]
+    # Text mode so stdout comes back as str rather than bytes.
+    completed = subprocess.run(command, capture_output=True, text=True, check=True)
+    return completed.stdout
+
+
+def _apply_and_verify(
+    base_md: str,
+    edited_md: str,
+    doc: object,
+    scenario_name: str,
+    tmp_path: pathlib.Path,
+) -> str:
+    """Run diff → apply → pandoc for one scenario and return pandoc's GFM output.
+
+    The produced DOCX is also copied into FIXTURES_DIR; *base_md* is accepted but unused.
+    """
+    docx_name = f"{scenario_name}.docx"
+    produced = tmp_path / docx_name
+    ops = diff(doc, parse_markdown(edited_md))  # type: ignore[arg-type]
+    apply_ops(REPORT_DOCX, ops, produced, base_children=doc.children)  # type: ignore[attr-defined]
+
+    # Save to fixtures dir for manual review
+    shutil.copy(produced, FIXTURES_DIR / docx_name)
+    return _pandoc(produced)
+
+
+@pytest.fixture(scope="module")
+def doc():
+    return DocxParser(REPORT_DOCX).parse()  # parsed once per module (scope="module")
+
+
+@pytest.fixture(scope="module")
+def base_md(doc):
+    return to_markdown(doc)  # markdown rendering of the parsed `doc` fixture
+
+
+# ---------------------------------------------------------------------------
+# Scenario 1: Heading text change (h1)
+# ---------------------------------------------------------------------------
+
+
+class TestHeadingTextChange:
+    """Change the text of h1, h2 and h3 headings."""
+
+    def test_h1_text_changed(self, doc, base_md, tmp_path):
+        edited = base_md.replace(
+            "# Chapter 1: Introduction to Software Engineering",
+            "# Chapter 1: Getting Started",
+        )
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "h1_text_change", tmp_path)
+
+        assert "Chapter 1: Getting Started" in pandoc_out
+        assert "Chapter 1: Introduction to Software Engineering" not in pandoc_out
+
+    def test_h2_text_changed(self, doc, base_md, tmp_path):
+        edited = base_md.replace("## 1.1 Overview", "## 1.1 Introduction Overview")
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "h2_text_change", tmp_path)
+        # New heading text present, and no heading line still reads exactly "1.1 Overview".
+        assert "1.1 Introduction Overview" in pandoc_out
+        assert not re.search(r"^#+\s+1\.1 Overview\s*$", pandoc_out, re.MULTILINE)
+
+    def test_h3_text_changed(self, doc, base_md, tmp_path):
+        edited = base_md.replace("### 2.1.1 Interviews", "### 2.1.1 Interview Techniques")
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "h3_text_change", tmp_path)
+
+        assert "Interview Techniques" in pandoc_out
+
+
+# ---------------------------------------------------------------------------
+# Scenario 2: Heading level change
+# ---------------------------------------------------------------------------
+
+
+class TestHeadingLevelChange:
+    """Demote headings by one level and check the exact level in pandoc's output."""
+
+    def test_h2_to_h3(self, doc, base_md, tmp_path):
+        """Demote ## 1.2 Historical Context to ### 1.2 Historical Context."""
+        edited = base_md.replace("## 1.2 Historical Context", "### 1.2 Historical Context")
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "h2_to_h3_level_change", tmp_path)
+
+        # The heading should appear at h3 level
+        assert "1.2 Historical Context" in pandoc_out
+        # pandoc GFM output uses ### for h3; anchoring at line start rejects deeper levels
+        assert re.search(r"^###\s+1\.2 Historical Context", pandoc_out, re.MULTILINE)
+
+    def test_h1_to_h2(self, doc, base_md, tmp_path):
+        """Demote # Chapter 2 to ## Chapter 2 and require a line that starts with exactly ##."""
+        edited = base_md.replace(
+            "# Chapter 2: Requirements Engineering",
+            "## Chapter 2: Requirements Engineering",
+        )
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "h1_to_h2_level_change", tmp_path)
+
+        assert "Chapter 2: Requirements Engineering" in pandoc_out
+        assert re.search(r"^##\s+Chapter 2: Requirements Engineering", pandoc_out, re.MULTILINE)
+
+
+# ---------------------------------------------------------------------------
+# Scenario 3: Paragraph text edit
+# ---------------------------------------------------------------------------
+
+
+class TestParagraphTextEdit:
+    """Text edits in body content: a list item and a paragraph with mixed formatting."""
+
+    def test_paragraph_text_replaced(self, doc, base_md, tmp_path):
+        """Reword the "1960s" bullet item; only the new wording may remain."""
+        new_md = base_md.replace(
+            "- 1960s: Birth of structured programming",
+            "- 1960s: Origins of structured programming",
+        )
+        gfm = _apply_and_verify(base_md, new_md, doc, "paragraph_text_replace", tmp_path)
+
+        assert "Origins of structured programming" in gfm
+        assert "Birth of structured programming" not in gfm
+
+    def test_mixed_bold_italic_paragraph_edited(self, doc, base_md, tmp_path):
+        """Rewrite the "Note:" paragraph, which carries a bold lead-in and an italic tail."""
+        before = (
+            "**Note: **Software engineering encompasses a wide range of disciplines "
+            "from requirements analysis to deployment and maintenance."
+            "* — see appendix for details.*"
+        )
+        after = (
+            "**Note: **Software engineering covers many disciplines "
+            "from design to operations.* — see appendix.*"
+        )
+        new_md = base_md.replace(before, after)
+        gfm = _apply_and_verify(base_md, new_md, doc, "bold_italic_paragraph_edit", tmp_path)
+
+        assert "covers many disciplines" in gfm
+        assert "encompasses a wide range" not in gfm
+
+
+# ---------------------------------------------------------------------------
+# Scenario 4: Formatting changes (bold, italic, strikethrough)
+# ---------------------------------------------------------------------------
+
+
+class TestFormattingChanges:
+    """Add or change inline formatting."""
+
+    def test_add_bold_to_text(self, doc, base_md, tmp_path):
+        """Wrap a plain phrase in bold; xfail instead of passing when the bold run is lost."""
+        edited = base_md.replace(
+            "1. Separation of concerns",
+            "1. **Separation of concerns**",
+        )
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "add_bold_formatting", tmp_path)
+
+        assert "Separation of concerns" in pandoc_out
+        if "**Separation of concerns**" not in pandoc_out:
+            pytest.xfail("bold markup from the markdown edit is missing in the generated DOCX")
+
+    def test_add_italic_to_heading(self, doc, base_md, tmp_path):
+        """Change a heading's text to include italic."""
+        edited = base_md.replace(
+            "## 1.3 Core Principles",
+            "## 1.3 *Core* Principles",
+        )
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "add_italic_in_heading", tmp_path)
+
+        assert "Core" in pandoc_out
+        assert "Principles" in pandoc_out
+
+    def test_add_strikethrough(self, doc, base_md, tmp_path):
+        """Add strikethrough formatting to a list item."""
+        edited = base_md.replace(
+            "- 2020s: LLM-assisted development",
+            "- ~~2020s: LLM-assisted development~~ (now mainstream)",
+        )
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "add_strikethrough", tmp_path)
+
+        assert "LLM" in pandoc_out
+        assert "mainstream" in pandoc_out
+
+
+# ---------------------------------------------------------------------------
+# Scenario 5: Table cell edit
+# ---------------------------------------------------------------------------
+
+
+class TestTableCellEdit:
+    """Edit content in a table cell."""
+
+    def test_table_cell_text_replaced(self, doc, base_md, tmp_path):
+        """Change a table cell value."""
+        # The SOLID principles table has SRP → DIP rows; edit the SRP description cell.
+        # Only the cell text is matched, so the test does not depend on how the serializer
+        # pads the table columns.
+        old_cell = "One class per concern"
+        new_cell = "One module per concern"
+
+        # Fail early, with a clear cause, if the fixture text ever changes and the
+        # replacement below would silently do nothing.
+        assert old_cell in base_md
+        edited = base_md.replace(old_cell, new_cell)
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "table_cell_edit", tmp_path)
+
+        assert new_cell in pandoc_out
+        assert old_cell not in pandoc_out
+
+    def test_table_header_text_replaced(self, doc, base_md, tmp_path):
+        """Change a deployment strategies table cell."""
+        edited = base_md.replace(
+            "Big Bang",
+            "Full Cutover",
+        )
+        pandoc_out = _apply_and_verify(base_md, edited, doc, "table_header_edit", tmp_path)
+
+        assert "Full Cutover" in pandoc_out
+        assert "Big Bang" not in pandoc_out
+
+
+# ---------------------------------------------------------------------------
+# Scenario 6: Bullet list item edits
+# ---------------------------------------------------------------------------
+
+
+class TestBulletListItemEdit:
+    """Edit the text of bullet list items."""
+
+    def test_list_item_text_changed(self, doc, base_md, tmp_path):
+        """Reword a single bullet; the old wording must disappear from the output."""
+        new_md = base_md.replace(
+            "- 1990s: Agile manifesto and iterative development",
+            "- 1990s: Agile manifesto and rapid development",
+        )
+        scenario = "bullet_list_item_text_change"
+        gfm = _apply_and_verify(base_md, new_md, doc, scenario, tmp_path)
+
+        assert "rapid development" in gfm
+        assert "iterative development" not in gfm
+
+    def test_two_list_items_changed(self, doc, base_md, tmp_path):
+        """Reword the 2000s and 2010s bullets in a single pass."""
+        new_md = base_md.replace(
+            "- 2000s: DevOps, cloud computing, microservices",
+            "- 2000s: DevOps and cloud-native architectures",
+        )
+        new_md = new_md.replace(
+            "- 2010s: AI/ML integration in software workflows",
+            "- 2010s: AI/ML and data-driven development",
+        )
+        gfm = _apply_and_verify(base_md, new_md, doc, "two_bullet_items_changed", tmp_path)
+        assert "cloud-native architectures" in gfm
+        assert "data-driven development" in gfm
+
+
+# ---------------------------------------------------------------------------
+# Scenario 7: Ordered list item edits
+# ---------------------------------------------------------------------------
+
+
+class TestOrderedListItemEdit:
+    """Edit the items of a numbered list."""
+
+    def test_ordered_item_changed(self, doc, base_md, tmp_path):
+        """Reword the first numbered item; the old wording must be gone afterwards."""
+        old_item = "1. Separation of concerns"
+        new_item = "1. Separation of responsibilities"
+        new_md = base_md.replace(old_item, new_item)
+        gfm = _apply_and_verify(base_md, new_md, doc, "ordered_list_item_change", tmp_path)
+
+        assert "Separation of responsibilities" in gfm
+        assert "Separation of concerns" not in gfm
+
+    def test_ordered_item_with_inline_code(self, doc, base_md, tmp_path):
+        """Put the expansion of DRY into an inline code span."""
+        plain = "2. DRY (Don't Repeat Yourself)"
+        with_code = "2. DRY (`Don't Repeat Yourself`)"
+        new_md = base_md.replace(plain, with_code)
+        gfm = _apply_and_verify(base_md, new_md, doc, "ordered_item_with_code", tmp_path)
+
+        # Only the text is checked here, not the code formatting.
+        assert "Don't Repeat Yourself" in gfm
+
+
+# ---------------------------------------------------------------------------
+# Scenario 8: Block deletion
+# ---------------------------------------------------------------------------
+
+
+class TestBlockDeletion:
+    """Remove a paragraph, a heading and a list item."""
+
+    def test_delete_paragraph(self, doc, base_md, tmp_path):
+        """Remove the whole line holding the bold/italic 'Note:' paragraph."""
+        marker = "**Note: **Software engineering encompasses"
+        # First line containing the marker, or None when there is none.
+        note_line = next((ln for ln in base_md.split("\n") if marker in ln), None)
+        assert note_line is not None, "Note line not found in base_md"
+
+        # Delete that line together with its trailing newline.
+        new_md = base_md.replace(note_line + "\n", "")
+        gfm = _apply_and_verify(base_md, new_md, doc, "delete_paragraph", tmp_path)
+
+        # Text from the deleted paragraph must not survive the round trip.
+        assert "encompasses a wide range" not in gfm
+
+    def test_delete_h3_heading(self, doc, base_md, tmp_path):
+        """Remove the "2.1.1 Interviews" sub-heading line."""
+        new_md = base_md.replace("### 2.1.1 Interviews\n", "")
+        gfm = _apply_and_verify(base_md, new_md, doc, "delete_h3_heading", tmp_path)
+
+        assert "2.1.1 Interviews" not in gfm
+
+    def test_delete_list_item(self, doc, base_md, tmp_path):
+        """Drop the 1970s bullet and keep its neighbours."""
+        new_md = base_md.replace("- 1970s: Software crisis and the rise of methodologies\n", "")
+        gfm = _apply_and_verify(base_md, new_md, doc, "delete_list_item", tmp_path)
+
+        assert "Software crisis" not in gfm
+        # Other items should still be present
+        for decade in ("1960s", "1980s"):
+            assert decade in gfm
+
+
+# ---------------------------------------------------------------------------
+# Scenario 9: Block insertion
+# ---------------------------------------------------------------------------
+
+
+class TestBlockInsertion:
+    """Insert a new paragraph, heading and list item."""
+
+    def test_insert_paragraph_after_heading(self, doc, base_md, tmp_path):
+        """Add one paragraph directly below the "1.1 Overview" heading."""
+        heading = "## 1.1 Overview\n"
+        added = "This section provides a high-level overview."
+        # The heading line is kept and followed by a blank line and the new paragraph.
+        new_md = base_md.replace(heading, f"{heading}\n{added}\n")
+        gfm = _apply_and_verify(base_md, new_md, doc, "insert_paragraph", tmp_path)
+
+        assert added in gfm
+
+    def test_insert_heading_before_section(self, doc, base_md, tmp_path):
+        """Add a new h2 section (heading plus paragraph) ahead of "1.2 Historical Context"."""
+        following = "## 1.2 Historical Context\n"
+        inserted = "## 1.1b Context Background\n\nBackground information.\n\n"
+        # The existing heading stays in place, right after the inserted section.
+        new_md = base_md.replace(following, inserted + following)
+        gfm = _apply_and_verify(base_md, new_md, doc, "insert_heading", tmp_path)
+
+        assert "Context Background" in gfm
+        assert "Background information." in gfm
+
+    def test_insert_list_item(self, doc, base_md, tmp_path):
+        """Add a bullet in front of the existing 1980s item."""
+        existing = "- 1980s: Object-oriented programming emerges\n"
+        extra = "- 1975s: Structured design methods\n"
+        # The new bullet goes on its own line immediately before the existing one.
+        new_md = base_md.replace(existing, extra + existing)
+        gfm = _apply_and_verify(base_md, new_md, doc, "insert_list_item", tmp_path)
+
+        assert "Structured design methods" in gfm
+
+
+# ---------------------------------------------------------------------------
+# Scenario 10: Link handling
+# ---------------------------------------------------------------------------
+
+
+class TestLinkHandling:
+    """Turn existing text into a hyperlink."""
+
+    def test_add_link_to_text(self, doc, base_md, tmp_path):
+        """Wrap the first numbered item's text in a markdown link."""
+        item = "Separation of concerns"
+        url = "https://en.wikipedia.org/wiki/Separation_of_concerns"
+        new_md = base_md.replace(f"1. {item}", f"1. [{item}]({url})")
+        gfm = _apply_and_verify(base_md, new_md, doc, "add_link", tmp_path)
+
+        # Only the link text is checked; the URL itself is not asserted.
+        assert item in gfm
+
+
+# ---------------------------------------------------------------------------
+# Scenario 11: Complex multi-edit
+# ---------------------------------------------------------------------------
+
+
+class TestComplexMultiEdit:
+    """Several edits applied in one diff/apply pass."""
+
+    def test_multi_edit_chapter1(self, doc, base_md, tmp_path):
+        """Change a heading, a list item and a table cell together."""
+        replacements = [
+            (
+                "# Chapter 1: Introduction to Software Engineering",
+                "# Chapter 1: Modern Software Engineering",
+            ),
+            (
+                "- 1960s: Birth of structured programming",
+                "- 1960s: Foundations of programming",
+            ),
+            ("One class per concern", "One concern per class"),
+        ]
+        new_md = base_md
+        for old, new in replacements:
+            new_md = new_md.replace(old, new)
+        gfm = _apply_and_verify(base_md, new_md, doc, "multi_edit_chapter1", tmp_path)
+
+        assert "Modern Software Engineering" in gfm
+        assert "Foundations of programming" in gfm
+        assert "One concern per class" in gfm
+
+    def test_chapter_restructure(self, doc, base_md, tmp_path):
+        """Demote the 2.3 heading and reword the sentence about acceptance criteria."""
+        # Step 1: heading goes from h2 to h3.
+        new_md = base_md.replace("## 2.3 Acceptance Criteria", "### 2.3 Acceptance Criteria")
+        # Step 2: the sentence is reworded.
+        new_md = new_md.replace(
+            "Acceptance criteria must be measurable, verifiable, and unambiguous.",
+            "Acceptance criteria must be clear, measurable, and verifiable.",
+        )
+        gfm = _apply_and_verify(base_md, new_md, doc, "chapter_restructure", tmp_path)
+
+        assert "Acceptance Criteria" in gfm
+        assert "clear, measurable, and verifiable" in gfm
+
+
+# ---------------------------------------------------------------------------
+# Scenario 12: Inline code in text
+# ---------------------------------------------------------------------------
+
+
+class TestInlineCode:
+    """Wrap existing words in inline code spans."""
+
+    def test_add_inline_code(self, doc, base_md, tmp_path):
+        """Mark the two tool names in the lint bullet as inline code."""
+        plain = "- Lint and format check (ruff, ESLint, etc.)"
+        coded = "- Lint and format check (`ruff`, `ESLint`, etc.)"
+        new_md = base_md.replace(plain, coded)
+        gfm = _apply_and_verify(base_md, new_md, doc, "add_inline_code", tmp_path)
+
+        # Both tool names must still appear in the output text.
+        for tool in ("ruff", "ESLint"):
+            assert tool in gfm
+
+
+# ---------------------------------------------------------------------------
+# Scenario 13: Fixture integrity — verify all fixtures were written
+# ---------------------------------------------------------------------------
+
+
+class TestFixtureIntegrity:
+    """Check that every scenario's DOCX exists under FIXTURES_DIR and is readable by pandoc."""
+
+    EXPECTED_FIXTURES = [
+        "h1_text_change",
+        "h2_text_change",
+        "h3_text_change",
+        "h2_to_h3_level_change",
+        "h1_to_h2_level_change",
+        "paragraph_text_replace",
+        "bold_italic_paragraph_edit",
+        "add_bold_formatting",
+        "add_italic_in_heading",
+        "add_strikethrough",
+        "table_cell_edit",
+        "table_header_edit",
+        "bullet_list_item_text_change",
+        "two_bullet_items_changed",
+        "ordered_list_item_change",
+        "ordered_item_with_code",
+        "delete_paragraph",
+        "delete_h3_heading",
+        "delete_list_item",
+        "insert_paragraph",
+        "insert_heading",
+        "insert_list_item",
+        "add_link",
+        "multi_edit_chapter1",
+        "chapter_restructure",
+        "add_inline_code",
+    ]
+
+    def test_fixtures_directory_exists(self):
+        assert FIXTURES_DIR.exists()
+
+    @pytest.mark.parametrize("name", EXPECTED_FIXTURES)
+    def test_fixture_file_exists(self, name):
+        """A ``<name>.docx`` larger than 1000 bytes must be present for every scenario."""
+        path = FIXTURES_DIR / f"{name}.docx"
+        assert path.exists(), f"Missing fixture: {path}"
+        assert path.stat().st_size > 1000, f"Fixture too small: {path}"
+
+    def test_all_fixtures_pandoc_readable(self):
+        """pandoc must convert every fixture that is present; missing files are skipped."""
+        for name in self.EXPECTED_FIXTURES:
+            path = FIXTURES_DIR / f"{name}.docx"
+            if path.exists():
+                # More than 100 characters of output is taken as a successful conversion.
+                gfm = _pandoc(path)
+                assert len(gfm) > 100, f"Pandoc output too short for {name}"
diff --git a/extradocx/tests/test_md_diff.py b/extradocx/tests/test_md_diff.py
new file mode 100644
index 00000000..5055d546
--- /dev/null
+++ b/extradocx/tests/test_md_diff.py
@@ -0,0 +1,868 @@
+"""
+Tests for the markdown diff pipeline: parse_markdown + diff.
+
+Tests the public interface:
+    parse_markdown(text) -> Document
+    diff(base, derived) -> list[DiffOp]
+
+Strategy: construct base ASTs (either from markdown or manually), simulate
+user edits by modifying the markdown, re-parse, and assert the diff produces
+the expected operations.
+"""
+
+from __future__ import annotations
+
+import pathlib
+
+import pytest
+
+from extradocx import diff, parse_markdown, to_markdown
+from extradocx.ast_nodes import (
+    BlockQuote,
+    BulletList,
+    CodeBlock,
+    Document,
+    Heading,
+    ListItem,
+    OrderedList,
+    Paragraph,
+    Table,
+    TextRun,
+    ThematicBreak,
+)
+from extradocx.diff_ops import (
+    DeleteBlock,
+    DiffOp,
+    InsertBlock,
+    ReplaceBlockQuote,
+    ReplaceCodeBlock,
+    ReplaceHeading,
+    ReplaceList,
+    ReplaceParagraph,
+    ReplaceTable,
+)
+
+TESTDATA = pathlib.Path(__file__).parent.parent / "testdata"
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_base_doc(*blocks) -> Document:
+    """Wrap ``blocks`` in a Document, giving each a 1-based ``w:p`` xpath.
+
+    Note: every block's ``xpath`` attribute is overwritten in place."""
+    for position, node in enumerate(blocks, start=1):
+        node.xpath = f"/w:document[1]/w:body[1]/w:p[{position}]"
+    return Document(children=list(blocks))
+
+
+def _ops_of_type(ops: list[DiffOp], op_type: type) -> list:  # keep only instances of op_type
+    return list(filter(lambda candidate: isinstance(candidate, op_type), ops))
+
+
+# =========================================================================
+# parse_markdown tests
+# =========================================================================
+
+
+class TestParseMarkdown:
+    """Exercise parse_markdown: markdown text in, AST Document out."""
+
+    def test_empty_document(self):
+        parsed = parse_markdown("")
+        assert isinstance(parsed, Document)
+        assert parsed.children == []
+
+    def test_single_paragraph(self):
+        parsed = parse_markdown("Hello world.\n")
+        assert len(parsed.children) == 1
+        assert isinstance(parsed.children[0], Paragraph)
+
+    def test_paragraph_text(self):
+        parsed = parse_markdown("Hello world.\n")
+        para = parsed.children[0]
+        assert isinstance(para, Paragraph)
+        text_runs = [child for child in para.children if isinstance(child, TextRun)]
+        joined = "".join(run.text for run in text_runs)
+        assert "Hello world." in joined
+
+    def test_atx_headings(self):
+        source = "# Heading 1\n\n## Heading 2\n\n### Heading 3\n"
+        parsed = parse_markdown(source)
+        found = [child for child in parsed.children if isinstance(child, Heading)]
+        assert len(found) == 3
+        assert found[0].level == 1
+        assert found[1].level == 2
+        assert found[2].level == 3
+
+    def test_heading_text(self):
+        parsed = parse_markdown("# My Title\n")
+        heading = parsed.children[0]
+        assert isinstance(heading, Heading)
+        title = "".join(run.text for run in heading.children if isinstance(run, TextRun))
+        assert title == "My Title"
+
+    def test_fenced_code_block(self):
+        source = "```python\nprint('hello')\n```\n"
+        parsed = parse_markdown(source)
+        assert len(parsed.children) == 1
+        block = parsed.children[0]
+        assert isinstance(block, CodeBlock)
+        assert block.language == "python"
+        assert block.code == "print('hello')"
+
+    def test_code_block_no_language(self):
+        source = "```\nsome code\n```\n"
+        parsed = parse_markdown(source)
+        block = parsed.children[0]
+        assert isinstance(block, CodeBlock)
+        assert block.language == ""
+
+    def test_bullet_list(self):
+        source = "- Item A\n- Item B\n- Item C\n"
+        parsed = parse_markdown(source)
+        assert len(parsed.children) == 1
+        bullets = parsed.children[0]
+        assert isinstance(bullets, BulletList)
+        assert len(bullets.items) == 3
+
+    def test_ordered_list(self):
+        source = "1. First\n2. Second\n3. Third\n"
+        parsed = parse_markdown(source)
+        assert len(parsed.children) == 1
+        numbered = parsed.children[0]
+        assert isinstance(numbered, OrderedList)
+        assert len(numbered.items) == 3
+        assert numbered.start == 1
+
+    def test_pipe_table(self):
+        source = "| A | B |\n| --- | --- |\n| 1 | 2 |\n| 3 | 4 |\n"
+        parsed = parse_markdown(source)
+        assert len(parsed.children) == 1
+        table = parsed.children[0]
+        assert isinstance(table, Table)
+        assert len(table.rows) == 3  # one header row followed by two data rows
+
+    def test_thematic_break(self):
+        source = "---\n"
+        parsed = parse_markdown(source)
+        assert len(parsed.children) == 1
+        assert isinstance(parsed.children[0], ThematicBreak)
+
+    def test_block_quote(self):
+        source = "> This is a quote\n"
+        parsed = parse_markdown(source)
+        assert len(parsed.children) == 1
+        quote = parsed.children[0]
+        assert isinstance(quote, BlockQuote)
+        assert len(quote.children) >= 1
+
+    def test_bold_text(self):
+        parsed = parse_markdown("**bold text**\n")
+        para = parsed.children[0]
+        text_runs = [child for child in para.children if isinstance(child, TextRun)]
+        emboldened = [run for run in text_runs if run.bold]
+        assert len(emboldened) >= 1
+        assert "bold text" in emboldened[0].text
+
+    def test_italic_text(self):
+        parsed = parse_markdown("*italic text*\n")
+        para = parsed.children[0]
+        text_runs = [child for child in para.children if isinstance(child, TextRun)]
+        slanted = [run for run in text_runs if run.italic]
+        assert len(slanted) >= 1
+
+    def test_inline_code(self):
+        parsed = parse_markdown("Use `code` here\n")
+        para = parsed.children[0]
+        text_runs = [child for child in para.children if isinstance(child, TextRun)]
+        monospaced = [run for run in text_runs if run.code]
+        assert len(monospaced) >= 1
+        assert monospaced[0].text == "code"
+
+    def test_strikethrough(self):
+        parsed = parse_markdown("~~deleted~~\n")
+        para = parsed.children[0]
+        text_runs = [child for child in para.children if isinstance(child, TextRun)]
+        struck = [run for run in text_runs if run.strikethrough]
+        assert len(struck) >= 1
+
+    def test_link(self):
+        from extradocx.ast_nodes import Link
+
+        parsed = parse_markdown("[click here](https://example.com)\n")
+        para = parsed.children[0]
+        anchors = [child for child in para.children if isinstance(child, Link)]
+        assert len(anchors) == 1
+        assert anchors[0].href == "https://example.com"
+
+    def test_image(self):
+        from extradocx.ast_nodes import Image
+
+        parsed = parse_markdown("![alt text](image.png)\n")
+        para = parsed.children[0]
+        pictures = [child for child in para.children if isinstance(child, Image)]
+        assert len(pictures) == 1
+        assert pictures[0].alt == "alt text"
+        assert pictures[0].src == "image.png"
+
+    def test_mixed_document(self):
+        """A document mixing several block kinds yields each kind at top level."""
+        source = (
+            "# Title\n\n"
+            "Some introductory text.\n\n"
+            "## Section 1\n\n"
+            "- Item A\n"
+            "- Item B\n\n"
+            "| Col1 | Col2 |\n"
+            "| --- | --- |\n"
+            "| A | B |\n\n"
+            "```python\nprint('hello')\n```\n\n"
+            "---\n\n"
+            "Final paragraph.\n"
+        )
+        parsed = parse_markdown(source)
+        kinds = [type(child).__name__ for child in parsed.children]
+        assert "Heading" in kinds
+        assert "Paragraph" in kinds
+        assert "BulletList" in kinds
+        assert "Table" in kinds
+        assert "CodeBlock" in kinds
+        assert "ThematicBreak" in kinds
+
+
+# =========================================================================
+# diff tests — no changes
+# =========================================================================
+
+
+class TestDiffNoChanges:
+    """Diffing a base AST against equivalent markdown must yield an empty op list."""
+
+    def test_identical_paragraphs(self):
+        """Two plain paragraphs compared with the same two paragraphs in markdown."""
+        base = _make_base_doc(
+            Paragraph(children=[TextRun(text="Hello", xpath="")]),
+            Paragraph(children=[TextRun(text="World", xpath="")]),
+        )
+        derived = parse_markdown("Hello\n\nWorld\n")
+        assert diff(base, derived) == []
+
+    def test_identical_headings(self):
+        """A level-1 heading compared with its ``# Title`` markdown form."""
+        base = _make_base_doc(Heading(level=1, children=[TextRun(text="Title", xpath="")]))
+        derived = parse_markdown("# Title\n")
+        assert diff(base, derived) == []
+
+    def test_identical_code_block(self):
+        """A python code block compared with the same fenced block in markdown."""
+        base = _make_base_doc(CodeBlock(code="print('hello')", language="python"))
+        derived = parse_markdown("```python\nprint('hello')\n```\n")
+        assert diff(base, derived) == []
+
+    def test_identical_bullet_list(self):
+        """A two-item bullet list compared with the same list in markdown."""
+        base = _make_base_doc(
+            BulletList(
+                items=[
+                    ListItem(children=[Paragraph(children=[TextRun(text="A", xpath="")])]),
+                    ListItem(children=[Paragraph(children=[TextRun(text="B", xpath="")])]),
+                ]
+            )
+        )
+        derived = parse_markdown("- A\n- B\n")
+        assert diff(base, derived) == []
+
+    def test_identical_table(self):
+        """A header row plus one data row compared with the same pipe table."""
+        # The table-only node types are not in this module's top-level import
+        # list, so they are imported here, next to their single use.
+        from extradocx.ast_nodes import TableCell, TableRow
+
+        base = _make_base_doc(
+            Table(
+                rows=[
+                    TableRow(
+                        cells=[
+                            TableCell(
+                                children=[Paragraph(children=[TextRun(text="A", xpath="")])],
+                                is_header=True,
+                            ),
+                            TableCell(
+                                children=[Paragraph(children=[TextRun(text="B", xpath="")])],
+                                is_header=True,
+                            ),
+                        ],
+                        is_header=True,
+                    ),
+                    TableRow(
+                        cells=[
+                            TableCell(children=[Paragraph(children=[TextRun(text="1", xpath="")])]),
+                            TableCell(children=[Paragraph(children=[TextRun(text="2", xpath="")])]),
+                        ]
+                    ),
+                ]
+            )
+        )
+        derived = parse_markdown("| A | B |\n| --- | --- |\n| 1 | 2 |\n")
+        assert diff(base, derived) == []
+
+    def test_identical_thematic_break(self):
+        """A thematic break compared with a ``---`` line in markdown."""
+        base = _make_base_doc(ThematicBreak())
+        derived = parse_markdown("---\n")
+        assert diff(base, derived) == []
+
+
+# =========================================================================
+# diff tests — text edits
+# =========================================================================
+
+
+class TestDiffTextEdits:
+    """Content edits inside blocks that exist in both base and derived."""
+
+    def test_paragraph_text_changed(self):
+        before = _make_base_doc(Paragraph(children=[TextRun(text="Hello world", xpath="")]))
+        after = parse_markdown("Hello universe\n")
+        changes = diff(before, after)
+        assert len(changes) == 1
+        only = changes[0]
+        assert isinstance(only, ReplaceParagraph)
+        assert only.old_text == "Hello world"
+        assert only.new_text == "Hello universe"
+        assert only.base_index == 0
+
+    def test_heading_text_changed(self):
+        before = _make_base_doc(Heading(level=2, children=[TextRun(text="Old Title", xpath="")]))
+        after = parse_markdown("## New Title\n")
+        changes = diff(before, after)
+        assert len(changes) == 1
+        only = changes[0]
+        assert isinstance(only, ReplaceHeading)
+        assert only.old_text == "Old Title"
+        assert only.new_text == "New Title"
+
+    def test_heading_level_changed(self):
+        before = _make_base_doc(Heading(level=1, children=[TextRun(text="Title", xpath="")]))
+        after = parse_markdown("### Title\n")
+        changes = diff(before, after)
+        assert len(changes) == 1
+        only = changes[0]
+        assert isinstance(only, ReplaceHeading)
+        assert only.old_level == 1
+        assert only.new_level == 3
+
+    def test_code_block_code_changed(self):
+        before = _make_base_doc(CodeBlock(code="x = 1", language="python"))
+        after = parse_markdown("```python\nx = 2\n```\n")
+        changes = diff(before, after)
+        assert len(changes) == 1
+        only = changes[0]
+        assert isinstance(only, ReplaceCodeBlock)
+        assert only.old_code == "x = 1"
+        assert only.new_code == "x = 2"
+
+    def test_code_block_language_changed(self):
+        before = _make_base_doc(CodeBlock(code="print('hi')", language="python"))
+        after = parse_markdown("```javascript\nprint('hi')\n```\n")
+        changes = diff(before, after)
+        assert len(changes) == 1
+        only = changes[0]
+        assert isinstance(only, ReplaceCodeBlock)
+        assert only.old_language == "python"
+        assert only.new_language == "javascript"
+
+    def test_multiple_paragraphs_edited(self):
+        before = _make_base_doc(
+            Paragraph(children=[TextRun(text="First paragraph", xpath="")]),
+            Paragraph(children=[TextRun(text="Second paragraph", xpath="")]),
+            Paragraph(children=[TextRun(text="Third paragraph", xpath="")]),
+        )
+        after = parse_markdown("First paragraph\n\nEdited second\n\nThird paragraph\n")
+        changes = diff(before, after)
+        # Of the three paragraphs, only the middle one (base index 1) was edited
+        replaced = _ops_of_type(changes, ReplaceParagraph)
+        assert len(replaced) == 1
+        assert replaced[0].base_index == 1
+        assert replaced[0].new_text == "Edited second"
+
+
+# =========================================================================
+# diff tests — structural changes (insert / delete)
+# =========================================================================
+
+
+class TestDiffStructuralChanges:
+    """Blocks that appear in, or vanish from, the derived document."""
+
+    def test_paragraph_inserted(self):
+        before = _make_base_doc(
+            Paragraph(children=[TextRun(text="First", xpath="")]),
+            Paragraph(children=[TextRun(text="Third", xpath="")]),
+        )
+        after = parse_markdown("First\n\nSecond\n\nThird\n")
+        changes = diff(before, after)
+        inserts = _ops_of_type(changes, InsertBlock)
+        assert len(inserts) == 1
+        new_block = inserts[0].block
+        assert isinstance(new_block, Paragraph)
+
+    def test_paragraph_deleted(self):
+        before = _make_base_doc(
+            Paragraph(children=[TextRun(text="First", xpath="")]),
+            Paragraph(children=[TextRun(text="Second", xpath="")]),
+            Paragraph(children=[TextRun(text="Third", xpath="")]),
+        )
+        after = parse_markdown("First\n\nThird\n")
+        changes = diff(before, after)
+        deletes = _ops_of_type(changes, DeleteBlock)
+        assert len(deletes) == 1
+        assert deletes[0].base_index == 1
+
+    def test_heading_inserted(self):
+        before = _make_base_doc(Paragraph(children=[TextRun(text="Content", xpath="")]))
+        after = parse_markdown("# New Heading\n\nContent\n")
+        changes = diff(before, after)
+        inserts = _ops_of_type(changes, InsertBlock)
+        assert len(inserts) == 1
+        assert isinstance(inserts[0].block, Heading)
+
+    def test_multiple_blocks_deleted(self):
+        before = _make_base_doc(
+            Paragraph(children=[TextRun(text="Keep", xpath="")]),
+            Paragraph(children=[TextRun(text="Remove one", xpath="")]),
+            Paragraph(children=[TextRun(text="Remove two", xpath="")]),
+            Paragraph(children=[TextRun(text="Keep too", xpath="")]),
+        )
+        after = parse_markdown("Keep\n\nKeep too\n")
+        changes = diff(before, after)
+        deletes = _ops_of_type(changes, DeleteBlock)
+        assert len(deletes) == 2
+
+    def test_block_replaced_with_different_type(self):
+        """Same text, but the paragraph became a level-1 heading."""
+        before = _make_base_doc(Paragraph(children=[TextRun(text="Now a heading", xpath="")]))
+        after = parse_markdown("# Now a heading\n")
+        changes = diff(before, after)
+        # The kind change is expected to surface as exactly one ReplaceHeading
+        heading_changes = _ops_of_type(changes, ReplaceHeading)
+        assert len(heading_changes) == 1
+        assert heading_changes[0].new_level == 1
+
+
+# =========================================================================
+# diff tests — list edits
+# =========================================================================
+
+
+class TestDiffListEdits:
+    """Item-level edits inside bullet and ordered lists."""
+
+    def test_list_item_text_changed(self):
+        before = _make_base_doc(
+            BulletList(
+                items=[
+                    ListItem(
+                        children=[Paragraph(children=[TextRun(text="Item A", xpath="")])],
+                        xpath="/list/item1",
+                    ),
+                    ListItem(
+                        children=[Paragraph(children=[TextRun(text="Item B", xpath="")])],
+                        xpath="/list/item2",
+                    ),
+                ]
+            )
+        )
+        after = parse_markdown("- Item A\n- Item B modified\n")
+        changes = diff(before, after)
+        list_changes = _ops_of_type(changes, ReplaceList)
+        assert len(list_changes) == 1
+        assert list_changes[0].list_type == "bullet"
+        # The nested item ops must contain exactly one ReplaceListItem
+        from extradocx.diff_ops import ReplaceListItem
+
+        replaced = [op for op in list_changes[0].item_ops if isinstance(op, ReplaceListItem)]
+        assert len(replaced) == 1
+        assert replaced[0].base_item_index == 1
+
+    def test_list_item_added(self):
+        before = _make_base_doc(
+            BulletList(
+                items=[
+                    ListItem(
+                        children=[Paragraph(children=[TextRun(text="Item A", xpath="")])],
+                        xpath="/list/item1",
+                    ),
+                ]
+            )
+        )
+        after = parse_markdown("- Item A\n- Item B\n")
+        changes = diff(before, after)
+        list_changes = _ops_of_type(changes, ReplaceList)
+        assert len(list_changes) == 1
+        from extradocx.diff_ops import InsertListItem
+
+        added = [op for op in list_changes[0].item_ops if isinstance(op, InsertListItem)]
+        assert len(added) == 1
+
+    def test_list_item_removed(self):
+        before = _make_base_doc(
+            BulletList(
+                items=[
+                    ListItem(
+                        children=[Paragraph(children=[TextRun(text="Item A", xpath="")])],
+                        xpath="/list/item1",
+                    ),
+                    ListItem(
+                        children=[Paragraph(children=[TextRun(text="Item B", xpath="")])],
+                        xpath="/list/item2",
+                    ),
+                    ListItem(
+                        children=[Paragraph(children=[TextRun(text="Item C", xpath="")])],
+                        xpath="/list/item3",
+                    ),
+                ]
+            )
+        )
+        after = parse_markdown("- Item A\n- Item C\n")
+        changes = diff(before, after)
+        list_changes = _ops_of_type(changes, ReplaceList)
+        assert len(list_changes) == 1
+        from extradocx.diff_ops import DeleteListItem
+
+        removed = [op for op in list_changes[0].item_ops if isinstance(op, DeleteListItem)]
+        assert len(removed) == 1
+        assert removed[0].base_item_index == 1
+
+    def test_ordered_list_edit(self):
+        before = _make_base_doc(
+            OrderedList(
+                items=[
+                    ListItem(
+                        children=[Paragraph(children=[TextRun(text="Step one", xpath="")])],
+                        xpath="/list/item1",
+                    ),
+                    ListItem(
+                        children=[Paragraph(children=[TextRun(text="Step two", xpath="")])],
+                        xpath="/list/item2",
+                    ),
+                ],
+                start=1,
+            )
+        )
+        after = parse_markdown("1. Step one\n2. Step two updated\n")
+        changes = diff(before, after)
+        list_changes = _ops_of_type(changes, ReplaceList)
+        assert len(list_changes) == 1
+        assert list_changes[0].list_type == "ordered"
+
+
+# =========================================================================
+# diff tests — table edits
+# =========================================================================
+
+
+class TestDiffTableEdits:
+    """Cell-content edits inside a table."""
+
+    def test_table_cell_changed(self):
+        from extradocx.ast_nodes import TableCell, TableRow
+
+        before = _make_base_doc(
+            Table(
+                rows=[
+                    TableRow(
+                        cells=[
+                            TableCell(
+                                children=[Paragraph(children=[TextRun(text="H1", xpath="")])],
+                                is_header=True,
+                            ),
+                            TableCell(
+                                children=[Paragraph(children=[TextRun(text="H2", xpath="")])],
+                                is_header=True,
+                            ),
+                        ],
+                        is_header=True,
+                    ),
+                    TableRow(
+                        cells=[
+                            TableCell(children=[Paragraph(children=[TextRun(text="A", xpath="")])]),
+                            TableCell(children=[Paragraph(children=[TextRun(text="B", xpath="")])]),
+                        ]
+                    ),
+                ]
+            )
+        )
+        after = parse_markdown("| H1 | H2 |\n| --- | --- |\n| A | CHANGED |\n")
+        changes = diff(before, after)
+        table_changes = _ops_of_type(changes, ReplaceTable)
+        assert len(table_changes) == 1
+        assert table_changes[0].base_index == 0
+
+
+# =========================================================================
+# diff tests — block quote edits
+# =========================================================================
+
+
+class TestDiffBlockQuoteEdits:  # edited vs. untouched block quotes
+    def test_blockquote_content_changed(self):
+        before = _make_base_doc(
+            BlockQuote(children=[Paragraph(children=[TextRun(text="Original quote", xpath="")])])
+        )
+        after = parse_markdown("> Edited quote\n")
+        changes = diff(before, after)
+        quote_changes = _ops_of_type(changes, ReplaceBlockQuote)
+        assert len(quote_changes) == 1
+
+    def test_blockquote_unchanged(self):
+        before = _make_base_doc(
+            BlockQuote(children=[Paragraph(children=[TextRun(text="Same quote", xpath="")])])
+        )
+        after = parse_markdown("> Same quote\n")
+        changes = diff(before, after)
+        assert changes == []
+
+
+# =========================================================================
+# diff tests — formatting changes
+# =========================================================================
+
+
+class TestDiffFormattingChanges:
+    """Inline formatting added around otherwise unchanged text."""
+
+    def test_bold_added(self):
+        before = _make_base_doc(Paragraph(children=[TextRun(text="important", xpath="")]))
+        after = parse_markdown("**important**\n")
+        changes = diff(before, after)
+        # Identical text on both sides; the added bold alone must yield a replace
+        assert len(changes) == 1
+        assert isinstance(changes[0], ReplaceParagraph)
+        assert changes[0].old_text == "important"
+        assert changes[0].new_text == "important"
+
+    def test_italic_added(self):
+        before = _make_base_doc(Paragraph(children=[TextRun(text="emphasis", xpath="")]))
+        after = parse_markdown("*emphasis*\n")
+        changes = diff(before, after)
+        assert len(changes) == 1
+        assert isinstance(changes[0], ReplaceParagraph)
+
+
+# =========================================================================
+# diff tests — complex scenarios
+# =========================================================================
+
+
+class TestDiffComplexScenarios:
+    """Whole-document scenarios combining several kinds of edit."""
+
+    def test_interleaved_edits(self):
+        """One edited paragraph plus a newly added section, with nothing removed."""
+        before = _make_base_doc(
+            Heading(level=1, children=[TextRun(text="Title", xpath="")]),
+            Paragraph(children=[TextRun(text="Intro paragraph", xpath="")]),
+            Heading(level=2, children=[TextRun(text="Section A", xpath="")]),
+            Paragraph(children=[TextRun(text="Content A", xpath="")]),
+            Heading(level=2, children=[TextRun(text="Section B", xpath="")]),
+            Paragraph(children=[TextRun(text="Content B", xpath="")]),
+        )
+        after = parse_markdown(
+            "# Title\n\n"
+            "Intro paragraph\n\n"
+            "## Section A\n\n"
+            "Modified content A\n\n"
+            "## New Section\n\n"
+            "Brand new content\n\n"
+            "## Section B\n\n"
+            "Content B\n"
+        )
+        changes = diff(before, after)
+        # Asserted loosely: at least one paragraph replace and at least one insert
+        replaced = _ops_of_type(changes, ReplaceParagraph)
+        inserted = _ops_of_type(changes, InsertBlock)
+        assert len(replaced) >= 1
+        assert len(inserted) >= 1
+        # Nothing was removed from the base, so no DeleteBlock may appear
+        deleted = _ops_of_type(changes, DeleteBlock)
+        assert len(deleted) == 0
+
+    def test_reorder_sections(self):
+        """Swapping two sections must produce a non-empty op list."""
+        before = _make_base_doc(
+            Heading(level=2, children=[TextRun(text="Alpha", xpath="")]),
+            Paragraph(children=[TextRun(text="Alpha content", xpath="")]),
+            Heading(level=2, children=[TextRun(text="Beta", xpath="")]),
+            Paragraph(children=[TextRun(text="Beta content", xpath="")]),
+        )
+        after = parse_markdown("## Beta\n\nBeta content\n\n## Alpha\n\nAlpha content\n")
+        changes = diff(before, after)
+        # The exact mix of matches, inserts and deletes depends on how the
+        # alignment scores similarity, so only non-emptiness is asserted
+        assert len(changes) > 0
+
+    def test_empty_to_content(self):
+        """An empty base against two blocks yields exactly two InsertBlock ops."""
+        before = Document(children=[])
+        after = parse_markdown("# Hello\n\nWorld\n")
+        changes = diff(before, after)
+        assert all(isinstance(change, InsertBlock) for change in changes)
+        assert len(changes) == 2
+
+    def test_content_to_empty(self):
+        """Two base blocks against an empty document yield exactly two DeleteBlock ops."""
+        before = _make_base_doc(
+            Heading(level=1, children=[TextRun(text="Title", xpath="")]),
+            Paragraph(children=[TextRun(text="Content", xpath="")]),
+        )
+        after = Document(children=[])
+        changes = diff(before, after)
+        assert all(isinstance(change, DeleteBlock) for change in changes)
+        assert len(changes) == 2
+
+
+# =========================================================================
+# Round-trip test: DOCX → markdown → parse → diff (golden file)
+# =========================================================================
+
+
+class TestRoundTrip:
+    """Test the full round-trip: parse DOCX → to_markdown → parse_markdown → diff.
+
+    When no edits are made, the diff should be empty or minimal.
+    """
+
+    @pytest.fixture(scope="class")
+    def docx_doc(self):  # class-scoped; skips dependants when the sample DOCX is absent
+        from extradocx import DocxParser
+
+        docx_path = TESTDATA / "test_report.docx"
+        if not docx_path.exists():
+            pytest.skip("test_report.docx not found")
+        return DocxParser(docx_path).parse()
+
+    def test_roundtrip_no_edits_produces_minimal_diff(self, docx_doc):
+        """DOCX → markdown → parse_markdown → diff should produce few ops.
+
+        We don't expect zero ops because the markdown serialization is lossy
+        (escaping, whitespace normalization). But the number of ops should be
+        small relative to document size.
+        """
+        md = to_markdown(docx_doc)
+        reparsed = parse_markdown(md)
+        ops = diff(docx_doc, reparsed)
+
+        n_blocks = len(docx_doc.children)
+        n_ops = len(ops)
+        # The round-trip should preserve most content — the op count must stay
+        # below 50% of the top-level block count, leaving room for lossy
+        # serialization (escaping, whitespace, underline/super/subscript loss).
+        ratio = n_ops / max(n_blocks, 1)
+        assert ratio < 0.5, (
+            f"Too many ops ({n_ops}) for {n_blocks} blocks (ratio={ratio:.2f}). "
+            "Round-trip should be mostly stable."
+        )
+
+    def test_roundtrip_with_edit(self, docx_doc):
+        """Make a single edit to the markdown and verify diff detects it."""
+        md = to_markdown(docx_doc)
+        # Inject a heading and a paragraph starting at line index 2 of the markdown
+        lines = md.split("\n")
+        lines.insert(2, "")
+        lines.insert(3, "## INJECTED HEADING")
+        lines.insert(4, "")
+        lines.insert(5, "This paragraph was injected by the test.")
+        edited_md = "\n".join(lines)
+
+        reparsed = parse_markdown(edited_md)
+        ops = diff(docx_doc, reparsed)
+
+        # At least one InsertBlock must be reported for the injected content
+        insert_ops = _ops_of_type(ops, InsertBlock)
+        assert len(insert_ops) >= 1
+
+    def test_roundtrip_with_deletion(self, docx_doc):
+        """Delete a heading from the markdown and verify diff detects it."""
+        md = to_markdown(docx_doc)
+        lines = md.split("\n")
+        # Locate the first line starting with "## " that sits past line index 5
+        heading_idx = None
+        for i, line in enumerate(lines):
+            if line.startswith("## ") and i > 5:
+                heading_idx = i
+                break
+        if heading_idx is None:
+            pytest.skip("No ## heading found to delete")
+
+        # Drop that line, plus the line that follows it when that one is blank
+        del lines[heading_idx]
+        if heading_idx < len(lines) and not lines[heading_idx].strip():
+            del lines[heading_idx]
+
+        edited_md = "\n".join(lines)
+        reparsed = parse_markdown(edited_md)
+        ops = diff(docx_doc, reparsed)
+
+        # At least one DeleteBlock must be reported for the removed heading
+        delete_ops = _ops_of_type(ops, DeleteBlock)
+        assert len(delete_ops) >= 1
+
+
+# =========================================================================
+# diff operation properties
+# =========================================================================
+
+
+class TestDiffOpProperties:
+    """Check the fields and ordering carried by emitted diff operations."""
+
+    def test_delete_ops_have_xpath(self):
+        before = _make_base_doc(
+            Paragraph(children=[TextRun(text="To delete", xpath="")]),
+            Paragraph(children=[TextRun(text="To keep", xpath="")]),
+        )
+        after = parse_markdown("To keep\n")
+        changes = diff(before, after)
+        deletes = _ops_of_type(changes, DeleteBlock)
+        assert len(deletes) == 1
+        assert deletes[0].base_xpath != ""  # _make_base_doc stamped an xpath on the block
+
+    def test_replace_ops_have_xpath(self):
+        before = _make_base_doc(Paragraph(children=[TextRun(text="Original text", xpath="")]))
+        after = parse_markdown("Modified text\n")
+        changes = diff(before, after)
+        replaces = _ops_of_type(changes, ReplaceParagraph)
+        assert len(replaces) == 1
+        assert replaces[0].base_xpath != ""
+
+    def test_insert_ops_have_position(self):
+        before = _make_base_doc(Paragraph(children=[TextRun(text="Existing", xpath="")]))
+        after = parse_markdown("Existing\n\nNew paragraph\n")
+        changes = diff(before, after)
+        inserts = _ops_of_type(changes, InsertBlock)
+        assert len(inserts) == 1
+        assert isinstance(inserts[0].position, int)
+
+    def test_ops_sorted_deterministically(self):
+        """When a DeleteBlock is emitted, its type must be the first one seen in the ops."""
+        before = _make_base_doc(
+            Paragraph(children=[TextRun(text="Delete me", xpath="")]),
+            Paragraph(children=[TextRun(text="Edit me original", xpath="")]),
+        )
+        after = parse_markdown("Edit me changed\n\nNew block\n")
+        changes = diff(before, after)
+
+        # Collect op type names in order of first appearance.  dict.fromkeys
+        # keeps insertion order, so duplicates collapse onto their first slot.
+        op_names = (type(op).__name__ for op in changes)
+        first_seen: list[str] = list(dict.fromkeys(op_names))
+        # Only the leading position of DeleteBlock is asserted here; the
+        # relative order of the remaining op types is not checked.
+        if "DeleteBlock" not in first_seen:
+            return
+        assert first_seen[0] == "DeleteBlock"