Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,26 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge.
re-probed tens of thousands of glyph occurrences that now collapse to roughly
the number of distinct characters it uses. No public API or behaviour change.

- **Paragraph render writes font and colour operators only when they change.** The
paragraph render handler emitted a `setFont` (`Tf`) and `setNonStrokingColor`
(`rg`) operator for *every* text span, even across the spans of a single-style
paragraph. It now tracks the last-written `(font, size)` and colour across the
paragraph's graphics-state block and re-emits only on a real change (invalidating
after inline images/shapes), so a multi-span single-style paragraph carries one
`Tf` + one `rg` instead of one pair per span — fewer operators for PDFBox to
serialize. **Rendered output is unchanged** (the skipped operators were
redundant); pinned by the visual-regression suite plus a content-stream test
asserting one `Tf` across many drawn spans. No public API or behaviour change.

- **Table cell text is sanitized once per cell instead of three times.** Resolving
a table ran each cell's lines through `sanitizeCellLines` separately in the
natural-width, natural-height and resolve passes, rebuilding the list and its
per-line control-character cleanup up to three times per cell. The sanitized
lines are now computed once when the logical grid is built and reused by all
three passes. **Output is byte-identical** (sanitization is deterministic); on a
large table this removes the dominant per-cell layout allocation. No public API
or behaviour change.

### Tests / tooling

- **Benchmark regression gate and measurement probe (benchmarks module, not part
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
import com.demcha.compose.font.FontLibrary;
import com.demcha.compose.engine.render.pdf.PdfFont;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;

import java.awt.Color;
import java.io.IOException;
import java.util.List;

Expand Down Expand Up @@ -56,6 +58,11 @@ public void render(PlacedFragment fragment,

stream.saveGraphicsState();
try {
// Font and non-stroking colour persist across BT/ET within this one
// q...Q block, so track the last-written pair and re-emit Tf/rg only
// when a span actually changes them — a single-style paragraph then
// emits one setFont + one setNonStrokingColor instead of one per span.
TextRenderState textState = new TextRenderState();
double cursorTop = contentTop;
for (int lineIndex = 0; lineIndex < payload.lines().size(); lineIndex++) {
ParagraphLine line = payload.lines().get(lineIndex);
Expand All @@ -71,7 +78,7 @@ public void render(PlacedFragment fragment,
case LEFT -> innerX;
};

renderLine(stream, fonts, line, lineX, baselineY, environment);
renderLine(stream, fonts, line, lineX, baselineY, environment, textState);

cursorTop = lineTop - resolvedLineHeight - payload.lineGap();
}
Expand Down Expand Up @@ -127,7 +134,8 @@ private void renderLine(PDPageContentStream stream,
ParagraphLine line,
double lineX,
double baselineY,
PdfRenderEnvironment environment) throws IOException {
PdfRenderEnvironment environment,
TextRenderState textState) throws IOException {
List<ParagraphSpan> spans = line.spans();
if (spans.isEmpty()) {
return;
Expand Down Expand Up @@ -155,8 +163,10 @@ private void renderLine(PDPageContentStream stream,
stream.newLineAtOffset((float) cursorX, (float) baselineY);
inTextBlock = true;
}
stream.setFont(font.fontType(textSpan.textStyle().decoration()), (float) textSpan.textStyle().size());
stream.setNonStrokingColor(textSpan.textStyle().color());
textState.applyFont(stream,
font.fontType(textSpan.textStyle().decoration()),
(float) textSpan.textStyle().size());
textState.applyColor(stream, textSpan.textStyle().color());
stream.showText(text);
cursorX += textSpan.width();
} else if (span instanceof ParagraphImageSpan imageSpan) {
Expand All @@ -176,6 +186,10 @@ private void renderLine(PDPageContentStream stream,
(float) imageBottom,
(float) imageSpan.width(),
(float) imageSpan.height());
// An inline graphic runs its own graphics-state save/restore and
// colour ops; drop the tracked font/colour so the next text span
// re-emits them rather than trusting persistence across it.
textState.invalidate();
cursorX += imageSpan.width();
} else if (span instanceof ParagraphShapeSpan shapeSpan) {
if (inTextBlock) {
Expand All @@ -184,6 +198,7 @@ private void renderLine(PDPageContentStream stream,
}
renderShape(stream, shapeSpan, cursorX, baselineY,
line.textAscent(), line.baselineOffsetFromBottom(), line.lineHeight());
textState.invalidate();
cursorX += shapeSpan.width();
}
}
Expand Down Expand Up @@ -287,4 +302,39 @@ private static void renderShape(PDPageContentStream stream,
}
}

/**
* Tracks the font/size and non-stroking colour last written to the content
* stream within one paragraph's {@code q...Q} block, so the handler emits a
* {@code Tf}/{@code rg} operator only when a span actually changes them. The
* common single-style paragraph then carries one of each instead of one per
* span. {@link #invalidate()} forces a re-emit after anything that may disturb
* the persisted text state (inline images, shapes).
*/
private static final class TextRenderState {
private PDFont font;
private float size = Float.NaN;
private Color color;

void applyFont(PDPageContentStream stream, PDFont newFont, float newSize) throws IOException {
if (newFont != font || newSize != size) {
stream.setFont(newFont, newSize);
font = newFont;
size = newSize;
}
}

void applyColor(PDPageContentStream stream, Color newColor) throws IOException {
if (!newColor.equals(color)) {
stream.setNonStrokingColor(newColor);
color = newColor;
}
}

void invalidate() {
font = null;
size = Float.NaN;
color = null;
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,14 @@ record ResolvedTableLayout(
* are skipped when iterating later source rows. {@code source} is the
* original public {@link DocumentTableCell}, retained so the layout
* can detect composed-content cells via
* {@link DocumentTableCell#hasComposedContent()}.
* {@link DocumentTableCell#hasComposedContent()}. {@code sanitizedLines} is
* the cell's text with control characters cleaned, computed once here and
* reused by the width, height and resolve passes instead of re-sanitizing the
* content three times.
*/
private record LogicalCell(int startRow, int startColumn, int colSpan, int rowSpan,
TableCellContent content, DocumentTableCell source) {
TableCellContent content, DocumentTableCell source,
List<String> sanitizedLines) {
}

/**
Expand Down Expand Up @@ -207,7 +211,7 @@ static ResolvedTableLayoutWithContents resolveTableLayout(TableNode node,
width,
height,
yOffset,
sanitizeCellLines(logical.content()),
logical.sanitizedLines(),
style,
fillInsets(stylesGrid, rowIndex, logical.startColumn(), logical.colSpan()),
borderSides(stylesGrid, rowIndex, logical.startColumn(), logical.colSpan())));
Expand Down Expand Up @@ -298,7 +302,7 @@ private static double naturalCellHeight(LogicalCell logical,
Padding padding = style.padding() == null ? Padding.zero() : style.padding();
return prepared.measureResult().height() + padding.vertical();
}
return cellNaturalHeight(logical.content(), style, measurement);
return cellNaturalHeight(logical.sanitizedLines(), style, measurement);
}

static PreparedNode<TableNode> sliceTablePreparedNode(TableNode source,
Expand Down Expand Up @@ -510,8 +514,9 @@ private static List<List<LogicalCell>> buildLogicalRows(TableNode node, int colu
occupied[r][c] = true;
}
}
TableCellContent content = toTableCell(cell);
logical.add(new LogicalCell(rowIndex, col, cell.colSpan(), cell.rowSpan(),
toTableCell(cell), cell));
content, cell, sanitizeCellLines(content)));
col += cell.colSpan();
}
if (sourceIdx < source.size()) {
Expand Down Expand Up @@ -572,7 +577,7 @@ private static double[] resolveNaturalColumnWidths(TableNode node,
}
int col = logical.startColumn();
singleCellRequired[col] = Math.max(singleCellRequired[col],
cellNaturalWidth(logical.content(), stylesGrid[rowIndex][col], measurement));
cellNaturalWidth(logical.sanitizedLines(), stylesGrid[rowIndex][col], measurement));
}
}

Expand All @@ -596,7 +601,7 @@ private static double[] resolveNaturalColumnWidths(TableNode node,
}
int startCol = logical.startColumn();
int endCol = startCol + logical.colSpan();
double need = cellNaturalWidth(logical.content(), stylesGrid[rowIndex][startCol], measurement);
double need = cellNaturalWidth(logical.sanitizedLines(), stylesGrid[rowIndex][startCol], measurement);
double have = sumRange(widths, startCol, endCol);
if (need <= have + EPS) {
continue;
Expand Down Expand Up @@ -669,22 +674,22 @@ private static double[] resolveFinalColumnWidths(TableNode node,
return finalWidths;
}

private static double cellNaturalWidth(TableCellContent cell,
private static double cellNaturalWidth(List<String> sanitizedLines,
TableCellLayoutStyle style,
TextMeasurementSystem measurement) {
Padding padding = style.padding() == null ? Padding.zero() : style.padding();
double maxWidth = 0.0;
for (String line : sanitizeCellLines(cell)) {
for (String line : sanitizedLines) {
maxWidth = Math.max(maxWidth, measurement.textWidth(style.textStyle(), line));
}
return maxWidth + padding.horizontal();
}

private static double cellNaturalHeight(TableCellContent cell,
private static double cellNaturalHeight(List<String> sanitizedLines,
TableCellLayoutStyle style,
TextMeasurementSystem measurement) {
Padding padding = style.padding() == null ? Padding.zero() : style.padding();
int lineCount = Math.max(1, sanitizeCellLines(cell).size());
int lineCount = Math.max(1, sanitizedLines.size());
return (lineCount * measurement.lineHeight(style.textStyle()))
+ ((lineCount - 1) * tableCellLineSpacing(style))
+ padding.vertical();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package com.demcha.compose.document.backend.fixed.pdf;

import static org.assertj.core.api.Assertions.assertThat;

import com.demcha.compose.GraphCompose;
import com.demcha.compose.document.api.DocumentSession;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.util.List;

/**
* Guards Finding 5: the paragraph render handler tracks the last-written font and
* non-stroking colour, so a single-style paragraph that wraps into many spans
* emits <b>one</b> {@code setFont} ({@code Tf}) operator for the whole paragraph
* instead of one per span.
*
* <p>Renders a real one-page document and inspects the page content stream through
* the established {@link PDFStreamParser} token pattern — the proof lives entirely
* in test scope, with no instrumentation in the render handler.</p>
*/
class ParagraphTextStateDedupTest {

@Test
void singleStyleParagraphEmitsOneFontOperatorAcrossManySpans() throws Exception {
byte[] pdf;
try (DocumentSession session = GraphCompose.document()
.pageSize(400, 800)
.margin(24, 24, 24, 24)
.create()) {
// One uniform style; long enough to wrap into many lines/spans on a
// single page so the dedup is meaningful (without the guard this emits
// a Tf per span).
String body = ("GraphCompose lays out structured documents across pages "
+ "while keeping headers and footers stable. ").repeat(8);
session.pageFlow(flow -> flow.addParagraph(p -> p.text(body)));
pdf = session.toPdfBytes();
}

try (PDDocument document = Loader.loadPDF(pdf)) {
assertThat(document.getNumberOfPages())
.describedAs("body is sized to stay on one page so one q...Q block covers every span")
.isEqualTo(1);
int fontOps = operatorCount(document, "Tf");
int textDraws = operatorCount(document, "Tj") + operatorCount(document, "TJ");

assertThat(textDraws)
.describedAs("the paragraph must wrap into several drawn spans for the dedup to be meaningful")
.isGreaterThanOrEqualTo(2);
assertThat(fontOps)
.describedAs("one setFont for the whole single-style paragraph, not one per span")
.isEqualTo(1);
}
}

@Test
void multiStyleParagraphReEmitsFontOnEachStyleChange() throws Exception {
byte[] pdf;
try (DocumentSession session = GraphCompose.document()
.pageSize(400, 800)
.margin(24, 24, 24, 24)
.create()) {
// Three consecutive runs with distinct decorations (regular / bold /
// regular) on one line: the tracker must re-emit Tf at each change,
// not over-dedup them into a single setFont (which would draw the bold
// run in the regular font).
session.pageFlow(flow -> flow.addParagraph(p ->
p.rich(r -> r.plain("alpha ").bold("bravo ").plain("charlie"))));
pdf = session.toPdfBytes();
}

try (PDDocument document = Loader.loadPDF(pdf)) {
assertThat(operatorCount(document, "Tf"))
.describedAs("a style change within a paragraph must re-emit setFont (single-style baseline is 1)")
.isGreaterThanOrEqualTo(2);
}
}

private static int operatorCount(PDDocument document, String operatorName) throws IOException {
int count = 0;
for (var page : document.getPages()) {
List<Object> tokens = new PDFStreamParser(page).parse();
for (Object token : tokens) {
if (token instanceof Operator operator && operatorName.equals(operator.getName())) {
count++;
}
}
}
return count;
}
}