From 24f105039bb43cfe34f9b6df792813cebbf6b4dc Mon Sep 17 00:00:00 2001 From: Sebastian Date: Tue, 13 Jan 2026 14:51:28 +0100 Subject: [PATCH 1/7] ValueKind helpers --- .../java/org/rcsb/cif/model/FloatColumn.java | 2 +- .../java/org/rcsb/cif/model/IntColumn.java | 2 +- .../java/org/rcsb/cif/model/ValueKind.java | 25 ++++++++++++++++++- .../org/rcsb/cif/model/text/TextColumn.java | 10 ++------ 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/rcsb/cif/model/FloatColumn.java b/src/main/java/org/rcsb/cif/model/FloatColumn.java index 9c6227c4e..940c63863 100644 --- a/src/main/java/org/rcsb/cif/model/FloatColumn.java +++ b/src/main/java/org/rcsb/cif/model/FloatColumn.java @@ -29,7 +29,7 @@ default DoubleStream values() { * @return a double */ static double parseFloat(String text) { - if (text.isEmpty() || ".".equals(text) || "?".equals(text)) { + if (text.isEmpty() || ValueKind.CIF_NOT_PRESENT.equals(text) || ValueKind.CIF_UNKNOWN.equals(text)) { return 0; } // some formats specify uncertain decimal places like: 0.00012(3) - ignore them (in agreement with Mol*) diff --git a/src/main/java/org/rcsb/cif/model/IntColumn.java b/src/main/java/org/rcsb/cif/model/IntColumn.java index 964bb80e9..1ceaec702 100644 --- a/src/main/java/org/rcsb/cif/model/IntColumn.java +++ b/src/main/java/org/rcsb/cif/model/IntColumn.java @@ -29,7 +29,7 @@ default IntStream values() { * @return an int */ static int parseInt(String text) { - if (text.isEmpty() || ".".equals(text) || "?".equals(text)) { + if (text.isEmpty() || ValueKind.CIF_NOT_PRESENT.equals(text) || ValueKind.CIF_UNKNOWN.equals(text)) { return 0; } // some floats may omit decimal places and can be parsed as int: 88. - ignore the dot (in agreement with Mol*) diff --git a/src/main/java/org/rcsb/cif/model/ValueKind.java b/src/main/java/org/rcsb/cif/model/ValueKind.java index 2f6fb5473..c7564dfa7 100644 --- a/src/main/java/org/rcsb/cif/model/ValueKind.java +++ b/src/main/java/org/rcsb/cif/model/ValueKind.java @@ -16,5 +16,28 @@ public enum ValueKind { /** * The value is unknown - ? in CIF. String values will be empty, number values will be 0. */ - UNKNOWN + UNKNOWN; + + public static final String CIF_NOT_PRESENT = "."; + public static final String CIF_UNKNOWN = "?"; + + /** + * Checks whether a String matches "?" or ".", sequences with special meaning in CIF. + * @param s payload to evaluate + * @return true if this String indicates missing or undefined values + */ + public static boolean isValueKindToken(String s) { + return CIF_NOT_PRESENT.equals(s) || CIF_UNKNOWN.equals(s); + } + + /** + * Transforms a String into a ValueKind. + * @param s payload to evaluate + * @return appropriate ValueKind for "?" and ".", otherwise marked as PRESENT + */ + public static ValueKind fromCifToken(String s) { + if (s == null || s.isEmpty() || CIF_NOT_PRESENT.equals(s)) return NOT_PRESENT; + if (CIF_UNKNOWN.equals(s)) return UNKNOWN; + return PRESENT; + } } diff --git a/src/main/java/org/rcsb/cif/model/text/TextColumn.java b/src/main/java/org/rcsb/cif/model/text/TextColumn.java index c0e2e86dc..10f857e08 100644 --- a/src/main/java/org/rcsb/cif/model/text/TextColumn.java +++ b/src/main/java/org/rcsb/cif/model/text/TextColumn.java @@ -36,19 +36,13 @@ public String getStringData(int row) { } private String honorValueKind(String value) { - return (".".equals(value) || "?".equals(value)) ? "" : value; + return ValueKind.isValueKindToken(value) ? "" : value; } @Override public ValueKind getValueKind(int row) { String value = textData.substring(startToken[row], endToken[row]); - if (value.isEmpty() || ".".equals(value)) { - return ValueKind.NOT_PRESENT; - } else if ("?".equals(value)) { - return ValueKind.UNKNOWN; - } else { - return ValueKind.PRESENT; - } + return ValueKind.fromCifToken(value); } /** From 889821da098a54d1f1943b51692116971d05f42d Mon Sep 17 00:00:00 2001 From: Sebastian Date: Tue, 13 Jan 2026 14:52:09 +0100 Subject: [PATCH 2/7] drop streams for tight loops --- .../cif/model/builder/ColumnBuilderImpl.java | 2 +- .../model/builder/FloatColumnBuilderImpl.java | 19 ++++--- .../model/builder/IntColumnBuilderImpl.java | 16 +++--- .../model/builder/StrColumnBuilderImpl.java | 49 +++++++++++++++---- 4 files changed, 61 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/rcsb/cif/model/builder/ColumnBuilderImpl.java b/src/main/java/org/rcsb/cif/model/builder/ColumnBuilderImpl.java index c34fc03ff..50473e470 100644 --- a/src/main/java/org/rcsb/cif/model/builder/ColumnBuilderImpl.java +++ b/src/main/java/org/rcsb/cif/model/builder/ColumnBuilderImpl.java @@ -12,7 +12,7 @@ public abstract class ColumnBuilderImpl

, PP extends BlockBuilder, PPP extends CifFileBuilder> implements ColumnBuilder { private final String categoryName; private final String columnName; - final List mask; + final ArrayList mask; // concrete impl to expose ensureCapacity() final P parent; ColumnBuilderImpl(String categoryName, String columnName, P parent) { diff --git a/src/main/java/org/rcsb/cif/model/builder/FloatColumnBuilderImpl.java b/src/main/java/org/rcsb/cif/model/builder/FloatColumnBuilderImpl.java index 8bd85c667..17ca682d8 100644 --- a/src/main/java/org/rcsb/cif/model/builder/FloatColumnBuilderImpl.java +++ b/src/main/java/org/rcsb/cif/model/builder/FloatColumnBuilderImpl.java @@ -9,14 +9,12 @@ import java.util.ArrayList; import java.util.List; -import java.util.stream.DoubleStream; -import java.util.stream.IntStream; import static org.rcsb.cif.model.CategoryBuilder.createColumnText; public class FloatColumnBuilderImpl

, PP extends BlockBuilder, PPP extends CifFileBuilder> extends ColumnBuilderImpl implements FloatColumnBuilder { - private final List values; + private final ArrayList values; public FloatColumnBuilderImpl(String categoryName, String columnName, P parent) { super(categoryName, columnName, parent); @@ -30,14 +28,14 @@ public List getValues() { @Override public FloatColumnBuilder markNextNotPresent() { - values.add(0.0); + values.add(null); mask.add(ValueKind.NOT_PRESENT); return this; } @Override public FloatColumnBuilder markNextUnknown() { - values.add(0.0); + values.add(null); mask.add(ValueKind.UNKNOWN); return this; } @@ -48,9 +46,14 @@ public FloatColumn build() { } @Override - public FloatColumnBuilder add(double... value) { - DoubleStream.of(value).forEach(values::add); - IntStream.range(0, value.length).mapToObj(i -> ValueKind.PRESENT).forEach(mask::add); + public FloatColumnBuilder add(double... values) { + this.values.ensureCapacity(this.values.size() + values.length); + this.mask.ensureCapacity(this.mask.size() + values.length); + + for (double v : values) { + this.values.add(v); + this.mask.add(ValueKind.PRESENT); + } return this; } diff --git a/src/main/java/org/rcsb/cif/model/builder/IntColumnBuilderImpl.java b/src/main/java/org/rcsb/cif/model/builder/IntColumnBuilderImpl.java index 63a3097e8..5c62162e9 100644 --- a/src/main/java/org/rcsb/cif/model/builder/IntColumnBuilderImpl.java +++ b/src/main/java/org/rcsb/cif/model/builder/IntColumnBuilderImpl.java @@ -9,13 +9,12 @@ import java.util.ArrayList; import java.util.List; -import java.util.stream.IntStream; import static org.rcsb.cif.model.CategoryBuilder.createColumnText; public class IntColumnBuilderImpl

, PP extends BlockBuilder, PPP extends CifFileBuilder> extends ColumnBuilderImpl implements IntColumnBuilder { - private final List values; + private final ArrayList values; public IntColumnBuilderImpl(String categoryName, String columnName, P parent) { super(categoryName, columnName, parent); @@ -29,14 +28,14 @@ public List getValues() { @Override public IntColumnBuilder markNextNotPresent() { - values.add(0); + values.add(null); mask.add(ValueKind.NOT_PRESENT); return this; } @Override public IntColumnBuilder markNextUnknown() { - values.add(0); + values.add(null); mask.add(ValueKind.UNKNOWN); return this; } @@ -48,8 +47,13 @@ public IntColumn build() { @Override public IntColumnBuilder add(int... values) { - IntStream.of(values).forEach(this.values::add); - IntStream.range(0, values.length).mapToObj(i -> ValueKind.PRESENT).forEach(mask::add); + this.values.ensureCapacity(this.values.size() + values.length); + this.mask.ensureCapacity(this.mask.size() + values.length); + + for (int v : values) { + this.values.add(v); + this.mask.add(ValueKind.PRESENT); + } return this; } diff --git a/src/main/java/org/rcsb/cif/model/builder/StrColumnBuilderImpl.java b/src/main/java/org/rcsb/cif/model/builder/StrColumnBuilderImpl.java index 3772a19d3..fef75df02 100644 --- a/src/main/java/org/rcsb/cif/model/builder/StrColumnBuilderImpl.java +++ b/src/main/java/org/rcsb/cif/model/builder/StrColumnBuilderImpl.java @@ -14,7 +14,7 @@ public class StrColumnBuilderImpl

, PP extends BlockBuilder, PPP extends CifFileBuilder> extends ColumnBuilderImpl implements StrColumnBuilder { - private final List values; + private final ArrayList values; public StrColumnBuilderImpl(String categoryName, String columnName, P parent) { super(categoryName, columnName, parent); @@ -28,14 +28,14 @@ public List getValues() { @Override public StrColumnBuilder markNextNotPresent() { - values.add("."); + values.add(null); mask.add(ValueKind.NOT_PRESENT); return this; } @Override public StrColumnBuilder markNextUnknown() { - values.add("?"); + values.add(null); mask.add(ValueKind.UNKNOWN); return this; } @@ -45,16 +45,45 @@ public StrColumn build() { return createColumnText(getColumnName(), values, mask, StrColumn.class); } + /** + * Add one or more string values to this column. + *

+ * CIF has two special tokens for missing data: {@code "."} (not present) and {@code "?"} + * (unknown). This method treats those tokens (and {@code null}) as missingness indicators + * rather than literal payload: + *

+ *

+ * Note: this means you cannot write a literal value that is exactly {@code "."} or {@code "?"} + * via this overload. If you need explicit control over missingness vs. payload, prefer the + * forthcoming masked overload that accepts {@link ValueKind} alongside values (e.g. {@code addMasked(...)}). + * + * @param values string values to append; {@code null}, {@code "."}, and {@code "?"} are treated specially + * @return this builder instance + */ @Override public StrColumnBuilder add(String... values) { + this.values.ensureCapacity(this.values.size() + values.length); + this.mask.ensureCapacity(this.mask.size() + values.length); + for (String s : values) { - if (".".equals(s)) { - markNextNotPresent(); - } else if ("?".equals(s)) { - markNextUnknown(); - } else { - this.values.add(s); - mask.add(ValueKind.PRESENT); + ValueKind kind = ValueKind.fromCifToken(s); + switch (kind) { + case NOT_PRESENT: + markNextNotPresent(); + break; + case UNKNOWN: + markNextUnknown(); + break; + case PRESENT: + this.values.add(s); + this.mask.add(ValueKind.PRESENT); + break; + default: + throw new IllegalStateException("Unhandled ValueKind: " + kind); } } return this; From 7f295aa4bc75dced612ff0fc03a57588934a9fa2 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Tue, 13 Jan 2026 15:05:59 +0100 Subject: [PATCH 3/7] cleanup --- .../rcsb/cif/model/builder/CategoryBuilderImpl.java | 1 - src/test/java/org/rcsb/cif/CifOptionsTest.java | 4 ++-- src/test/java/org/rcsb/cif/IntegrationTest.java | 4 ++-- src/test/java/org/rcsb/cif/TestHelper.java | 2 +- src/test/java/org/rcsb/cif/WriterTest.java | 10 +++++----- .../rcsb/cif/binary/codec/MessagePackCodecTest.java | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/rcsb/cif/model/builder/CategoryBuilderImpl.java b/src/main/java/org/rcsb/cif/model/builder/CategoryBuilderImpl.java index d5da85c7f..2e5f7a896 100644 --- a/src/main/java/org/rcsb/cif/model/builder/CategoryBuilderImpl.java +++ b/src/main/java/org/rcsb/cif/model/builder/CategoryBuilderImpl.java @@ -46,7 +46,6 @@ public Map> getColumns() { return columns; } - @SuppressWarnings("unchecked") @Override public P leaveCategory() { if (parent == null) { diff --git a/src/test/java/org/rcsb/cif/CifOptionsTest.java b/src/test/java/org/rcsb/cif/CifOptionsTest.java index b6ab2bf42..a3aacab44 100644 --- a/src/test/java/org/rcsb/cif/CifOptionsTest.java +++ b/src/test/java/org/rcsb/cif/CifOptionsTest.java @@ -81,7 +81,7 @@ void testFetchUrlText() throws IOException { // by switching to RCSB cif files, the implementation type should be text CifFile cifFile = CifIO.readById("1acj", CifOptions.builder() .fetchUrl("https://files.rcsb.org/download/%s.cif").build()); - assertTrue(cifFile instanceof TextFile); + assertInstanceOf(TextFile.class, cifFile); } @Test @@ -89,7 +89,7 @@ void testFetchUrlBinary() throws IOException { // by switching to RCSB bcif files, the implementation type should be binary CifFile cifFile = CifIO.readById("1acj", CifOptions.builder() .fetchUrl("https://models.rcsb.org/%s.bcif").build()); - assertTrue(cifFile instanceof BinaryFile); + assertInstanceOf(BinaryFile.class, cifFile); } @Test diff --git a/src/test/java/org/rcsb/cif/IntegrationTest.java b/src/test/java/org/rcsb/cif/IntegrationTest.java index 027777f07..082889fb9 100644 --- a/src/test/java/org/rcsb/cif/IntegrationTest.java +++ b/src/test/java/org/rcsb/cif/IntegrationTest.java @@ -98,12 +98,12 @@ void testDelegationBehavior() throws IOException { MmCifFile textCifFile = CifIO.readFromInputStream(TestHelper.getInputStream("cif/1acj.cif")).as(StandardSchemata.MMCIF); textCifFile.getFirstBlock() .categories() - .forEach(category -> assertTrue(category instanceof DelegatingCategory, "no delegation for text after schema was imposed for " + category.getCategoryName())); + .forEach(category -> assertInstanceOf(DelegatingCategory.class, category, "no delegation for text after schema was imposed for " + category.getCategoryName())); MmCifFile binaryCifFile = CifIO.readFromInputStream(TestHelper.getInputStream("bcif/1acj.bcif")).as(StandardSchemata.MMCIF); binaryCifFile.getFirstBlock() .categories() - .forEach(category -> assertTrue(category instanceof DelegatingCategory, "no delegation for binary after schema was imposed for " + category.getCategoryName())); + .forEach(category -> assertInstanceOf(DelegatingCategory.class, category, "no delegation for binary after schema was imposed for " + category.getCategoryName())); } @Test diff --git a/src/test/java/org/rcsb/cif/TestHelper.java b/src/test/java/org/rcsb/cif/TestHelper.java index a35d35d08..a027d7104 100644 --- a/src/test/java/org/rcsb/cif/TestHelper.java +++ b/src/test/java/org/rcsb/cif/TestHelper.java @@ -21,7 +21,7 @@ * Origin of files: * - bcif created by Mol* encoder * - cif created by Mol* encoder - * + *

* All tests ensure that the behavior of the reference implementation (i.e. Mol*) is recreated rather than that output * is in perfect agreement with e.g. PDB files. */ diff --git a/src/test/java/org/rcsb/cif/WriterTest.java b/src/test/java/org/rcsb/cif/WriterTest.java index 7aa6917d5..773875a57 100644 --- a/src/test/java/org/rcsb/cif/WriterTest.java +++ b/src/test/java/org/rcsb/cif/WriterTest.java @@ -116,11 +116,11 @@ void testClassInferenceOfBuiltMmCifFile() { .leaveBlock() .leaveFile(); MmCifBlock block = cifFile.getFirstBlock(); - assertTrue(block.getCategory("atom_site") instanceof org.rcsb.cif.schema.mm.AtomSite); - assertTrue(block.getCategory("atom_site").getColumn("B_iso_or_equiv") instanceof FloatColumn); + assertInstanceOf(org.rcsb.cif.schema.mm.AtomSite.class, block.getCategory("atom_site")); + assertInstanceOf(FloatColumn.class, block.getCategory("atom_site").getColumn("B_iso_or_equiv")); Category atom_site = new CategoryBuilderImpl<>("atom_site", null).build(); - assertTrue(atom_site instanceof TextCategory); + assertInstanceOf(TextCategory.class, atom_site); FloatColumn cartnX = new FloatColumnBuilderImpl<>("atom_site", "Cartn_x", null).build(); assertNotNull(cartnX); @@ -144,7 +144,7 @@ void testClassInferenceOfBuiltCifCoreFile() { assertTrue(columnBySchema.isDefined()); Column columnByName = block.getColumn("atom_site_B_iso_or_equiv"); assertTrue(columnByName.isDefined()); - assertTrue(columnByName instanceof FloatColumn); + assertInstanceOf(FloatColumn.class, columnByName); AtomSite categoryBySchema = block.getAtomSite(); assertTrue(categoryBySchema.isDefined()); @@ -154,7 +154,7 @@ void testClassInferenceOfBuiltCifCoreFile() { assertFalse(categoryByName.isDefined()); Category atom_site = new CategoryBuilderImpl<>("atom_site", null).build(); - assertTrue(atom_site instanceof TextCategory); + assertInstanceOf(TextCategory.class, atom_site); FloatColumn cartnX = new FloatColumnBuilderImpl<>("atom_site", "Cartn_x", null).build(); assertNotNull(cartnX); diff --git a/src/test/java/org/rcsb/cif/binary/codec/MessagePackCodecTest.java b/src/test/java/org/rcsb/cif/binary/codec/MessagePackCodecTest.java index 68cb56f66..2fb3b9560 100644 --- a/src/test/java/org/rcsb/cif/binary/codec/MessagePackCodecTest.java +++ b/src/test/java/org/rcsb/cif/binary/codec/MessagePackCodecTest.java @@ -15,7 +15,7 @@ import static org.rcsb.cif.TestHelper.convertToIntArray; /** - * - Obtain MessagePacked data at: https://msgpack.org/ + * - Obtain MessagePacked data at: msgpack.com * - Always use a sorted Map implementation (e.g. LinkedHashMap) as order matters. */ class MessagePackCodecTest { From 1b77df0aa163d23f663a5b2b72a3565a402e5fce Mon Sep 17 00:00:00 2001 From: Sebastian Date: Tue, 13 Jan 2026 15:17:08 +0100 Subject: [PATCH 4/7] dep upgrade --- pom.xml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index e66d3a879..46d5a03bd 100644 --- a/pom.xml +++ b/pom.xml @@ -58,10 +58,15 @@ org.junit.jupiter junit-jupiter-api - 5.14.0 + 6.0.2 + test + + + org.junit.jupiter + junit-jupiter-engine + 6.0.2 test - @@ -85,6 +90,9 @@ org.apache.maven.plugins maven-surefire-plugin 3.5.4 + + true + @@ -98,7 +106,7 @@ org.sonatype.central central-publishing-maven-plugin - 0.9.0 + 0.10.0 true central @@ -137,7 +145,7 @@ org.apache.maven.plugins maven-source-plugin - 3.3.1 + 3.4.0 attach-sources From 1d7a54db21ae7fbab722dfa85dd4cf4476029e52 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Tue, 13 Jan 2026 15:29:58 +0100 Subject: [PATCH 5/7] validate columns are equal in size --- CHANGELOG.md | 5 +++++ .../rcsb/cif/model/builder/CategoryBuilderImpl.java | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63b913cba..182b271e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,11 @@ This project uses semantic versioning. Furthermore, this project provides code t * `diffrn_radiation_wavelength_id` removed * `geom_bond_distance_min` renamed to `geom_min_bond_distance_cutoff` +### General +* improved builder ergonomics + * added overloads that make it easier to build columns with missing values + * validate that all columns in a category are equal in size + ciftools-java 7.0.2 - September 2025 ------------- ### Bug fixes diff --git a/src/main/java/org/rcsb/cif/model/builder/CategoryBuilderImpl.java b/src/main/java/org/rcsb/cif/model/builder/CategoryBuilderImpl.java index 2e5f7a896..a3ac61f18 100644 --- a/src/main/java/org/rcsb/cif/model/builder/CategoryBuilderImpl.java +++ b/src/main/java/org/rcsb/cif/model/builder/CategoryBuilderImpl.java @@ -27,6 +27,7 @@ public class CategoryBuilderImpl

, PP extends CifFileB protected final P parent; private final List, P, PP>> pendingDigests; private final List, P, PP>> finishedDigests; + private Integer expectedRowCount; public CategoryBuilderImpl(String categoryName, P parent) { this.categoryName = categoryName; @@ -34,6 +35,7 @@ public CategoryBuilderImpl(String categoryName, P parent) { this.parent = parent; this.pendingDigests = new ArrayList<>(); this.finishedDigests = new ArrayList<>(); + this.expectedRowCount = null; } @Override @@ -56,6 +58,17 @@ public P leaveCategory() { pendingDigests.stream() .filter(child -> !finishedDigests.contains(child)) .forEach(child -> { + // assert all columns are equal in size + int n = child.getMask().size(); + if (expectedRowCount == null) { + expectedRowCount = n; + } else if (expectedRowCount != n) { + throw new IllegalStateException("Category '" + categoryName + "': column '" + + child.getColumnName() + "' has " + n + " rows, but expected " + expectedRowCount + + " based on previously digested columns. Ensure every column receives exactly one value " + + "per row (use markNextUnknown()/markNextNotPresent() for missing values)."); + } + if (child instanceof IntColumnBuilder) { digest((IntColumnBuilder, P, PP>) child); } else if (child instanceof FloatColumnBuilder) { From d5fc0deb75fcc6a6e08279889b65e315ceabf7d0 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Tue, 13 Jan 2026 16:06:34 +0100 Subject: [PATCH 6/7] builder: `addMasked` and `addAll` convenience methods --- .../rcsb/cif/model/FloatColumnBuilder.java | 57 +++++++++++++++ .../org/rcsb/cif/model/IntColumnBuilder.java | 57 +++++++++++++++ .../org/rcsb/cif/model/StrColumnBuilder.java | 69 +++++++++++++++++++ .../java/org/rcsb/cif/model/ValueKind.java | 2 +- 4 files changed, 184 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/rcsb/cif/model/FloatColumnBuilder.java b/src/main/java/org/rcsb/cif/model/FloatColumnBuilder.java index 8a397c977..51c241acb 100644 --- a/src/main/java/org/rcsb/cif/model/FloatColumnBuilder.java +++ b/src/main/java/org/rcsb/cif/model/FloatColumnBuilder.java @@ -1,6 +1,7 @@ package org.rcsb.cif.model; import java.util.List; +import java.util.Objects; /** * A builder instance for {@link FloatColumn} instances. @@ -27,4 +28,60 @@ public interface FloatColumnBuilder

, PP exten * @return this builder instance */ FloatColumnBuilder add(double... values); + + /** + * Add new values with fine-grained control. + *

+ * For {@link ValueKind#PRESENT}, the corresponding entry from {@code values} is appended. + * For {@link ValueKind#NOT_PRESENT} and {@link ValueKind#UNKNOWN}, this method delegates to + * {@link #markNextNotPresent()} and {@link #markNextUnknown()} respectively. + *

+ * @param values array of double values + * @param mask array of {@link ValueKind}, must be the same length as {@code values} + * @return this builder instance + * @throws IllegalArgumentException if arrays differ in size + * @throws NullPointerException if {@code values}, {@code mask}, or any {@code mask[i]} is null + */ + default FloatColumnBuilder addMasked(double[] values, ValueKind[] mask) { + Objects.requireNonNull(values, "values"); + Objects.requireNonNull(mask, "mask"); + if (values.length != mask.length) { + throw new IllegalArgumentException("values.length (" + values.length + ") must equal mask.length (" + mask.length + ")"); + } + + for (int i = 0; i < values.length; i++) { + ValueKind k = Objects.requireNonNull(mask[i], "mask[" + i + "]"); + switch (k) { + case PRESENT: + add(values[i]); + break; + case NOT_PRESENT: + markNextNotPresent(); + break; + case UNKNOWN: + markNextUnknown(); + break; + default: + throw new IllegalStateException("Unhandled ValueKind: " + k); + } + } + return this; + } + + /** + * Add values from an Iterable. + * @param values Double values, null is mapped to ValueKind.NOT_PRESENT (".") + * @return this builder instance + */ + default FloatColumnBuilder addNullable(Iterable values) { + Objects.requireNonNull(values, "values"); + for (Double v : values) { + if (v == null) { + markNextNotPresent(); + } else { + add(v); + } + } + return this; + } } diff --git a/src/main/java/org/rcsb/cif/model/IntColumnBuilder.java b/src/main/java/org/rcsb/cif/model/IntColumnBuilder.java index bab5087ff..661a69b1a 100644 --- a/src/main/java/org/rcsb/cif/model/IntColumnBuilder.java +++ b/src/main/java/org/rcsb/cif/model/IntColumnBuilder.java @@ -1,6 +1,7 @@ package org.rcsb.cif.model; import java.util.List; +import java.util.Objects; /** * A builder instance for {@link IntColumn} instances. @@ -27,4 +28,60 @@ public interface IntColumnBuilder

, PP extends * @return this builder instance */ IntColumnBuilder add(int... values); + + /** + * Add new values with fine-grained control. + *

+ * For {@link ValueKind#PRESENT}, the corresponding entry from {@code values} is appended. + * For {@link ValueKind#NOT_PRESENT} and {@link ValueKind#UNKNOWN}, this method delegates to + * {@link #markNextNotPresent()} and {@link #markNextUnknown()} respectively. + *

+ * @param values array of int values + * @param mask array of {@link ValueKind}, must be the same length as {@code values} + * @return this builder instance + * @throws IllegalArgumentException if arrays differ in size + * @throws NullPointerException if {@code values}, {@code mask}, or any {@code mask[i]} is null + */ + default IntColumnBuilder addMasked(int[] values, ValueKind[] mask) { + Objects.requireNonNull(values, "values"); + Objects.requireNonNull(mask, "mask"); + if (values.length != mask.length) { + throw new IllegalArgumentException("values.length (" + values.length + ") must equal mask.length (" + mask.length + ")"); + } + + for (int i = 0; i < values.length; i++) { + ValueKind k = Objects.requireNonNull(mask[i], "mask[" + i + "]"); + switch (k) { + case PRESENT: + add(values[i]); + break; + case NOT_PRESENT: + markNextNotPresent(); + break; + case UNKNOWN: + markNextUnknown(); + break; + default: + throw new IllegalStateException("Unhandled ValueKind: " + k); + } + } + return this; + } + + /** + * Add values from an Iterable. + * @param values Integer values, null is mapped to ValueKind.NOT_PRESENT (".") + * @return this builder instance + */ + default IntColumnBuilder addNullable(Iterable values) { + Objects.requireNonNull(values, "values"); + for (Integer v : values) { + if (v == null) { + markNextNotPresent(); + } else { + add(v); + } + } + return this; + } } diff --git a/src/main/java/org/rcsb/cif/model/StrColumnBuilder.java b/src/main/java/org/rcsb/cif/model/StrColumnBuilder.java index 6f8057fa9..90ca17667 100644 --- a/src/main/java/org/rcsb/cif/model/StrColumnBuilder.java +++ b/src/main/java/org/rcsb/cif/model/StrColumnBuilder.java @@ -1,6 +1,7 @@ package org.rcsb.cif.model; import java.util.List; +import java.util.Objects; /** * A builder instance for {@link StrColumn} instances. @@ -27,4 +28,72 @@ public interface StrColumnBuilder

, PP extends * @return this builder instance */ StrColumnBuilder add(String... values); + + /** + * Add new values with fine-grained control. + *

+ * For {@link ValueKind#PRESENT}, the corresponding entry from {@code values} is appended. + * For {@link ValueKind#NOT_PRESENT} and {@link ValueKind#UNKNOWN}, this method delegates to + * {@link #markNextNotPresent()} and {@link #markNextUnknown()} respectively. + *

+ * @param values array of String values + * @param mask array of {@link ValueKind}, must be the same length as {@code values} + * @return this builder instance + * @throws IllegalArgumentException if arrays differ in size + * @throws NullPointerException if {@code values}, {@code mask}, or any {@code mask[i]} is null + */ + default StrColumnBuilder addMasked(String[] values, ValueKind[] mask) { + Objects.requireNonNull(values, "values"); + Objects.requireNonNull(mask, "mask"); + if (values.length != mask.length) { + throw new IllegalArgumentException("values.length (" + values.length + ") must equal mask.length (" + mask.length + ")"); + } + + for (int i = 0; i < values.length; i++) { + ValueKind k = Objects.requireNonNull(mask[i], "mask[" + i + "]"); + if (k == ValueKind.PRESENT && (values[i] == null || ValueKind.isValueKindToken(values[i]))) { + throw new IllegalArgumentException("PRESENT value must not be null, '.' or '?': values[" + i + "]"); + } + switch (k) { + case PRESENT: + add(values[i]); + break; + case NOT_PRESENT: + markNextNotPresent(); + break; + case UNKNOWN: + markNextUnknown(); + break; + default: + throw new IllegalStateException("Unhandled ValueKind: " + k); + } + } + return this; + } + + /** + * Add values from an Iterable. + * @param values String values, null is mapped to NOT_PRESENT ("."); "." and "?" are interpreted as CIF tokens. + * @return this builder instance + */ + default StrColumnBuilder addNullable(Iterable values) { + Objects.requireNonNull(values, "values"); + for (String v : values) { + ValueKind valueKind = ValueKind.fromCifToken(v); + switch (valueKind) { + case PRESENT: + add(v); + break; + case NOT_PRESENT: + markNextNotPresent(); + break; + case UNKNOWN: + markNextUnknown(); + break; + default: + throw new IllegalStateException("Unhandled ValueKind: " + valueKind); + } + } + return this; + } } diff --git a/src/main/java/org/rcsb/cif/model/ValueKind.java b/src/main/java/org/rcsb/cif/model/ValueKind.java index c7564dfa7..3e1680e89 100644 --- a/src/main/java/org/rcsb/cif/model/ValueKind.java +++ b/src/main/java/org/rcsb/cif/model/ValueKind.java @@ -36,7 +36,7 @@ public static boolean isValueKindToken(String s) { * @return appropriate ValueKind for "?" and ".", otherwise marked as PRESENT */ public static ValueKind fromCifToken(String s) { - if (s == null || s.isEmpty() || CIF_NOT_PRESENT.equals(s)) return NOT_PRESENT; + if (s == null || CIF_NOT_PRESENT.equals(s)) return NOT_PRESENT; if (CIF_UNKNOWN.equals(s)) return UNKNOWN; return PRESENT; } From 5f5c0206e88db83c94f8a49e62128220d9004ef4 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Tue, 13 Jan 2026 16:13:46 +0100 Subject: [PATCH 7/7] refactor to use ValueKind helpers --- src/main/java/org/rcsb/cif/model/CategoryBuilder.java | 4 ++-- src/main/java/org/rcsb/cif/model/FloatColumn.java | 2 +- src/main/java/org/rcsb/cif/model/IntColumn.java | 2 +- src/main/java/org/rcsb/cif/model/binary/BinaryStrColumn.java | 3 ++- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/rcsb/cif/model/CategoryBuilder.java b/src/main/java/org/rcsb/cif/model/CategoryBuilder.java index 35ce05490..9f5acaa58 100644 --- a/src/main/java/org/rcsb/cif/model/CategoryBuilder.java +++ b/src/main/java/org/rcsb/cif/model/CategoryBuilder.java @@ -112,9 +112,9 @@ static > C createColumnText(String columnName, List value startToken[i] = builder.length(); String value = String.valueOf(values.get(i)); if (mask.get(i) == ValueKind.NOT_PRESENT) { - value = "."; + value = ValueKind.CIF_NOT_PRESENT; } else if (mask.get(i) == ValueKind.UNKNOWN) { - value = "?"; + value = ValueKind.CIF_UNKNOWN; } builder.append(value); endToken[i] = builder.length(); diff --git a/src/main/java/org/rcsb/cif/model/FloatColumn.java b/src/main/java/org/rcsb/cif/model/FloatColumn.java index 940c63863..362bce834 100644 --- a/src/main/java/org/rcsb/cif/model/FloatColumn.java +++ b/src/main/java/org/rcsb/cif/model/FloatColumn.java @@ -29,7 +29,7 @@ default DoubleStream values() { * @return a double */ static double parseFloat(String text) { - if (text.isEmpty() || ValueKind.CIF_NOT_PRESENT.equals(text) || ValueKind.CIF_UNKNOWN.equals(text)) { + if (text.isEmpty() || ValueKind.isValueKindToken(text)) { return 0; } // some formats specify uncertain decimal places like: 0.00012(3) - ignore them (in agreement with Mol*) diff --git a/src/main/java/org/rcsb/cif/model/IntColumn.java b/src/main/java/org/rcsb/cif/model/IntColumn.java index 1ceaec702..8921815da 100644 --- a/src/main/java/org/rcsb/cif/model/IntColumn.java +++ b/src/main/java/org/rcsb/cif/model/IntColumn.java @@ -29,7 +29,7 @@ default IntStream values() { * @return an int */ static int parseInt(String text) { - if (text.isEmpty() || ValueKind.CIF_NOT_PRESENT.equals(text) || ValueKind.CIF_UNKNOWN.equals(text)) { + if (text.isEmpty() || ValueKind.isValueKindToken(text)) { return 0; } // some floats may omit decimal places and can be parsed as int: 88. - ignore the dot (in agreement with Mol*) diff --git a/src/main/java/org/rcsb/cif/model/binary/BinaryStrColumn.java b/src/main/java/org/rcsb/cif/model/binary/BinaryStrColumn.java index 2b543170f..3e2bdf688 100644 --- a/src/main/java/org/rcsb/cif/model/binary/BinaryStrColumn.java +++ b/src/main/java/org/rcsb/cif/model/binary/BinaryStrColumn.java @@ -1,6 +1,7 @@ package org.rcsb.cif.model.binary; import org.rcsb.cif.model.StrColumn; +import org.rcsb.cif.model.ValueKind; public class BinaryStrColumn extends BinaryColumn implements StrColumn { private final String[] data; @@ -21,7 +22,7 @@ public String getStringData(int row) { } private String honorValueKind(String value) { - return (".".equals(value) || "?".equals(value)) ? "" : value; + return ValueKind.isValueKindToken(value) ? "" : value; } @Override