Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ This project uses semantic versioning. Furthermore, this project provides code t
* `diffrn_radiation_wavelength_id` removed
* `geom_bond_distance_min` renamed to `geom_min_bond_distance_cutoff`

### General
* improved builder ergonomics
* added overloads that make it easier to build columns with missing values
* added validation that all columns in a category are equal in size

ciftools-java 7.0.2 - September 2025
-------------
### Bug fixes
Expand Down
16 changes: 12 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,15 @@
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>5.14.0</version>
<version>6.0.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>6.0.2</version>
<scope>test</scope>
</dependency>

</dependencies>

<properties>
Expand All @@ -85,6 +90,9 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.5.4</version>
<configuration>
<failIfNoTests>true</failIfNoTests>
</configuration>
</plugin>
</plugins>
</build>
Expand All @@ -98,7 +106,7 @@
<plugin>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
<version>0.9.0</version>
<version>0.10.0</version>
<extensions>true</extensions>
<configuration>
<publishingServerId>central</publishingServerId>
Expand Down Expand Up @@ -137,7 +145,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.3.1</version>
<version>3.4.0</version>
<executions>
<execution>
<id>attach-sources</id>
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/rcsb/cif/model/CategoryBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,9 @@ static <C extends Column<?>> C createColumnText(String columnName, List<?> value
startToken[i] = builder.length();
String value = String.valueOf(values.get(i));
if (mask.get(i) == ValueKind.NOT_PRESENT) {
value = ".";
value = ValueKind.CIF_NOT_PRESENT;
} else if (mask.get(i) == ValueKind.UNKNOWN) {
value = "?";
value = ValueKind.CIF_UNKNOWN;
}
builder.append(value);
endToken[i] = builder.length();
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/rcsb/cif/model/FloatColumn.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ default DoubleStream values() {
* @return a double
*/
static double parseFloat(String text) {
if (text.isEmpty() || ".".equals(text) || "?".equals(text)) {
if (text.isEmpty() || ValueKind.isValueKindToken(text)) {
return 0;
}
// some formats specify uncertain decimal places like: 0.00012(3) - ignore them (in agreement with Mol*)
Expand Down
57 changes: 57 additions & 0 deletions src/main/java/org/rcsb/cif/model/FloatColumnBuilder.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.rcsb.cif.model;

import java.util.List;
import java.util.Objects;

/**
* A builder instance for {@link FloatColumn} instances.
Expand All @@ -27,4 +28,60 @@ public interface FloatColumnBuilder<P extends CategoryBuilder<PP, PPP>, PP exten
* @return this builder instance
*/
FloatColumnBuilder<P, PP, PPP> add(double... values);

/**
 * Add new values with fine-grained control over which entries are present.
 * <p>
 * For {@link ValueKind#PRESENT}, the corresponding entry from {@code values} is appended.
 * For {@link ValueKind#NOT_PRESENT} and {@link ValueKind#UNKNOWN}, this method delegates to
 * {@link #markNextNotPresent()} and {@link #markNextUnknown()} respectively; the entry at that index of
 * {@code values} is not read.
 * </p>
 * <p>
 * Argument validation is completed before the first entry is appended, so a call that is rejected because
 * of a null argument or a length mismatch does not leave a partially filled column behind.
 * </p>
 * @param values array of double values
 * @param mask array of {@link ValueKind}, must be the same length as {@code values}
 * @return this builder instance
 * @throws IllegalArgumentException if arrays differ in size
 * @throws NullPointerException if {@code values}, {@code mask}, or any {@code mask[i]} is null
 */
default FloatColumnBuilder<P,PP,PPP> addMasked(double[] values, ValueKind[] mask) {
    Objects.requireNonNull(values, "values");
    Objects.requireNonNull(mask, "mask");
    if (values.length != mask.length) {
        throw new IllegalArgumentException("values.length (" + values.length + ") must equal mask.length (" + mask.length + ")");
    }

    // check every mask entry up front: detecting a null halfway through the append loop would
    // leave the rows before it already added to the column
    for (int i = 0; i < mask.length; i++) {
        Objects.requireNonNull(mask[i], "mask[" + i + "]");
    }

    for (int i = 0; i < values.length; i++) {
        ValueKind k = mask[i];
        switch (k) {
            case PRESENT:
                add(values[i]);
                break;
            case NOT_PRESENT:
                markNextNotPresent();
                break;
            case UNKNOWN:
                markNextUnknown();
                break;
            default:
                throw new IllegalStateException("Unhandled ValueKind: " + k);
        }
    }
    return this;
}

/**
 * Appends every element of an {@link Iterable}, in iteration order.
 * <p>
 * A {@code null} element is recorded through {@link #markNextNotPresent()}, i.e. as
 * {@link ValueKind#NOT_PRESENT} ("."); any other element is appended as a regular value.
 * </p>
 * @param values Double values, may contain {@code null} elements
 * @return this builder instance
 * @throws NullPointerException if {@code values} itself is null
 */
default FloatColumnBuilder<P,PP,PPP> addNullable(Iterable<Double> values) {
    Objects.requireNonNull(values, "values");
    for (Double candidate : values) {
        if (candidate != null) {
            add(candidate);
            continue;
        }
        // null stands in for a missing value
        markNextNotPresent();
    }
    return this;
}
}
2 changes: 1 addition & 1 deletion src/main/java/org/rcsb/cif/model/IntColumn.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ default IntStream values() {
* @return an int
*/
static int parseInt(String text) {
if (text.isEmpty() || ".".equals(text) || "?".equals(text)) {
if (text.isEmpty() || ValueKind.isValueKindToken(text)) {
return 0;
}
// some floats may omit decimal places and can be parsed as int: 88. - ignore the dot (in agreement with Mol*)
Expand Down
57 changes: 57 additions & 0 deletions src/main/java/org/rcsb/cif/model/IntColumnBuilder.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.rcsb.cif.model;

import java.util.List;
import java.util.Objects;

/**
* A builder instance for {@link IntColumn} instances.
Expand All @@ -27,4 +28,60 @@ public interface IntColumnBuilder<P extends CategoryBuilder<PP, PPP>, PP extends
* @return this builder instance
*/
IntColumnBuilder<P, PP, PPP> add(int... values);

/**
 * Add new values with fine-grained control over which entries are present.
 * <p>
 * For {@link ValueKind#PRESENT}, the corresponding entry from {@code values} is appended.
 * For {@link ValueKind#NOT_PRESENT} and {@link ValueKind#UNKNOWN}, this method delegates to
 * {@link #markNextNotPresent()} and {@link #markNextUnknown()} respectively; the entry at that index of
 * {@code values} is not read.
 * </p>
 * <p>
 * Argument validation is completed before the first entry is appended, so a call that is rejected because
 * of a null argument or a length mismatch does not leave a partially filled column behind.
 * </p>
 * @param values array of int values
 * @param mask array of {@link ValueKind}, must be the same length as {@code values}
 * @return this builder instance
 * @throws IllegalArgumentException if arrays differ in size
 * @throws NullPointerException if {@code values}, {@code mask}, or any {@code mask[i]} is null
 */
default IntColumnBuilder<P,PP,PPP> addMasked(int[] values, ValueKind[] mask) {
    Objects.requireNonNull(values, "values");
    Objects.requireNonNull(mask, "mask");
    if (values.length != mask.length) {
        throw new IllegalArgumentException("values.length (" + values.length + ") must equal mask.length (" + mask.length + ")");
    }

    // check every mask entry up front: detecting a null halfway through the append loop would
    // leave the rows before it already added to the column
    for (int i = 0; i < mask.length; i++) {
        Objects.requireNonNull(mask[i], "mask[" + i + "]");
    }

    for (int i = 0; i < values.length; i++) {
        ValueKind k = mask[i];
        switch (k) {
            case PRESENT:
                add(values[i]);
                break;
            case NOT_PRESENT:
                markNextNotPresent();
                break;
            case UNKNOWN:
                markNextUnknown();
                break;
            default:
                throw new IllegalStateException("Unhandled ValueKind: " + k);
        }
    }
    return this;
}

/**
 * Appends every element of an {@link Iterable}, in iteration order.
 * <p>
 * A {@code null} element is recorded through {@link #markNextNotPresent()}, i.e. as
 * {@link ValueKind#NOT_PRESENT} ("."); any other element is appended as a regular value.
 * </p>
 * @param values Integer values, may contain {@code null} elements
 * @return this builder instance
 * @throws NullPointerException if {@code values} itself is null
 */
default IntColumnBuilder<P,PP,PPP> addNullable(Iterable<Integer> values) {
    Objects.requireNonNull(values, "values");
    for (Integer candidate : values) {
        if (candidate != null) {
            add(candidate);
            continue;
        }
        // null stands in for a missing value
        markNextNotPresent();
    }
    return this;
}
}
69 changes: 69 additions & 0 deletions src/main/java/org/rcsb/cif/model/StrColumnBuilder.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.rcsb.cif.model;

import java.util.List;
import java.util.Objects;

/**
* A builder instance for {@link StrColumn} instances.
Expand All @@ -27,4 +28,72 @@ public interface StrColumnBuilder<P extends CategoryBuilder<PP, PPP>, PP extends
* @return this builder instance
*/
StrColumnBuilder<P, PP, PPP> add(String... values);

/**
 * Add new values with fine-grained control over which entries are present.
 * <p>
 * For {@link ValueKind#PRESENT}, the corresponding entry from {@code values} is appended.
 * For {@link ValueKind#NOT_PRESENT} and {@link ValueKind#UNKNOWN}, this method delegates to
 * {@link #markNextNotPresent()} and {@link #markNextUnknown()} respectively; the entry at that index of
 * {@code values} is ignored.
 * </p>
 * <p>
 * A value flagged as {@link ValueKind#PRESENT} must carry a real payload: {@code null} and the CIF tokens
 * "." and "?" are rejected because they would contradict the mask.
 * </p>
 * <p>
 * Argument validation is completed before the first entry is appended, so a rejected call does not leave
 * a partially filled column behind.
 * </p>
 * @param values array of String values
 * @param mask array of {@link ValueKind}, must be the same length as {@code values}
 * @return this builder instance
 * @throws IllegalArgumentException if arrays differ in size, or if a value flagged as
 * {@link ValueKind#PRESENT} is null, "." or "?"
 * @throws NullPointerException if {@code values}, {@code mask}, or any {@code mask[i]} is null
 */
default StrColumnBuilder<P,PP,PPP> addMasked(String[] values, ValueKind[] mask) {
    Objects.requireNonNull(values, "values");
    Objects.requireNonNull(mask, "mask");
    if (values.length != mask.length) {
        throw new IllegalArgumentException("values.length (" + values.length + ") must equal mask.length (" + mask.length + ")");
    }

    // validate all rows up front: detecting a bad row halfway through the append loop would
    // leave the rows before it already added to the column
    for (int i = 0; i < values.length; i++) {
        ValueKind k = Objects.requireNonNull(mask[i], "mask[" + i + "]");
        if (k == ValueKind.PRESENT && (values[i] == null || ValueKind.isValueKindToken(values[i]))) {
            throw new IllegalArgumentException("PRESENT value must not be null, '.' or '?': values[" + i + "]");
        }
    }

    for (int i = 0; i < values.length; i++) {
        ValueKind k = mask[i];
        switch (k) {
            case PRESENT:
                add(values[i]);
                break;
            case NOT_PRESENT:
                markNextNotPresent();
                break;
            case UNKNOWN:
                markNextUnknown();
                break;
            default:
                throw new IllegalStateException("Unhandled ValueKind: " + k);
        }
    }
    return this;
}

/**
 * Appends every element of an {@link Iterable}, in iteration order.
 * <p>
 * Each element is classified with {@link ValueKind#fromCifToken(String)}: {@code null} and "." are
 * recorded as NOT_PRESENT, "?" is recorded as UNKNOWN, and every other String is appended as a
 * regular value.
 * </p>
 * @param values String values, may contain {@code null} elements and the CIF tokens "." and "?"
 * @return this builder instance
 * @throws NullPointerException if {@code values} itself is null
 */
default StrColumnBuilder<P,PP,PPP> addNullable(Iterable<String> values) {
    Objects.requireNonNull(values, "values");
    for (String element : values) {
        final ValueKind kind = ValueKind.fromCifToken(element);
        if (kind == ValueKind.PRESENT) {
            add(element);
        } else if (kind == ValueKind.NOT_PRESENT) {
            markNextNotPresent();
        } else if (kind == ValueKind.UNKNOWN) {
            markNextUnknown();
        } else {
            // only reachable if ValueKind gains a constant this method does not know about
            throw new IllegalStateException("Unhandled ValueKind: " + kind);
        }
    }
    return this;
}
}
25 changes: 24 additions & 1 deletion src/main/java/org/rcsb/cif/model/ValueKind.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,28 @@ public enum ValueKind {
/**
* The value is unknown - <code>?</code> in CIF. String values will be empty, number values will be 0.
*/
UNKNOWN
UNKNOWN;

public static final String CIF_NOT_PRESENT = ".";
public static final String CIF_UNKNOWN = "?";

/**
 * Tells whether a String is one of the two CIF placeholder tokens, "." or "?".
 * <p>
 * {@code null} is not a token: this method returns {@code false} for it.
 * </p>
 * @param s payload to evaluate, may be null
 * @return true if this String indicates missing or undefined values
 */
public static boolean isValueKindToken(String s) {
    if (CIF_NOT_PRESENT.equals(s)) {
        return true;
    }
    return CIF_UNKNOWN.equals(s);
}

/**
 * Classifies a String as a ValueKind.
 * @param s payload to evaluate, may be null
 * @return UNKNOWN for "?", NOT_PRESENT for "." and for {@code null}, otherwise PRESENT
 */
public static ValueKind fromCifToken(String s) {
    if (CIF_UNKNOWN.equals(s)) {
        return UNKNOWN;
    }
    // a missing reference is treated the same way as the explicit "." token
    final boolean absent = s == null || CIF_NOT_PRESENT.equals(s);
    return absent ? NOT_PRESENT : PRESENT;
}
}
3 changes: 2 additions & 1 deletion src/main/java/org/rcsb/cif/model/binary/BinaryStrColumn.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.rcsb.cif.model.binary;

import org.rcsb.cif.model.StrColumn;
import org.rcsb.cif.model.ValueKind;

public class BinaryStrColumn extends BinaryColumn<String[]> implements StrColumn {
private final String[] data;
Expand All @@ -21,7 +22,7 @@ public String getStringData(int row) {
}

private String honorValueKind(String value) {
return (".".equals(value) || "?".equals(value)) ? "" : value;
return ValueKind.isValueKindToken(value) ? "" : value;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ public class CategoryBuilderImpl<P extends BlockBuilder<PP>, PP extends CifFileB
protected final P parent;
private final List<ColumnBuilder<? extends CategoryBuilder<P, PP>, P, PP>> pendingDigests;
private final List<ColumnBuilder<? extends CategoryBuilder<P, PP>, P, PP>> finishedDigests;
private Integer expectedRowCount;

/**
 * Creates a builder for the named category with no columns and no pending or finished column builders.
 * @param categoryName name of the category being built
 * @param parent the block builder this category belongs to
 */
public CategoryBuilderImpl(String categoryName, P parent) {
    this.parent = parent;
    this.categoryName = categoryName;
    columns = new LinkedHashMap<>();
    finishedDigests = new ArrayList<>();
    pendingDigests = new ArrayList<>();
    // stays null until the first column is digested; that column's size then becomes the reference
    expectedRowCount = null;
}

@Override
Expand All @@ -46,7 +48,6 @@ public Map<String, Column<?>> getColumns() {
return columns;
}

@SuppressWarnings("unchecked")
@Override
public P leaveCategory() {
if (parent == null) {
Expand All @@ -57,6 +58,17 @@ public P leaveCategory() {
pendingDigests.stream()
.filter(child -> !finishedDigests.contains(child))
.forEach(child -> {
// assert all columns are equal in size
int n = child.getMask().size();
if (expectedRowCount == null) {
expectedRowCount = n;
} else if (expectedRowCount != n) {
throw new IllegalStateException("Category '" + categoryName + "': column '" +
child.getColumnName() + "' has " + n + " rows, but expected " + expectedRowCount +
" based on previously digested columns. Ensure every column receives exactly one value " +
"per row (use markNextUnknown()/markNextNotPresent() for missing values).");
}

if (child instanceof IntColumnBuilder) {
digest((IntColumnBuilder<? extends CategoryBuilder<P, PP>, P, PP>) child);
} else if (child instanceof FloatColumnBuilder) {
Expand Down
Loading