Skip to content

Commit b907ab6

Browse files
authored
Merge pull request #201 from JohnLCaron/classrename
minor cleanup and class renaming.
2 parents 8e4cf51 + 2703f0b commit b907ab6

17 files changed

Lines changed: 40 additions & 87 deletions

File tree

Readme.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# netchdf
2-
_last updated: 7/27/2025_
2+
_last updated: 7/28/2025_
33

44
This is a rewrite in Kotlin of parts of the devcdm and netcdf-java libraries.
55

@@ -33,6 +33,8 @@ Please contact me if you'd like to help out. Especially needed are test datasets
3333
* [Compare with HDF5 data model](#compare-with-hdf5-data-model)
3434
* [Compare with HDF4 data model](#compare-with-hdf4-data-model)
3535
* [Compare with HDF-EOS data model](#compare-with-hdf-eos-data-model)
36+
* [Implementation Notes](#implementation-notes)
37+
* [Netcdf4 vs HDF5](#netcdf4-vs-hdf5)
3638
* [Elevator blurb](#elevator-blurb)
3739
<!-- TOC -->
3840

@@ -305,6 +307,24 @@ Please carefully check results if you have this kind of data, and send us sample
305307
* The _StructMetadata_ ODL is gathered and applied to the file header metadata as well as possible.
306308
Contact us with example files if you see something we are missing.
307309

310+
## Implementation Notes
311+
312+
### Netcdf4 vs HDF5
313+
314+
All Netcdf4 files are HDF5, but not all HDF5 files are Netcdf4. We'd like to be able to detect when a file was written
315+
using the Netcdf-4 library, but it's not possible to always tell for certain. If any of the following are true, we set
316+
isNetcdf4 = true.
317+
318+
1. If a group or variable has an attribute with name "_NCProperties", "_Netcdf4Coordinates", "_Netcdf4Dimid" or "_nc3_strict".
319+
2. If a variable name starts with "_nc4_non_coord_".
320+
3. If a variable has an attribute named "DIMENSION_LIST" with type vlen of reference.
321+
4. If a dimension name starts with "This is a netCDF dimension but not a netCDF variable".
322+
323+
Other than trying to identify which library wrote the file, Netchdf does not do any special processing for Netcdf4 files,
324+
except:
325+
326+
1. When testing, use the Netcdf4 C library when comparing data and metadata.
327+
308328
## Elevator blurb
309329

310330
An independent implementation of HDF4/HDF5/HDF-EOS in Kotlin.

core/src/commonMain/kotlin/com/sunya/cdm/api/Netchdf.kt

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ interface Netchdf : AutoCloseable {
1717
// TODO I think the output type is not always the input type
1818
fun <T> readArrayData(v2: Variable<T>, wantSection: SectionPartial? = null) : ArrayTyped<T>
1919

20-
// iterate over all the chunks in section, order is arbitrary. TODO where is intersection with wantSection done ??
20+
// iterate over all the chunks in section, order is arbitrary.
2121
fun <T> chunkIterator(v2: Variable<T>, wantSection: SectionPartial? = null, maxElements : Int? = null) : Iterator<ArraySection<T>>
2222

2323
// iterate over all the chunks in section, order is arbitrary, callbacks are in multiple threads.
@@ -31,9 +31,4 @@ interface Netchdf : AutoCloseable {
3131
}
3232

3333
// the section describes the array chunk reletive to the variable's shape.
34-
data class ArraySection<T>(val array : ArrayTyped<T>, val chunkSection : Section) {
35-
fun intersect(wantSection: SectionPartial) : ArrayTyped<T> {
36-
// TODO ??
37-
return array
38-
}
39-
}
34+
data class ArraySection<T>(val array : ArrayTyped<T>, val chunkSection : Section)

core/src/commonMain/kotlin/com/sunya/cdm/array/ArrayStructureData.kt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,8 @@ class ArrayStructureData(shape : IntArray, val ba : ByteArray, val isBE: Boolean
2222
}
2323

2424
private val heap = mutableMapOf<Int, Any>()
25-
// private var heapIndex = 0
2625
internal fun putOnHeap(offset: Int, value: Any) {
2726
heap[offset] = value
28-
// ba.putInt(offset, heapIndex) // TODO clobber the ByteArray ?? Or just use the byte pos, which is unique
29-
//val result = heapIndex
30-
// heapIndex++
31-
// return result
3227
}
3328

3429
internal fun getFromHeap(offset: Int): Any? {

core/src/commonMain/kotlin/com/sunya/cdm/util/Math.kt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,6 @@ fun unsignedByteToShort(b: Byte): Short {
129129
*/
130130

131131
////////////////////////////////////////////////////////////////////////
132-
// TODO
133-
// doubleIsNearlyEqual() doublesAreNearlyEqual
134-
135132
const val defaultMaxRelativeDiffFloat = 1.0e-5f
136133

137134
/** The default maximum relative difference for floats, when comparing as doubles. */

core/src/commonMain/kotlin/com/sunya/netchdf/NetchdfFileFormat.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ enum class NetchdfFileFormat(private val version: Int, private val formatName: S
107107
NC_FORMAT_64BIT_OFFSET(2, "netcdf-3 64bit-offset"),
108108
NC_FORMAT_NETCDF4(3, "NetCDF-4"), // This is really just HDF-5, dont know yet if its written by netcdf4.
109109
NC_FORMAT_NETCDF4_CLASSIC(4, "netcdf-4 classic"), // psuedo format I think
110-
NC_FORMAT_64BIT_DATA(5, "netcdf-5"), // TODO support this; need test files
110+
NC_FORMAT_64BIT_DATA(5, "netcdf-5"), // we have one test file: ../core/src/commonTest/data/jays_DOMAIN000.nc
111111

112112
HDF5(5, "hdf5"), // not written by netcdf C library
113113
HDF4(6, "hdf4"); // not written by netcdf C library

core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ internal class BTree1data(
2424
rootNode = BTreeNode(rootNodeAddress, null)
2525
}
2626

27-
// if other layouts like BTree2data had this interface we could use in chunkConcurrent
2827
override fun asSequence(): Sequence<DataChunk> = sequence {
2928
repeat( tiling.nelems) {
3029
yield(findDataChunk(it) ?: missingDataChunk(it, tiling))
@@ -33,6 +32,8 @@ internal class BTree1data(
3332

3433
fun chunkIterator(): Iterator<DataChunk> = asSequence().iterator()
3534

35+
fun countChunks() = asSequence().count()
36+
3637
internal fun findDataChunk(order: Int): DataChunk? {
3738
return rootNode.findDataChunk(order)
3839
}

core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/FractalHeap.kt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,6 @@ internal class FractalHeap(private val h5: H5builder, forWho: String, address: L
186186
return record1.hugeObjectAddress
187187
}
188188

189-
// 3, 4 -> return offset.toLong() // TODO only a guess
190189
else -> throw RuntimeException("Unknown DHeapId subtype =$subtype")
191190
}
192191
}

core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5TypeInfo.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ internal data class H5TypeInfo(val isVlenString: Boolean, val isRefObject : Bool
5757

5858
Datatype5.Floating ->
5959
when (this.elemSize) {
60+
// 2 -> "half float" see jhdf
6061
4 -> Datatype.FLOAT
6162
8 -> Datatype.DOUBLE
6263
else -> throw RuntimeException("Bad hdf5 float type with size= ${this.elemSize}")

core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkConcurrent.kt renamed to core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5readChunkedConcurrent.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import kotlinx.coroutines.runBlocking
2727
import kotlinx.coroutines.yield
2828

2929
@ExperimentalCoroutinesApi
30-
class H5chunkConcurrent<T>(val h5: H5builder, val v2: Variable<T>, wantSection: SectionPartial?, ) {
30+
class H5readChunkedConcurrent<T>(val h5: H5builder, val v2: Variable<T>, wantSection: SectionPartial?, ) {
3131
val rafext: OpenFileExtended = h5.makeFileExtended()
3232

3333
val varShape = v2.shape

core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt renamed to core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5readerChunked.kt

Lines changed: 1 addition & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,10 @@ internal fun <T> H5builder.readChunkedData(v2: Variable<T>, wantSection: Section
2727
throw RuntimeException("Illegal nbytes to read = $sizeBytes")
2828
}
2929
val ba = ByteArray(sizeBytes.toInt())
30-
31-
// just reading into memory the entire index for now
32-
// val index = BTree2j(h5, v2.name, vinfo.dataPos, vinfo.storageDims)
33-
3430
val filters = FilterPipeline(v2.name, vinfo.mfp, vinfo.h5type.isBE)
3531
val state = OpenFileState(0L, vinfo.h5type.isBE)
3632

37-
// just run through all the chunks, we wont read any that we dont need
33+
// run through all the chunks, we wont read any that we dont need
3834
for (dataChunk: DataChunk in index) {
3935
val dataSection = IndexSpace(v2.rank, dataChunk.offsets.toLongArray(), vinfo.storageDims)
4036
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
@@ -62,60 +58,6 @@ internal fun <T> H5builder.readChunkedData(v2: Variable<T>, wantSection: Section
6258
}
6359
}
6460

65-
/* DataLayoutBTreeVer1
66-
internal fun <T> H5builder.readBtree1data(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
67-
val vinfo = v2.spObject as DataContainerVariable
68-
val h5type = vinfo.h5type
69-
70-
val elemSize = vinfo.storageDims[vinfo.storageDims.size - 1].toInt() // last one is always the elements size
71-
val datatype = vinfo.h5type.datatype()
72-
73-
val wantSpace = IndexSpace(wantSection)
74-
val sizeBytes = wantSpace.totalElements * elemSize
75-
if (sizeBytes <= 0 || sizeBytes >= Int.MAX_VALUE) {
76-
throw RuntimeException("Illegal nbytes to read = $sizeBytes")
77-
}
78-
val ba = ByteArray(sizeBytes.toInt())
79-
80-
val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1) {
81-
val rafext: OpenFileExtended = this.openNewFileExtended()
82-
BTree1data(rafext, vinfo.dataPos, v2.shape, vinfo.storageDims)
83-
} else {
84-
throw RuntimeException("Unsupported mdl ${vinfo.mdl}")
85-
}
86-
87-
//val tiledData = H5TiledData1(btree1, v2.shape, vinfo.storageDims)
88-
val filters = FilterPipeline(v2.name, vinfo.mfp, vinfo.h5type.isBE)
89-
//if (debugChunking) println(" readChunkedData tiles=${tiledData.tiling}")
90-
91-
var transferChunks = 0
92-
val state = OpenFileState(0L, vinfo.h5type.isBE)
93-
btree1.asSequence().forEach { dataChunk ->
94-
val dataSection = IndexSpace(v2.rank, dataChunk.offsets.toLongArray(), vinfo.storageDims)
95-
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
96-
if (dataChunk.isMissing()) {
97-
if (debugChunking) println(" missing ${dataChunk.show()}")
98-
chunker.transferMissing(vinfo.fillValue, elemSize, ba)
99-
} else {
100-
if (debugChunking) println(" chunk=${dataChunk.show()}")
101-
state.pos = dataChunk.address
102-
val chunkData = this.raf.readByteArray(state, dataChunk.size)
103-
val filteredData = if (dataChunk.filterMask == null) chunkData
104-
else filters.apply(chunkData, dataChunk.filterMask)
105-
chunker.transferBA(filteredData, 0, elemSize, ba, 0)
106-
transferChunks += chunker.transferChunks
107-
}
108-
}
109-
110-
val shape = wantSpace.shape.toIntArray()
111-
112-
return if (h5type.datatype5 == Datatype5.Vlen) {
113-
this.processVlenIntoArray(h5type, shape, ba, wantSpace.totalElements.toInt(), elemSize)
114-
} else {
115-
this.processDataIntoArray(ba, vinfo.h5type.isBE, datatype, shape, h5type, elemSize) as ArrayTyped<T>
116-
}
117-
} */
118-
11961
internal fun <T> readChunkedDataWithIterator(hdf5: Hdf5File, v2: Variable<T>, wantSection: SectionPartial?): ArrayTyped<T> {
12062
val vinfo = v2.spObject as DataContainerVariable
12163
val datatype = vinfo.h5type.datatype()
@@ -149,7 +91,6 @@ internal fun <T> readChunkedDataWithIterator(hdf5: Hdf5File, v2: Variable<T>, wa
14991
val dataSection = IndexSpace(dataChunk.chunkSection)
15092
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
15193
chunker.forEach {
152-
// println(it)
15394
dataChunk.array.transfer(values, it)
15495
}
15596
}

0 commit comments

Comments
 (0)