Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions core/src/commonMain/kotlin/com/sunya/cdm/layout/Tiling.kt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package com.sunya.cdm.layout

import com.sunya.cdm.api.computeSize
import com.sunya.cdm.api.toIntArray
import com.sunya.cdm.api.toLongArray
import kotlin.math.max
import kotlin.math.min

Expand Down Expand Up @@ -83,6 +85,10 @@ class Tiling(varShape: LongArray, chunkShape: LongArray) {
return order
}

/**
 * Int-based convenience overload of order().
 *
 * Widens [index] to a LongArray, delegates to the LongArray overload and narrows
 * the result back with toInt().
 * NOTE(review): presumably the LongArray overload returns a Long; toInt() would then
 * silently truncate values outside the Int range - confirm tile counts always fit.
 */
fun order(index: IntArray): Int = order(index.toLongArray()).toInt()

/** inverse of order() */
fun orderToIndex(order: Long) : LongArray {
// calculate tile
Expand All @@ -99,6 +105,10 @@ class Tiling(varShape: LongArray, chunkShape: LongArray) {
return index(tile)
}

/**
 * Int-based convenience overload of orderToIndex().
 *
 * Widens [order] to a Long, delegates to the Long overload and converts the
 * resulting LongArray to an IntArray.
 */
fun orderToIndex(order: Int) : IntArray {
    val longIndex = orderToIndex(order.toLong())
    return longIndex.toIntArray()
}

/**
* Create an ordering of index points based on which tile the point is in.
*
Expand Down
38 changes: 10 additions & 28 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

package com.sunya.netchdf.hdf5

import com.sunya.cdm.api.toIntArray
import com.sunya.cdm.iosp.OpenFileState
import com.sunya.cdm.layout.Tiling
import com.sunya.cdm.util.InternalLibraryApi
Expand All @@ -24,11 +25,11 @@ internal class BTree1data(
}

// if other layouts like BTree2data had this interface we could use in chunkConcurrent
override fun asSequence(): Sequence<DataChunkIF> = sequence {
override fun asSequence(): Sequence<DataChunk> = sequence {
repeat( tiling.nelems) {
//val startingIndex = tiling.orderToIndex(it.toLong())
//val indexSpace = IndexSpace(startingIndex, tiling.chunk)
yield(findDataChunk(it) ?: missingDataChunk(it))
yield(findDataChunk(it) ?: missingDataChunk(it, tiling))
}
}

Expand All @@ -41,7 +42,7 @@ internal class BTree1data(
var level: Int = 0
var nentries: Int = 0

val keyValues = mutableListOf<Pair<Int, DataChunk>>() // tile order to DataChunk
val dataChunks = mutableListOf<DataChunk>() // tile order to DataChunk
val children = mutableListOf<BTreeNode>()

var lastOrder : Int = 0
Expand All @@ -63,12 +64,13 @@ internal class BTree1data(
repeat(nentries) {
val chunkSize = raf.readInt(state)
val filterMask = raf.readInt(state)
val inner = LongArray(ndimStorage) { j -> raf.readLong(state) }
val order = tiling.order(inner).toInt()
val key = DataChunkKey(order, chunkSize, filterMask)
val chunkOffset = LongArray(ndimStorage) { j -> raf.readLong(state) }
val order = tiling.order(chunkOffset).toInt()
val childPointer = raf.readAddress(state) // 4 or 8 bytes, then add fileOffset
if (level == 0) {
keyValues.add(Pair(order, DataChunk(key, childPointer)))
// data class DataChunk(val address: Long, val size: Int, val chunkOffset: IntArray, val filterMask: Int?, val order: Int, val tiling: Tiling?=null) {
val dataChunk = DataChunk(childPointer, chunkSize, chunkOffset.toIntArray(), filterMask, order, tiling)
dataChunks.add(dataChunk)
lastOrder = order
} else {
children.add( BTreeNode(childPointer, this) )
Expand All @@ -91,8 +93,7 @@ internal class BTree1data(
return childNode.findDataChunk(wantOrder)
}
} else { // If it's a leaf node (no children)
val kv = keyValues.find { it.first == wantOrder }
return kv?.second
return dataChunks.find { it.order == wantOrder }
}
return null
}
Expand All @@ -102,24 +103,5 @@ internal class BTree1data(
}

}

data class DataChunkKey(val order: Int, val chunkSize: Int, val filterMask : Int)

inner class DataChunk(val key : DataChunkKey, val childAddress : Long) : DataChunkIF {
override fun childAddress() = childAddress
override fun offsets() = tiling.orderToIndex(key.order.toLong())
override fun isMissing() = (childAddress <= 0L) // may be 0 or -1
override fun chunkSize() = key.chunkSize
override fun filterMask() = key.filterMask
override fun show() = show(tiling)

fun show(tiling : Tiling) : String = "order=$key, chunkSize=${key.chunkSize}, chunkStart=${offsets().contentToString()}" +
", tile= ${tiling.tile(offsets() ).contentToString()}"

}

fun missingDataChunk(order: Int) : DataChunk {
return DataChunk(DataChunkKey(order, 0, 0), -1L)
}
}

53 changes: 25 additions & 28 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree2data.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package com.sunya.netchdf.hdf5

import com.sunya.cdm.api.computeSize
import com.sunya.cdm.api.toIntArray
import com.sunya.cdm.api.toLongArray
import com.sunya.cdm.iosp.OpenFileState

import com.sunya.cdm.layout.Tiling
Expand Down Expand Up @@ -57,25 +56,24 @@ internal class BTree2data(
rootNode = BTreeNode(rootNodeAddress, treeDepth, numberOfRecordsInRoot, totalNumberOfRecordsInTree, null)
}

override fun asSequence(): Sequence<DataChunkIF> = sequence {
override fun asSequence(): Sequence<DataChunk> = sequence {
repeat( tiling.nelems) {
//val startingIndex = tiling.orderToIndex(it.toLong())
//val indexSpace = IndexSpace(startingIndex, tiling.chunk)
yield(findDataChunk(it) ?: missingDataChunk(it))
val result = findDataChunk(it) ?: missingDataChunk(it, tiling)
yield(result)
}
}

fun chunkIterator(): Iterator<DataChunkIF> = asSequence().iterator()
fun chunkIterator(): Iterator<DataChunk> = asSequence().iterator()

internal fun findDataChunk(order: Int): DataChunkIF? {
internal fun findDataChunk(order: Int): DataChunk? {
return rootNode.findDataChunk(order)
}

inner class BTreeNode(val address: Long, depth: Int, numberOfRecords: Int, totalRecords: Int, val parent: BTreeNode?) {
var level: Int = 0
var nentries: Int = 0

val keyValues = mutableListOf<Pair<Int, ChunkImpl>>() // tile order to DataChunk
val dataChunks = mutableListOf<DataChunk>() // tile order to DataChunk
val children = mutableListOf<BTreeNode>()

var lastOrder : Int = 0
Expand All @@ -99,10 +97,9 @@ internal class BTree2data(

// dataChunks
repeat(numberOfRecords) {
val chunkImpl = readRecord(state, nodeType)
val order = tiling.order(chunkImpl.chunkOffset.toLongArray()).toInt()
keyValues.add(Pair(order, chunkImpl))
lastOrder = order
val dataChunk = readRecord(state, nodeType)
dataChunks.add(dataChunk)
lastOrder = dataChunk.order
}

// children
Expand All @@ -128,16 +125,20 @@ internal class BTree2data(
}

// uses a tree search = O(log n)
fun findDataChunk(wantOrder: Int): DataChunkIF? {
// this algorithm assumes you don't have mixed nodes (holding both records and children); that is not true
fun findDataChunk(wantOrder: Int): DataChunk? {
if (dataChunks.isNotEmpty()) {
val result = dataChunks.find { it.order == wantOrder }
if (result != null) return result
}
if (children.isNotEmpty()) { // search tree; assumes that chunks are ordered
children.forEach { childNode ->
if (wantOrder <= childNode.lastOrder)
return childNode.findDataChunk(wantOrder)
}
} else { // If it's a leaf node (no children)
val kv = keyValues.find { it.first == wantOrder }
return kv?.second
}
} //else { // If it's a leaf node (no children)
// return dataChunks.find { it.order == wantOrder }
//}
return null
}

Expand All @@ -147,7 +148,7 @@ internal class BTree2data(

} // BTreeNode

fun readRecord(state: OpenFileState, type: Int): ChunkImpl {
fun readRecord(state: OpenFileState, type: Int): DataChunk {
return when (type) {
10 -> readRecord10(state, chunkShape.toIntArray(), chunkSize.toInt())
11 -> readRecord11(state, chunkShape.toIntArray() )
Expand All @@ -156,7 +157,7 @@ internal class BTree2data(
}

// Type 10 Record Layout - Non-filtered Dataset Chunks
fun readRecord10(state: OpenFileState, dims : IntArray, chunkSize: Int): ChunkImpl {
fun readRecord10(state: OpenFileState, dims : IntArray, chunkSize: Int): DataChunk {
val address = raf.readOffset(state)

// This field is the scaled offset of the chunk within the dataset. n is the number of dimensions for the dataset.
Expand All @@ -169,13 +170,13 @@ internal class BTree2data(
// for (int i = 0; i < chunkOffset.length; i++) {
// chunkOffset[i] = Utils.readBytesAsUnsignedInt(buffer, 8) * datasetInfo.getChunkDimensions()[i];
// }
val chunkOffset = scaledOffset.mapIndexed { idx, scaledOffset -> (scaledOffset * dims[idx]).toInt() }
val chunkOffset = scaledOffset.mapIndexed { idx, scaledOffset -> (scaledOffset * dims[idx]).toInt() }.toIntArray()

return ChunkImpl(address, chunkSize, chunkOffset.toIntArray(), null, tiling)
return DataChunk(address, chunkSize, chunkOffset, null, tiling.order(chunkOffset), tiling)
}

// Type 11 Record Layout - Filtered Dataset Chunks
fun readRecord11(state: OpenFileState, dims : IntArray): ChunkImpl {
fun readRecord11(state: OpenFileState, dims : IntArray): DataChunk {
val address = raf.readOffset(state)

// LOOK variable size based on what? "Chunk Size (variable size; at most 8 bytes)"
Expand All @@ -200,14 +201,10 @@ internal class BTree2data(
// for (int i = 0; i < chunkOffset.length; i++) {
// chunkOffset[i] = Utils.readBytesAsUnsignedInt(buffer, 8) * datasetInfo.getChunkDimensions()[i];
// }
val chunkOffset = scaledOffset.mapIndexed { idx, scaledOffset -> (scaledOffset * dims[idx]).toInt() }
val chunkOffset = scaledOffset.mapIndexed { idx, scaledOffset -> (scaledOffset * dims[idx]).toInt() }.toIntArray()

// ChunkImpl(val address: Long, val size: Int, val chunkOffset: IntArray, val filterMask: Int?)
return ChunkImpl(address, chunkSize, chunkOffset.toIntArray(), filterMask, tiling)
}

fun missingDataChunk(order: Int) : ChunkImpl {
return ChunkImpl(-1, 0, tiling.orderToIndex(order.toLong()).toIntArray(), 0, tiling)
return DataChunk(address, chunkSize, chunkOffset, filterMask, tiling.order(chunkOffset), tiling)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
package com.sunya.netchdf.hdf5

import com.sunya.cdm.api.computeSize
import com.sunya.cdm.api.toLongArray
import com.sunya.cdm.iosp.OpenFileIF
import com.sunya.cdm.iosp.OpenFileState
import com.sunya.cdm.layout.Tiling
import com.sunya.cdm.util.InternalLibraryApi
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlin.Long
import kotlin.math.ceil

// DataLayoutMessage version 4, layout class 2 (chunked), chunkIndexingType 1-5
Expand Down Expand Up @@ -81,7 +81,7 @@ internal class FixedArrayIndex(val h5: H5builder, val varShape: IntArray, val md
val dataAddress: Long

val state = OpenFileState(h5.getFileOffset(mdl.indexAddress), false)
val chunks = mutableListOf<ChunkImpl>()
val chunks = mutableListOf<DataChunk>()

init {
val raf = h5.raf
Expand Down Expand Up @@ -200,18 +200,18 @@ internal class FixedArrayIndex(val h5: H5builder, val varShape: IntArray, val md
val filterMask = raf.readInt(state) // java.util.BitSet = java.util.BitSet.valueOf(byteArrayOf(bb.get(), bb.get(), bb.get(), bb.get()))
val chunkOffset: IntArray = chunkIndexToChunkOffset(chunkIndex, chunkDimensions, varShape)

chunks.add(ChunkImpl(chunkAddress, chunkSizeInBytes, chunkOffset, filterMask))
chunks.add(makeDataChunk(chunkAddress, chunkSizeInBytes, chunkOffset, filterMask))
}

fun readUnfiltered(raf: OpenFileIF, state : OpenFileState, chunkIndex: Int) {
val chunkAddress = h5.readOffset(state) // val chunkAddress: Long = Utils.readBytesAsUnsignedLong(bb, sizeOfOffsets)
val chunkOffset: IntArray = chunkIndexToChunkOffset(chunkIndex, chunkDimensions, varShape)
val unfilteredChunkSize = mdl.chunkDimensions.computeSize()

chunks.add(ChunkImpl(chunkAddress, unfilteredChunkSize, chunkOffset, null))
chunks.add(makeDataChunk(chunkAddress, unfilteredChunkSize, chunkOffset, 0, null))
}

fun chunkIterator() : Iterator<ChunkImpl> = chunks.iterator()
fun chunkIterator() : Iterator<DataChunk> = chunks.iterator()

companion object {
val logger = KotlinLogging.logger("ChunkedDataLayoutMessageV4")
Expand All @@ -224,16 +224,16 @@ internal class ImplicitChunkIndex(val h5: H5builder, val varShape: IntArray, val
val chunkDimensions = IntArray(mdl.chunkDimensions.size - 1) { mdl.chunkDimensions[it] } // remove the element "dimension"
var chunkSize = mdl.chunkDimensions.computeSize()

fun getAllChunks(): List<ChunkImpl> {
fun getAllChunks(): List<DataChunk> {
val totalChunks: Int = totalChunks(varShape, chunkDimensions)
val chunks = mutableListOf<ChunkImpl>()
val chunks = mutableListOf<DataChunk>()
for (i in 0..< totalChunks) {
chunks.add(
ChunkImpl(
makeDataChunk(
mdl.address + i * chunkSize,
chunkSize,
chunkIndexToChunkOffset(i, chunkDimensions, varShape),
null)
0, null)
)
}
return chunks
Expand All @@ -250,7 +250,7 @@ internal class ImplicitChunkIndex(val h5: H5builder, val varShape: IntArray, val
return chunks
}

fun chunkIterator() : Iterator<ChunkImpl> = getAllChunks().iterator()
fun chunkIterator() : Iterator<DataChunk> = getAllChunks().iterator()

}

Expand All @@ -275,26 +275,6 @@ fun chunkIndexToChunkOffset(chunkIndex: Int, chunkDimensions: IntArray, datasetD
}

////////////////////////////////////////////////////
data class ChunkImpl(val address: Long, val size: Int, val chunkOffset: IntArray, val filterMask: Int?, val tiling: Tiling?=null): DataChunkIF {
override fun toString(): String {
return "ChunkImpl(address=$address, size=$size, chunkOffset=${chunkOffset.contentToString()}, filterMask=$filterMask)"
}

override fun childAddress() = address

override fun offsets() = chunkOffset.toLongArray()

override fun isMissing() = address <= 0

override fun chunkSize() = size

override fun filterMask() = filterMask ?: 0

override fun show(): String {
return if (tiling != null) {
"address=$address, chunkSize=${size}, chunkStart=${offsets().contentToString()}, tile= ${tiling.tile(offsets() ).contentToString()}"
} else {
"TODO(Not yet implemented)"
}
}
}
/**
 * Builds a [DataChunk] from an address, a size, a chunk offset and a filter mask.
 *
 * The chunk's order is always set to 0 here; it is not derived from [chunkOffset]
 * or [tiling].
 *
 * @param address passed through as the chunk address
 * @param size passed through as the chunk size
 * @param chunkOffset passed through as the chunk offsets
 * @param filterMask passed through as the filter mask
 * @param tiling optional tiling attached to the chunk; defaults to null
 */
fun makeDataChunk(address: Long, size: Int, chunkOffset: IntArray, filterMask: Int, tiling: Tiling?=null): DataChunk {
    val fixedOrder = 0
    return DataChunk(address, size, chunkOffset, filterMask, fixedOrder, tiling)
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
package com.sunya.netchdf.hdf5

import com.sunya.cdm.api.toLongArray
import com.sunya.cdm.layout.Tiling

interface DataChunkSequence {
fun asSequence(): Sequence<DataChunkIF>
fun asSequence(): Sequence<DataChunk>
}

interface DataChunkIF {
fun childAddress(): Long
fun offsets(): LongArray
fun isMissing(): Boolean
fun chunkSize(): Int
fun filterMask(): Int
/**
 * Describes one data chunk: where it is, how big it is, and where it starts in the variable.
 *
 * Because [offsets] is an IntArray, the compiler-generated data class members would compare it
 * by reference and print its identity; equals(), hashCode() and toString() are therefore
 * overridden to use the array contents.
 *
 * @property address chunk address; a value <= 0 marks the chunk as missing (see [isMissing])
 * @property size chunk size, reported as chunkSize by [show]
 * @property offsets starting offsets of the chunk, reported as chunkStart by [show]
 * @property filterMask filter mask, or null when none was supplied
 * @property order order of the chunk (NOTE(review): presumably the tile order from Tiling.order() - confirm)
 * @property tiling optional tiling, used by [show] to report which tile the chunk falls in
 */
data class DataChunk(val address: Long, val size: Int, val offsets: IntArray, val filterMask: Int?, val order: Int, val tiling: Tiling?) {
    /** True when the address is zero or negative. */
    fun isMissing() = (address <= 0)

    /** Human-readable summary; the tile is printed as "null" when there is no tiling. */
    fun show() : String = "order=$order, chunkSize=${size}, chunkStart=${offsets.contentToString()}" +
        ", tile= ${tiling?.tile(offsets.toLongArray()).contentToString()}"

    // Structural equality: the array is compared element by element, not by reference.
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is DataChunk) return false
        return address == other.address &&
            size == other.size &&
            offsets.contentEquals(other.offsets) &&
            filterMask == other.filterMask &&
            order == other.order &&
            tiling == other.tiling
    }

    // Must agree with equals(): hash the array contents rather than its identity.
    override fun hashCode(): Int {
        var result = address.hashCode()
        result = 31 * result + size
        result = 31 * result + offsets.contentHashCode()
        result = 31 * result + (filterMask ?: 0)
        result = 31 * result + order
        result = 31 * result + (tiling?.hashCode() ?: 0)
        return result
    }

    // Same layout as the generated toString(), but with the offsets printed by content.
    override fun toString(): String =
        "DataChunk(address=$address, size=$size, offsets=${offsets.contentToString()}, " +
            "filterMask=$filterMask, order=$order, tiling=$tiling)"
}

fun show(): String
/**
 * Creates the placeholder [DataChunk] for a chunk that has no stored data.
 *
 * The placeholder has address -1 (so isMissing() is true), size 0 and filter mask 0;
 * its offsets are obtained from [tiling] via orderToIndex([order]).
 *
 * @param order order of the missing chunk
 * @param tiling tiling used to turn [order] into offsets, and attached to the result
 */
fun missingDataChunk(order: Int, tiling: Tiling) : DataChunk {
    val startOffsets = tiling.orderToIndex(order)
    return DataChunk(-1, 0, startOffsets, 0, order, tiling)
}
Loading