Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cli/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
}

dependencies {
implementation(project(":core"))
api(project(":core"))

implementation(libs.lzf)
implementation(libs.lz4)
Expand Down Expand Up @@ -33,4 +33,4 @@ tasks.register<Jar>("uberJar") {
})
}

project.tasks["compileJava"].dependsOn(":core:allMetadataJar")
// project.tasks["compileJava"].dependsOn(":core:allMetadataJar")
4 changes: 4 additions & 0 deletions core/src/commonMain/kotlin/com/sunya/cdm/api/Variable.kt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ data class Variable<T>(
return "$datatype ${fullname()}${shape.contentToString()}"
}

/**
 * Debug-oriented description of this variable: the result of [nameAndShape], followed by the
 * owning group's full name, the element count and the attached `spObject`.
 */
override fun toString(): String =
    "Variable(${nameAndShape()}, group=${group.fullname()}, nelems=$nelems, spObject=$spObject)"

@InternalLibraryApi
class Builder<T>(val name : String, val datatype : Datatype<T>) {
val dimensions = mutableListOf<Dimension>()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
@file:OptIn(ExperimentalUnsignedTypes::class)

package com.sunya.cdm.array

import com.sunya.cdm.api.Datatype
Expand Down
8 changes: 4 additions & 4 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf4/Hdf4File.kt
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@ class Hdf4File(val filename : String) : Netchdf {
}
}

override fun <T> chunkIterator(v2: Variable<T>, section: SectionPartial?, maxElements : Int?): Iterator<ArraySection<T>> {
override fun <T> chunkIterator(v2: Variable<T>, wantSection: SectionPartial?, maxElements : Int?): Iterator<ArraySection<T>> {
if (v2.nelems == 0L) {
return listOf<ArraySection<T>>().iterator()
}
val wantSection = SectionPartial.fill(section, v2.shape)
val section = SectionPartial.fill(wantSection, v2.shape)
val vinfo = v2.spObject as Vinfo

return if (vinfo.isChunked) { // LOOK isLinked?
H4chunkIterator(header, v2, wantSection)
H4chunkIterator(header, v2, section)
} else {
H4maxIterator(v2, wantSection, maxElements ?: 100_000)
H4maxIterator(v2, section, maxElements ?: 100_000)
}
}

Expand Down
4 changes: 2 additions & 2 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import com.sunya.cdm.iosp.OpenFileState
import com.sunya.cdm.layout.Tiling
import com.sunya.cdm.util.InternalLibraryApi

/** B-tree, version 1, used for data (node type 1) */
/** B-tree, version 1, used for data (node type 1)
internal class BTree1(
val h5: H5builder,
val rootNodeAddress: Long,
Expand Down Expand Up @@ -130,7 +130,7 @@ internal class BTree1(
", tile= ${tiling.tile(key.offsets).contentToString()} idx=$idx"
}

}
} */


interface DataChunkIF {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ internal class BTree1data(
override fun chunkSize() = key.chunkSize
override fun filterMask() = key.filterMask

override fun show(tiling : Tiling) : String = "chunkSize=${key.chunkSize}, chunkStart=${offsets().contentToString()}" +
override fun show(tiling : Tiling) : String = "order=$key, chunkSize=${key.chunkSize}, chunkStart=${offsets().contentToString()}" +
", tile= ${tiling.tile(offsets() ).contentToString()}"

fun show() = show(tiling)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ internal class BTree2data(private val h5: H5builder, owner: String, address: Lon
}
}

// TODO: this probably does not handle missing chunks correctly; compare BTree1data, which iterates over tiles.
fun chunkIterator() : Iterator<ChunkImpl> = ChunkIterator()

private inner class ChunkIterator : AbstractIterator<ChunkImpl>() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import com.sunya.cdm.layout.IndexSpace
import com.sunya.cdm.layout.IndexND
import com.sunya.cdm.layout.Tiling

/** wraps BTree1 to handle iterating through tiled data (aka chunked data) */
/** wraps BTree1 to handle iterating through tiled data (aka chunked data)
internal class H5TiledData1(val btree : BTree1, val varShape: LongArray, val chunkShape: LongArray) {
private val check = true
private val debug = false
Expand Down Expand Up @@ -104,4 +104,4 @@ internal class H5TiledData1(val btree : BTree1, val varShape: LongArray, val chu
return "TiledData(chunk=${chunkShape.contentToString()}, readHit=$readHit, readMiss=$readMiss)"
}

}
} */
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,11 @@ internal class DataContainerVariable(
else -> throw RuntimeException()
}
}

/** Debug-oriented summary listing the `mdl`, `mfp` and `onlyFillValue` properties. */
override fun toString(): String =
    "DataContainerVariable(mdl=$mdl, mfp=$mfp, onlyFillValue=$onlyFillValue)"

}

internal fun getFillValue(h5 : H5builder, v5 : H5Variable, h5type: H5TypeInfo): ByteArray {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ import com.sunya.cdm.layout.IndexSpace
import com.sunya.cdm.layout.transferMissingNelems
import com.sunya.cdm.util.InternalLibraryApi

// TODO assumes BTree1, could it include BTree2? any chunked reader ?
// only used in Netchdf.readChunksConcurrent

/* (to be removed)
@OptIn(InternalLibraryApi::class)
internal class H5chunkIterator<T>(val h5 : H5builder, val v2: Variable<T>, val wantSection : Section) : AbstractIterator<ArraySection<T>>() {
private val debugChunking = false
Expand Down Expand Up @@ -86,5 +84,5 @@ internal class H5chunkIterator<T>(val h5 : H5builder, val v2: Variable<T>, val w

return ArraySection(array, intersectSpace.section(v2.shape)) // LOOK use space instead of Section ??
}
}
} */

95 changes: 61 additions & 34 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

package com.sunya.netchdf.hdf5

import com.fleeksoft.charset.decodeToString
import com.sunya.cdm.api.*
import com.sunya.cdm.array.*
import com.sunya.cdm.iosp.OpenFileState
Expand Down Expand Up @@ -62,8 +61,8 @@ internal fun <T> H5builder.readChunkedData(v2: Variable<T>, wantSection: Section
}
}

/* TODO can we use concurrent reading ??
internal fun <T> readBtree1data(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
/* DataLayoutBTreeVer1 (to be removed)
internal fun <T> H5builder.readBtreeVer1(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val h5type = vinfo.h5type

Expand All @@ -77,33 +76,45 @@ internal fun <T> readBtree1data(v2: Variable<T>, wantSection: Section): ArrayTyp
}
val ba = ByteArray(sizeBytes.toInt())

val reader = H5chunkConcurrent(h5, v2, wantSection)
val availableProcessors = com.sunya.netchdf.util.getAvailableProcessors()
reader.readChunks(availableProcessors, lamda = { asection: ArraySection<*> ->
val (array, section) = asection
println(" section = ${section}")
val dataSpace = IndexSpace(section)

val useEntireChunk = wantSpace.contains(dataSpace)
val intersectSpace = if (useEntireChunk) dataSpace else wantSpace.intersect(dataSpace)
val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1)
BTree1(this, vinfo.dataPos, 1, vinfo.storageDims.size)
else
throw RuntimeException("Unsupprted mdl ${vinfo.mdl}")

val chunker = Chunker(dataSpace, wantSpace) // each DataChunkEntry has its own Chunker iteration
chunker.transferBA(array, 0, elemSize, ba, 0)
val tiledData = H5TiledData1(btree1, v2.shape, vinfo.storageDims)
val filters = FilterPipeline(v2.name, vinfo.mfp, vinfo.h5type.isBE)
if (debugChunking) println(" readChunkedData tiles=${tiledData.tiling}")

if (h5type.datatype5 == Datatype5.Vlen) {
// internal fun <T> H5builder.processVlenIntoArray(h5type: H5TypeInfo, shape: IntArray, ba: ByteArray, nelems: Int, elemSize : Int): ArrayTyped<T> {
this.processVlenIntoArray(h5type, intersectSpace.shape.toIntArray(), ba, intersectSpace.totalElements.toInt(), elemSize)
var transferChunks = 0
val state = OpenFileState(0L, vinfo.h5type.isBE)
for (dataChunk: DataChunkIF in tiledData.dataChunks(wantSpace)) { // : Iterable<BTree1New.DataChunkEntry>
val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims)
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
if (dataChunk.isMissing()) {
if (debugChunking) println(" missing ${dataChunk.show(tiledData.tiling)}")
chunker.transferMissing(vinfo.fillValue, elemSize, ba)
} else {
this.processDataIntoArray(ba, h5type.isBE, datatype, intersectSpace.shape.toIntArray(), h5type, elemSize)
if (debugChunking) println(" chunk=${dataChunk.show(tiledData.tiling)}")
state.pos = dataChunk.childAddress()
val chunkData = this.raf.readByteArray(state, dataChunk.chunkSize())
val filteredData = if (dataChunk.filterMask() == null) chunkData
else filters.apply(chunkData, dataChunk.filterMask()!!)
chunker.transferBA(filteredData, 0, elemSize, ba, 0)
transferChunks += chunker.transferChunks
}
}

}, done = { })
val shape = wantSpace.shape.toIntArray()

return ArraySection(array, intersectSpace.section(v2.shape))
return if (h5type.datatype5 == Datatype5.Vlen) {
this.processVlenIntoArray(h5type, shape, ba, wantSpace.totalElements.toInt(), elemSize)
} else {
this.processDataIntoArray(ba, vinfo.h5type.isBE, datatype, shape, h5type, elemSize) as ArrayTyped<T>
}
} */

// DataLayoutBTreeVer1
internal fun <T> H5builder.readBtreeVer1(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
internal fun <T> H5builder.readBtree1data(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val h5type = vinfo.h5type

Expand All @@ -117,25 +128,40 @@ internal fun <T> H5builder.readBtreeVer1(v2: Variable<T>, wantSection: Section):
}
val ba = ByteArray(sizeBytes.toInt())

val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1)
BTree1(this, vinfo.dataPos, 1, vinfo.storageDims.size)
else
throw RuntimeException("Unsupprted mdl ${vinfo.mdl}")
val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1) {
// internal class BTree1(
// val h5: H5builder,
// val rootNodeAddress: Long,
// val nodeType : Int, // 0 = group/symbol table, 1 = raw data chunks
// val ndimStorage: Int? = null // TODO allowed to be null ??
//)
// BTree1(this, vinfo.dataPos, 1, vinfo.storageDims.size)
// internal class BTree1data(
// val raf: OpenFileExtended,
// rootNodeAddress: Long,
// varShape: LongArray,
// chunkShape: LongArray,
//)
val rafext: OpenFileExtended = this.openFileExtended()
BTree1data(rafext, vinfo.dataPos, v2.shape, vinfo.storageDims)
} else {
throw RuntimeException("Unsupported mdl ${vinfo.mdl}")
}

val tiledData = H5TiledData1(btree1, v2.shape, vinfo.storageDims)
//val tiledData = H5TiledData1(btree1, v2.shape, vinfo.storageDims)
val filters = FilterPipeline(v2.name, vinfo.mfp, vinfo.h5type.isBE)
if (debugChunking) println(" readChunkedData tiles=${tiledData.tiling}")
//if (debugChunking) println(" readChunkedData tiles=${tiledData.tiling}")

var transferChunks = 0
val state = OpenFileState(0L, vinfo.h5type.isBE)
for (dataChunk: DataChunkIF in tiledData.dataChunks(wantSpace)) { // : Iterable<BTree1New.DataChunkEntry>
btree1.asSequence().forEach { (order, dataChunk) ->
val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims)
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
if (dataChunk.isMissing()) {
if (debugChunking) println(" missing ${dataChunk.show(tiledData.tiling)}")
if (debugChunking) println(" missing ${dataChunk.show()}")
chunker.transferMissing(vinfo.fillValue, elemSize, ba)
} else {
if (debugChunking) println(" chunk=${dataChunk.show(tiledData.tiling)}")
if (debugChunking) println(" chunk=${dataChunk.show()}")
state.pos = dataChunk.childAddress()
val chunkData = this.raf.readByteArray(state, dataChunk.chunkSize())
val filteredData = if (dataChunk.filterMask() == null) chunkData
Expand All @@ -154,17 +180,18 @@ internal fun <T> H5builder.readBtreeVer1(v2: Variable<T>, wantSection: Section):
}
}

// DataLayoutBTreeVer1
internal fun <T> readBtree1data(hdf5: Hdf5File, v2: Variable<T>, wantSection: SectionPartial?): ArrayTyped<T> {
// DataLayoutBTreeVer1 using chunkIterator
internal fun <T> readBtree1dataWithChunkIterator(hdf5: Hdf5File, v2: Variable<T>, wantSection: SectionPartial?): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val h5type = vinfo.h5type
val datatype = vinfo.h5type.datatype()
val elemSize = vinfo.storageDims[vinfo.storageDims.size - 1].toInt() // last one is always the elements size

val useSection = SectionPartial.fill(wantSection, v2.shape)
val wantSpace = IndexSpace(useSection)
val nelems = wantSpace.totalElements.toInt()

// we will be forever haunted by this
// val useDatatype = if (vinfo.h5type.datatype5 == Datatype5.String) Datatype.STRING else datatype

val values = when (datatype) {
Datatype.BYTE -> ByteArray(nelems)
Datatype.CHAR, Datatype.UBYTE, Datatype.ENUM1 -> UByteArray(nelems)
Expand Down
Loading