Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .idea/gradle.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# netchdf
_last updated: 7/7/2025_
_last updated: 7/9/2025_

This is a rewrite in Kotlin of parts of the devcdm and netcdf-java libraries.

Expand Down Expand Up @@ -46,6 +46,9 @@
versions of the standard with clear fixes. For Netcdf/Hdf, the standard is the file formats, along with their semantic
descriptions. The API is language and library specific, and is secondary to the standard.

More subtly, its very hard to see the elegance (or otherwise) of your own design, you need independent review of your
data structures and API by people truly invested in getting them right.

Having multiple implementations is a huge win for the reference library, in that bugs are more quickly found, and
ambiguities more quickly identified.

Expand All @@ -70,7 +73,7 @@
contents of the file.

Currently, the Netcdf-4 and HDF5 libraries are not thread safe, not even for read-only applications.
This is a serious limitation for high performance, scalable applications, and it is disappointing that it hasnt been fixed.

Check failure on line 76 in Readme.md

View workflow job for this annotation

GitHub Actions / Check for spelling errors

hasnt ==> hasn't
See [Toward Multi-Threaded Concurrency in HDF5](https://www.hdfgroup.org/wp-content/uploads/2022/05/Toward-MT-HDF5.pdf),
and [RFC:Multi-Thread HDF5](https://support.hdfgroup.org/releases/hdf5/documentation/rfc/RFC_multi_thread.pdf) for more information.

Expand Down
3 changes: 3 additions & 0 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ version = libs.versions.netchdf.get()
kotlin {
jvm()

/*
val hostOs = System.getProperty("os.name")
val arch = System.getProperty("os.arch")
when {
Expand All @@ -28,6 +29,8 @@ kotlin {
else -> throw GradleException("Host OS is not supported.")
}

*/

sourceSets {
val commonMain by getting {
dependencies {
Expand Down
2 changes: 2 additions & 0 deletions core/src/commonMain/kotlin/com/sunya/cdm/api/Datatype.kt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ data class Datatype<T>(val cdlName: String, val size: Int, val typedef : Typedef
// Experimental for HDF5; maybe should be T = String ??
val REFERENCE = Datatype<Long>("reference", 4) // string = full path to referenced dataset

val UNKNOWN = Datatype<Int>("unknown", 4) // needed ?

fun values() = listOf(BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, DOUBLE, FLOAT,
ENUM1, ENUM2, ENUM4, ENUM8,
CHAR, STRING, OPAQUE, COMPOUND, VLEN, REFERENCE
Expand Down
5 changes: 3 additions & 2 deletions core/src/commonMain/kotlin/com/sunya/cdm/api/Netchdf.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ interface Netchdf : AutoCloseable {
// the section describes the array chunk reletive to the variable's shape.
data class ArraySection<T>(val array : ArrayTyped<T>, val section : Section)

// Experimental: read concurrent chunks of data, call back with them in ArraySection, order is arbitrary.
fun <T> Netchdf.chunkConcurrent(v2: Variable<T>, section: SectionPartial? = null, maxElements : Int? = null, nthreads: Int = 20, lamda : (ArraySection<T>) -> Unit) {
// Experimental: read concurrently chunks of data, call back with lamda, order is arbitrary.
fun <T> Netchdf.chunkConcurrent(v2: Variable<T>, section: SectionPartial? = null, maxElements : Int? = null, nthreads: Int = 20,
lamda : (ArraySection<T>) -> Unit) {
val reader = ReadChunkConcurrent()
val chunkIter = this.chunkIterator( v2, section, maxElements)
reader.readChunks(nthreads, chunkIter, lamda)
Expand Down
1 change: 1 addition & 0 deletions core/src/commonMain/kotlin/com/sunya/cdm/api/Typedef.kt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ class EnumTypedef(name : String, baseType : Datatype<*>, val valueMap : Map<Int,
/** Convert Attribute values of type ENUM into equivalent names */
fun Attribute<*>.convertEnums(): List<String> {
require(this.datatype.isEnum)
// if (this.datatype.typedef == null) return listOf("enum attribute with no typedef")
val typedef = (this.datatype.typedef as EnumTypedef)
return values.convertEnums(typedef.valueMap)
}
Expand Down
2 changes: 2 additions & 0 deletions core/src/commonMain/kotlin/com/sunya/cdm/layout/Chunker.kt
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,10 @@ internal class Chunker(val dataChunk: IndexSpace, val wantSpace: IndexSpace, mer

*/

// the chunker tracks the dst offset
internal fun transferBA(src: ByteArray, srcOffset: Int, elemSize : Int, dst: ByteArray, dstOffset: Int) {
for (chunk in this) {
// println(" chunker copy $elemSize elements from src ${srcOffset + elemSize * chunk.srcElem.toInt()} to dst ${dstOffset + elemSize * chunk.destElem.toInt()}")
src.copyInto(dst,
dstOffset + elemSize * chunk.destElem.toInt(),
srcOffset + elemSize * chunk.srcElem.toInt(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.sunya.netchdf.hdf5

import com.sunya.cdm.layout.Tiling

// TODO could this be turned into a chunked data reader ?
interface BTreeIF {
fun rootNodeAddress() : Long
fun readNode(address: Long, parent: BTreeNodeIF?) : BTreeNodeIF
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
@file:OptIn(InternalLibraryApi::class)

package com.sunya.netchdf.hdf5

import com.sunya.cdm.api.computeSize
import com.sunya.cdm.iosp.OpenFileIF
import com.sunya.cdm.iosp.OpenFileState
import com.sunya.cdm.util.InternalLibraryApi
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlin.math.ceil

private val logger = KotlinLogging.logger("ChunkedDataLayoutMessageV4")

// DataLayoutMessage version 4, layout class 2 (chunked), chunkIndexingType 1-5
// jhdf

@OptIn(InternalLibraryApi::class)
internal fun readChunkedDataLayoutMessageV4(builder: H5builder, raf: OpenFileIF, state : OpenFileState) : DataLayoutMessage {
val version = raf.readByte(state)
val layoutClass = raf.readByte(state)
val flags = raf.readByte(state)
val rank = raf.readByte(state).toInt()
val dimSizeLength = raf.readByte(state)
val chunkDimensions = IntArray(rank) { builder.readVariableSizeDimension(state, dimSizeLength) } // TODO is dimSizeLength correct ??
val chunkIndexingType = raf.readByte(state).toInt()

var chunkSize = chunkDimensions.computeSize()

return when (chunkIndexingType) {
1 -> {
var filterMask = 0
if (isBitSet(flags.toInt(), 1)) {
chunkSize = builder.readLength(state).toInt() // Utils.readBytesAsUnsignedInt(bb, sb.getSizeOfLengths())
filterMask = raf.readInt(state) // java.util.BitSet.valueOf(byteArrayOf(bb.get(), bb.get(), bb.get(), bb.get()))
}
// Address of the single chunk. size specified in “Size of Lengths” field in the superblock.
// The address may be undefined if the chunk or index storage is not allocated yet.
val address = builder.readLength(state) // Utils.readBytesAsUnsignedLong(bb, sb.getSizeOfOffsets())
DataLayoutSingleChunk4(flags, chunkDimensions, chunkSize = chunkSize, address, filterMask)
}
2 -> {
val address = builder.readLength(state) // possibly wrong
DataLayoutImplicit4(flags, chunkDimensions, address)
}
3 -> {
val pageBits = raf.readByte(state) // bb.get()
val address = builder.readLength(state) // possibly wrong
DataLayoutFixedArray4(flags, chunkDimensions, pageBits, address)
}
4 -> {
val maxBits = raf.readByte(state) // bb.get()
val indexElements = raf.readByte(state) // bb.get()
val minPointers = raf.readByte(state) // bb.get()
val minElements = raf.readByte(state) // bb.get()
val pageBits = raf.readByte(state) // bb.get() // This is wrong in the spec says 2 bytes its actually 1
val address = builder.readLength(state) // possibly wrong
DataLayoutExtensibleArray4(flags, chunkDimensions, maxBits, indexElements, minPointers, minElements, pageBits, address)
}
5 -> {
val nodeSize = raf.readInt(state) // bb.getInt()
val splitPercent = raf.readByte(state) // bb.get()
val mergePercent = raf.readByte(state) // bb.get()
val address = builder.readLength(state) // possibly wrong
DataLayoutBtreeVer2(flags, chunkDimensions, nodeSize, splitPercent, mergePercent, address)
}

else -> throw RuntimeException("Unrecognized chunk indexing type. type= $chunkIndexingType" )
}
}

//////////////////////////////////////////////////////////////////
internal class FixedArrayIndex(val h5: H5builder, val varShape: IntArray, val mdl: DataLayoutFixedArray4) {
val chunkDimensions = IntArray(mdl.chunkDimensions.size - 1) { mdl.chunkDimensions[it] } // remove the element "dimension"

val clientId: Int
val entrySize: Int
val pageBits: Int // This field contains the number of bits needed to store the maximum number of elements in a data block page. bits ??
val maxNumberOfEntries : Int
val pages: Int
val pageSize: Int
val dataAddress: Long

val state = OpenFileState(h5.getFileOffset(mdl.indexAddress), false)
val chunks = mutableListOf<ChunkImpl>()

init {
val raf = h5.raf

// FixedArray Header
val magic = raf.readString(state, 4)
check(magic == "FAHD") { "$magic should equal FAHD" }
val version0: Byte = raf.readByte(state)
clientId = raf.readByte(state).toInt()
entrySize = raf.readByte(state).toInt()
pageBits = raf.readByte(state).toInt() // presumably this replicates the MessageDataLayout
maxNumberOfEntries = h5.readLength(state).toInt()
pageSize = 1 shl pageBits
pages = (maxNumberOfEntries + pageSize - 1) / pageSize

dataAddress = h5.readOffset(state)
state.pos = dataAddress

val pageBitmapBytes: Int = (pages + 7) / 8
// dont actually need this
var headerSize: Int = 6 + h5.sizeOffsets + entrySize * maxNumberOfEntries + 4
if (pages > 1) {
headerSize += pageBitmapBytes + (4 * pages)
}

// FixedArrayDataBlock
val magic2 = raf.readString(state, 4)
check(magic2 == "FADB") { "$magic2 should equal FADB" }

val version = raf.readByte(state).toInt()
if (version != 0) {
throw RuntimeException("Unsupported fixed array data block version detected. Version: $version")
}

val dataBlockclientId = raf.readByte(state).toInt()
if (dataBlockclientId != clientId) {
throw RuntimeException("Fixed array client ID mismatch")
}

val headerAddress = raf.readLong(state)
if (headerAddress != mdl.indexAddress) {
throw RuntimeException("Fixed array data block header address mismatch")
}

if (pages > 1) {
readPaged(pageBitmapBytes, dataBlockclientId)
} else {
// Unpaged
logger.info { "Reading unpaged" }
if (dataBlockclientId == 0) { // Not filtered
for (i in 0..< maxNumberOfEntries) {
readUnfiltered(h5.raf, state, i)
}
} else if (dataBlockclientId == 1) { // Filtered
for (i in 0..< maxNumberOfEntries) {
readFiltered(h5.raf, state,i)
}
} else {
throw RuntimeException("Unrecognized client ID = $dataBlockclientId")
}
}
}

fun readPaged(
pageBitmapBytes: Int,
dataBlockclientId: Int
) {
logger.info {"Reading paged"}
/*
val pageBitmap = ByteArray(pageBitmapBytes)
bb.get(pageBitmap)
ChecksumUtils.validateChecksumFromMark(bb)
*/
val pageBitmap = h5.raf.readByteArray(state, pageBitmapBytes)
val checksum = h5.raf.readInt(state) // confusing

var chunkIndex = 0
for (page in 0..< pages) {
val currentPageSize = getCurrentPageSize(page)

if (dataBlockclientId == 0) { // Not filtered
for (i in 0..< currentPageSize) {
readUnfiltered(h5.raf, state,chunkIndex++)
}
} else if (dataBlockclientId == 1) { // Filtered
for (i in 0..<currentPageSize) {
readFiltered(h5.raf, state, chunkIndex++)
}
} else {
throw RuntimeException("Unrecognized client ID = $dataBlockclientId")
}
val checksum = h5.raf.readInt(state) // confusing
}
}

fun getCurrentPageSize(page: Int): Int {
val currentPageSize: Int
if (page == pages - 1) {
// last page so maybe not a full page
val lastPageSize: Int = maxNumberOfEntries % pageSize
currentPageSize = if (lastPageSize == 0) pageSize else lastPageSize
} else {
currentPageSize = pageSize
}
return currentPageSize
}

// unfilteredChunkSize = Arrays.stream(getChunkDimensions()).reduce(1, Math::multiplyExact) * getDataType().getSize();
// chunkDimensions = int[] chunkDimensions = layoutMessage.getChunkDimensions(); ArrayUtils.subarray(chunkDimensions, 0, chunkDimensions.length - 1);
// datasetDimensions = dataSpace.getDimensions();

fun readFiltered(raf: OpenFileIF, state : OpenFileState, chunkIndex: Int) {
val chunkAddress = h5.readOffset(state)
// yikes: Utils.readBytesAsUnsignedInt(bb, this@FixedArrayIndex.entrySize - h5.sizeOffsets - 4)
val chunkSizeInBytes: Int = h5.readVariableSizeUnsigned(state, entrySize - h5.sizeOffsets - 4).toInt()
val filterMask = raf.readInt(state) // java.util.BitSet = java.util.BitSet.valueOf(byteArrayOf(bb.get(), bb.get(), bb.get(), bb.get()))
val chunkOffset: IntArray = chunkIndexToChunkOffset(chunkIndex, chunkDimensions, varShape)

chunks.add(ChunkImpl(chunkAddress, chunkSizeInBytes, chunkOffset, filterMask))
}

fun readUnfiltered(raf: OpenFileIF, state : OpenFileState, chunkIndex: Int) {
val chunkAddress = h5.readOffset(state) // val chunkAddress: Long = Utils.readBytesAsUnsignedLong(bb, sizeOfOffsets)
val chunkOffset: IntArray = chunkIndexToChunkOffset(chunkIndex, chunkDimensions, varShape)
val unfilteredChunkSize = mdl.chunkDimensions.computeSize()

chunks.add(ChunkImpl(chunkAddress, unfilteredChunkSize, chunkOffset, null))
}
}

/////////////////////////////////////////////////
// internal data class DataLayoutImplicit4(val flags: Byte, val chunkDimensions: IntArray, val address: Long) : DataLayoutMessage() {
internal class ImplicitChunkIndex(val h5: H5builder, val varShape: IntArray, val mdl: DataLayoutImplicit4) {
val chunkDimensions = IntArray(mdl.chunkDimensions.size - 1) { mdl.chunkDimensions[it] } // remove the element "dimension"
var chunkSize = mdl.chunkDimensions.computeSize()

fun getAllChunks(): List<ChunkImpl> {
val totalChunks: Int = totalChunks(varShape, chunkDimensions)
val chunks = mutableListOf<ChunkImpl>()
for (i in 0..< totalChunks) {
chunks.add(
ChunkImpl(
mdl.address + i * chunkSize,
chunkSize,
chunkIndexToChunkOffset(i, chunkDimensions, varShape),
null)
)
}
return chunks
}

fun totalChunks(datasetDimensions: IntArray, chunkDimensions: IntArray): Int {
var chunks = 1
for (i in datasetDimensions.indices) {
var chunksInDim = datasetDimensions[i] / chunkDimensions[i]
// If there is a partial chunk then we need to add one chunk in this dim
if (datasetDimensions[i] % chunkDimensions[i] != 0) chunksInDim++
chunks *= chunksInDim
}
return chunks
}
}

////////////////////////////////////////////////////
data class ChunkImpl(val address: Long, val size: Int, val chunkOffset: IntArray, val filterMask: Int?) {
override fun toString(): String {
return "ChunkImpl(address=$address, size=$size, chunkOffset=${chunkOffset.contentToString()}, filterMask=$filterMask)"
}
}

fun chunkIndexToChunkOffset(chunkIndex: Int, chunkDimensions: IntArray, datasetDimensions: IntArray): IntArray {
var chunkIndex = chunkIndex
val chunkOffset = IntArray(chunkDimensions.size)

// Start from the slowest dim
for (i in chunkOffset.indices) {
// Find out how many chunks make one chunk in this dim
var chunksBelowThisDim = 1
// Start one dim faster
for (j in i + 1..<chunkOffset.size) {
chunksBelowThisDim *= ceil(datasetDimensions[j].toDouble() / chunkDimensions[j]).toInt()
}

chunkOffset[i] = (chunkIndex / chunksBelowThisDim) * chunkDimensions[i]
chunkIndex -= chunkOffset[i] / chunkDimensions[i] * chunksBelowThisDim
}

return chunkOffset
}
Loading
Loading