Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# netchdf
_last updated: 6/12/2025_
_last updated: 7/7/2025_

This is a rewrite in Kotlin of parts of the devcdm and netcdf-java libraries.

The intention is to create a maintainable, read-only, pure JVM library allowing full access to
netcdf3, netcdf4, hdf4, hdf5, hdf-eos2, and hdf-eos5 data files.

Evaluating if support for superblock 4 is feasible.

Please contact me if you'd like to help out. Especially needed are test datasets from all the important data archives!!

### Building
Expand Down Expand Up @@ -68,7 +70,7 @@
library for data access is less clear. For now, we will provide a "best-effort" to expose the internal
contents of the file.

Currently, the Netcdf-4 and HDF5 libraries are not thread safe, not even for read-only applications.

Check failure on line 73 in Readme.md

View workflow job for this annotation

GitHub Actions / Check for spelling errors

hasnt ==> hasn't
This is a serious limitation for high performance, scalable applications, and it is disappointing that it hasn't been fixed.
See [Toward Multi-Threaded Concurrency in HDF5](https://www.hdfgroup.org/wp-content/uploads/2022/05/Toward-MT-HDF5.pdf),
and [RFC:Multi-Thread HDF5](https://support.hdfgroup.org/releases/hdf5/documentation/rfc/RFC_multi_thread.pdf) for more information.
Expand Down
13 changes: 5 additions & 8 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ version = libs.versions.netchdf.get()

kotlin {
jvm()
/*

val hostOs = System.getProperty("os.name")
val isMingwX64 = hostOs.startsWith("Windows")
val arch = System.getProperty("os.arch")
Expand All @@ -31,8 +31,6 @@ kotlin {
else -> throw GradleException("Host OS is not supported.")
}

*/

sourceSets {
val commonMain by getting {
dependencies {
Expand All @@ -42,11 +40,10 @@ kotlin {
implementation(libs.fleeksoft)
}
}
val jvmMain by
getting {
dependencies {
implementation(libs.slf4j.jvm)
}
val jvmMain by getting {
dependencies {
implementation(libs.slf4j.jvm)
}
}
val commonTest by getting {
dependencies {
Expand Down
6 changes: 3 additions & 3 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree2.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import com.sunya.netchdf.hdf5.BTree1.Node
* TODO ?? Used in readGroupNew( type 5 and 6), readAttributesFromInfoMessage(), FractalHeap, and DHeapId(type 1,2,3,4)
*/
@OptIn(InternalLibraryApi::class)
internal class BTree2(private val h5: H5builder, owner: String, address: Long, val ndimStorage: Int? = null) : BTreeIF { // BTree2
internal class BTree2(private val h5: H5builder, owner: String, address: Long, val ndimStorage: Int) : BTreeIF { // BTree2
val btreeType: Int
private val nodeSize: Int // size in bytes of btree nodes
private val recordSize: Short// size in bytes of btree records
Expand Down Expand Up @@ -226,8 +226,8 @@ internal class BTree2(private val h5: H5builder, owner: String, address: Long, v
7 -> Record70(state) // TODO wrong
8 -> Record8(state)
9 -> Record9(state)
10 -> Record10(state, ndimStorage!!) // TODO wrong, whats ndims?
11 -> Record11(state, ndimStorage!!) // TODO wrong, whats ndims?
10 -> Record10(state, ndimStorage - 1) // TODO wrong, whats ndims?
11 -> Record11(state, ndimStorage - 1) // TODO wrong, whats ndims?
else -> throw IllegalStateException()
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ internal class FractalHeap(private val h5: H5builder, forWho: String, address: L
when (subtype) {
1, 2 -> {
val btree = if (btreeHugeObjects == null) {
val local = BTree2(h5, "FractalHeap btreeHugeObjects", btreeAddressHugeObjects)
val local = BTree2(h5, "FractalHeap btreeHugeObjects", btreeAddressHugeObjects, 0)
require(local.btreeType == subtype)
local.readEntries()
} else btreeHugeObjects
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,66 @@ internal class H5chunkReader(val h5 : H5builder) {
*/
}

/**
 * Reads the data of a variable whose layout message is a [DataLayoutSingleChunk4].
 *
 * The bytes are read starting at the layout's `heapAddress` and converted into a typed array
 * by `h5.processDataIntoArray`.
 *
 * @param v2 the variable to read; its `spObject` must be a [DataContainerVariable] whose `mdl`
 *   is a [DataLayoutSingleChunk4].
 * @param wantSection the requested section; it determines how many bytes are read.
 * @return the data as a typed array.
 * @throws RuntimeException if the computed byte count is not in the range 1 until Int.MAX_VALUE.
 * @throws UnsupportedOperationException if the layout is flagged as filtered.
 */
internal fun <T> readSingleChunk(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
    val vinfo = v2.spObject as DataContainerVariable
    val h5type = vinfo.h5type

    // the last storage dimension is always the element size
    val elemSize = vinfo.storageDims[vinfo.storageDims.size - 1].toInt()
    val datatype = h5type.datatype()

    val wantSpace = IndexSpace(wantSection)
    val sizeBytes = wantSpace.totalElements * elemSize
    if (sizeBytes <= 0 || sizeBytes >= Int.MAX_VALUE) {
        throw RuntimeException("Illegal nbytes to read = $sizeBytes")
    }

    val mdl = vinfo.mdl as DataLayoutSingleChunk4
    // TODO: a filtered single chunk presumably needs its bytes run through the filter pipeline
    //  (using the layout's filterMask) before conversion; not implemented yet.
    if (mdl.isFiltered) throw UnsupportedOperationException("readSingleChunk doesnt support filtered data")

    // Keep the bytes returned by the read; a separately allocated array would stay zero-filled.
    val state = OpenFileState(mdl.heapAddress, h5type.isBE)
    val ba = h5.raf.readByteArray(state, sizeBytes.toInt())

    // NOTE(review): the byte count comes from wantSection but the result shape is the full variable
    // shape; these agree only when wantSection covers the whole variable — confirm against callers.
    return h5.processDataIntoArray(ba, h5type.isBE, datatype, v2.shape.toIntArray(), h5type, elemSize) as ArrayTyped<T>
}

/*
internal fun <T> readBtreeVer1(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
Expand Down Expand Up @@ -170,7 +230,6 @@ internal class H5chunkReader(val h5 : H5builder) {

val shape = wantSpace.shape.toIntArray()


return if (h5type.datatype5 == Datatype5.Vlen) {
h5.processVlenIntoArray(h5type, shape, ba, wantSpace.totalElements.toInt(), elemSize)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ internal fun H5builder.readGroupNew(
check(btreeAddress >= 0) { "no valid btree for GroupNew with Fractal Heap" }

// read in btree and all entries
val btree = BTree2(this, parent.name, btreeAddress)
val btree = BTree2(this, parent.name, btreeAddress, 0)
for (e in btree.readEntries()) {
var heapId: ByteArray = when (btree.btreeType) {
5 -> (e.record as BTree2.Record5).heapId
Expand Down
8 changes: 4 additions & 4 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf {
alldata.section(wantSection)
} else if (vinfo.mdl.isContiguous) {
header.readRegularData(vinfo, v2.datatype, wantSection)
// } else if (vinfo.mdl is DataLayoutBTreeVer1) {
// H5chunkReader(header).readBtreeVer1(v2, wantSection)
} else if (vinfo.mdl is DataLayoutFixedArray4) {
H5chunkReader(header).readFixedArray(v2, wantSection)
} else if (vinfo.mdl is DataLayoutBTreeVer1 || vinfo.mdl is DataLayoutBtreeVer2) {
H5chunkReader(header).readBtreeVer12(v2, wantSection)
} else if (vinfo.mdl is DataLayoutSingleChunk4) {
H5chunkReader(header).readSingleChunk(v2, wantSection)
} else if (vinfo.mdl is DataLayoutFixedArray4) {
H5chunkReader(header).readFixedArray(v2, wantSection)
} else {
throw RuntimeException("Unsupported data layer type ${vinfo.mdl}")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

package com.sunya.netchdf.hdf5

import com.sunya.cdm.api.computeSize
import com.sunya.cdm.iosp.OpenFileState
import com.sunya.cdm.util.InternalLibraryApi

Expand Down Expand Up @@ -129,34 +130,77 @@ internal fun H5builder.readDataLayoutMessage(state : OpenFileState) : DataLayout

// version 4, layoutClass = 2 is too complex for structdls
if (layoutClass == 2) {
val nextBytes = raf.readByteArray(state.copy(), 40)
println("version 4, layoutClass = 2 ${this.raf.location()}")
println(" nextBytes after chunkIndexingType ${nextBytes.contentToString()}")

// this structure is too complex for structdls
val version = raf.readByte(state)
val layoutClass = raf.readByte(state)
val flags = raf.readByte(state)
val rank = raf.readByte(state).toInt()
val dimSizeLength = raf.readByte(state)
val dims = IntArray(rank) { this.readVariableSizeDimension(state, dimSizeLength) } // TODO is dimSizeLength correct ??
var chunkSize = dims.computeSize()

val chunkIndexingType = raf.readByte(state).toInt()
return when (chunkIndexingType) {
1 -> { // VII.A single chunk index
val chunkSize = this.readLength(state)
// #define H5O_LAYOUT_CHUNK_DONT_FILTER_PARTIAL_BOUND_CHUNKS 0x01 // no filter
// #define H5O_LAYOUT_CHUNK_SINGLE_INDEX_WITH_FILTER 0x02 // has a filter

// udata->chunk_block.offset = idx_info->storage->idx_addr;
// if (idx_info->layout->flags & H5O_LAYOUT_CHUNK_SINGLE_INDEX_WITH_FILTER) {
// udata->chunk_block.length = idx_info->storage->u.single.nbytes;
// udata->filter_mask = idx_info->storage->u.single.filter_mask;
// } /* end if */
// else {
// udata->chunk_block.length = idx_info->layout->size;
// udata->filter_mask = 0;
// } /* end else */

// "The following information exists only when the chunk is filtered.
// In other words, when H5O_LAYOUT_CHUNK_SINGLE_INDEX_WITH_FILTER (bit 1) is enabled in the field flags."
// TODO not clear
val nextBytes = raf.readByteArray(state.copy(), 40)
println("SingleChunk ${this.raf.location()}")
println(" chunkSize $chunkSize nextBytes ${nextBytes.contentToString()}")
// https://github.com/HDFGroup/hdf5/issues/5610
// The second field should be "Filter mask" for the chunk, which indicates the filter to skip for the dataset chunk.
// Each filter has an index number in the pipeline; if that filter is skipped, the bit corresponding to its index is set.
val filterMask = raf.readInt(state)
/* repeat ( 32) { idx ->
val isSet = isBitSet(filterMask, idx)
println(" idx = $idx isSet = $isSet")
} */
println(" nextBytes after chunkIndexingType ${nextBytes.contentToString()}")

var filterMask : Int? = null
if (isBitSet(flags.toInt(), 1)) {
// Indexing Type Information (variable size)
chunkSize = this.readLength(state).toInt()

// https://github.com/HDFGroup/hdf5/issues/5610
// The second field should be "Filter mask" for the chunk, which indicates the filter to skip for the dataset chunk.
// Each filter has an index number in the pipeline; if that filter is skipped, the bit corresponding to its index is set.
// It would be surprising to have a Filter Mask here, since that usually references a FilterPipeline message, but there is none.

// /home/all/testdata/netcdf-c_hdf5_superblocks/netcdf-c-test-files/v1_10/nc_test4__testfilter_reg.nc
// sizeOfFilteredChunk 1024 nextBytes [0, 0, 0, 0, 3, 27, 0, 0, 0, 0, 0, 0, 21, 28, 0, 4, 0, 0, 0, 3, 2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]

// filtered, flags == 2; /home/all/testdata/netcdf-c_hdf5_superblocks/netcdf-c-test-files/v1_10/examples__bzip2.nc
// sizeOfFilteredChunk 501 nextBytes [0, 0, 0, 0, 3, 27, 0, 0, 0, 0, 0, 0, 21, 28, 0, 4, 0, 0, 0, 3, 2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]

// TODO not clear, number and meaning of the fields. Possibly embedding the filter pipeline here, instead of a separate message ??
// uint32_t filter_mask; /* Excluded filters for chunk */ seems there are only 32 filters? must be a separate mechanism for extensions
filterMask = raf.readInt(state) // "This field contains filters for the chunk."
repeat ( 32) { idx ->
val isSet = isBitSet(filterMask, idx)
if (isSet) println(" idx = $idx isSet = $isSet")
}
println(" sizeOfFilteredChunk $chunkSize filterMask $filterMask")
}

// [0, 4, 0, 0, 0, 0, 0, 0,
// 0, 0, 0, 0, 3, 27, 0, 0,
// 0, 0, 0, 0, 21, 28, 0, 4,
// 0, 0, 0, 3, 2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]

// Address of the single chunk. size specified in “Size of Lengths” field in the superblock.
// The address may be undefined if the chunk or index storage is not allocated yet.
val chunkAddress = this.readLength(state)
println(" filterMask $filterMask chunkAddress $chunkAddress")

DataLayoutSingleChunk4(flags, dims, chunkSize, chunkAddress)
DataLayoutSingleChunk4(flags, dims, chunkSize = chunkSize, chunkAddress, filterMask)
}
2 -> { // VII.B implicit index
// Address of the array of dataset chunks.
Expand All @@ -180,7 +224,9 @@ internal fun H5builder.readDataLayoutMessage(state : OpenFileState) : DataLayout
val minElements = raf.readByte(state)
val pageBits = raf.readByte(state)
val indexAddress = raf.readLong(state) // probably wrong
DataLayoutExtensibleArray4(flags, dims, maxBits, indexElements, minPointers, minElements, pageBits, indexAddress)
val result = DataLayoutExtensibleArray4(flags, dims, maxBits, indexElements, minPointers, minElements, pageBits, indexAddress)
println(result.show())
result
}
5 -> { // VII.E version 2 B-tree index
val nodeSize = raf.readInt(state)
Expand All @@ -192,7 +238,7 @@ internal fun H5builder.readDataLayoutMessage(state : OpenFileState) : DataLayout
}
else -> throw RuntimeException()
}
val address = raf.readLong(state) // TODO read address ??
// val address = raf.readLong(state) // TODO read address ??
}
}
throw RuntimeException()
Expand Down Expand Up @@ -245,8 +291,9 @@ internal data class DataLayoutContiguous3(val dataAddress: Long, val dataSize: L
}

// 4
internal data class DataLayoutSingleChunk4(val flags: Byte, val dims: IntArray, val chunkSize: Long, val heapAddress: Long) : DataLayoutMessage() {
override fun show(): String = "${super.show()} flags=$flags dims=$dims chunkSize=$chunkSize heapAddress=$heapAddress"
internal data class DataLayoutSingleChunk4(val flags: Byte, val dims: IntArray, val chunkSize: Int, val heapAddress: Long, val filterMask: Int?) : DataLayoutMessage() {
val isFiltered = isBitSet(flags.toInt(), 1)
override fun show(): String = "${super.show()} flags=$flags dims=$dims heapAddress=$heapAddress chunkSize=$chunkSize"
}
internal data class DataLayoutImplicit4(val flags: Byte, val dims: IntArray, val address: Long) : DataLayoutMessage() {
override fun show(): String = "${super.show()} flags=$flags dims=$dims address=$address"
Expand All @@ -256,7 +303,7 @@ internal data class DataLayoutFixedArray4(val flags: Byte, val dims: IntArray, v
}
/**
 * Version 4 data layout message for chunked storage indexed by an extensible array
 * (chunk indexing type as named by this class).
 *
 * Holds the raw header fields as read from the layout message; `indexAddress` is the last
 * field read for this indexing type.
 */
internal data class DataLayoutExtensibleArray4(val flags: Byte, val dims: IntArray, val maxBits: Byte, val indexElements: Byte,
                                               val minPointers: Byte, val minElements: Byte, val pageBits: Byte, val indexAddress: Long) : DataLayoutMessage() {
    /** One-line summary of every field; `dims` is printed by content and `minElements` shows its own value. */
    override fun show(): String = "${super.show()} flags=$flags dims=${dims.contentToString()} maxBits=$maxBits indexElements=$indexElements " +
        "minPointers=$minPointers minElements=$minElements pageBits=$pageBits indexAddress=$indexAddress"
}
internal data class DataLayoutBtreeVer2(val flags: Byte, val dims: IntArray, val nodeSize: Int, val splitPercent: Byte, val mergePercent: Byte, val heapAddress: Long)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ private fun H5builder.readAttributesFromInfoMessage(

val btreeAddress: Long = attributeOrderBtreeAddress ?: attributeNameBtreeAddress
if (btreeAddress < 0 || fractalHeapAddress < 0) return emptyList()
val btree2 = BTree2(this, "AttributeInfoMessage", btreeAddress)
val btree2 = BTree2(this, "AttributeInfoMessage", btreeAddress, 0)
val fractalHeap = FractalHeap(this, "AttributeInfoMessage", fractalHeapAddress)

val list = mutableListOf<AttributeMessage>()
Expand Down
24 changes: 23 additions & 1 deletion core/src/commonMain/kotlin/com/sunya/netchdfc/NetchdfCApi.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,28 @@ package com.sunya.netchdfc
import com.sunya.cdm.api.*
import com.sunya.cdm.array.ArrayInt

/*
import kotlinx.cinterop.CPointer
import kotlinx.cinterop.ExperimentalForeignApi
import kotlinx.cinterop.IntVar
import kotlinx.cinterop.addressOf
import kotlinx.cinterop.pin
import kotlin.experimental.ExperimentalNativeApi
import kotlin.native.CName

@OptIn(ExperimentalNativeApi::class, ExperimentalForeignApi::class)
@CName("getPinnedIntArrayPointer")
fun getPinnedIntArrayPointer(array: IntArray): CPointer<IntVar> {
return array.pin().addressOf(0)
}

// for testing
@OptIn(ExperimentalForeignApi::class)
fun testCArray(): CPointer<IntVar> {
return getPinnedIntArrayPointer(intArrayOf(1,2,3,4,5))
}
*/

/** Returns the library's human-readable version string. */
fun version(): String = "netchdf version 0.4.0"
Expand All @@ -27,4 +49,4 @@ class ArrayIntSection(val varName: String, val varShape: LongArray, val rank: In

/** Wraps this section's ranges in a [SectionPartial]. */
internal fun Section.toSectionPartial(): SectionPartial = SectionPartial(ranges)
Loading
Loading