Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,12 @@
ArrayUByte. Call _data.makeStringsFromBytes()_ to turn this into Strings with the array rank reduced by one.
* Netcdf-3 does not have STRING or UBYTE types. In practice, CHAR is used for either.
* Netcdf-4/HDF5 library encodes CHAR values as HDF5 string type with elemSize = 1, so we use that convention to detect
legacy CHAR variables in HDF5 files. NC_CHAR should not be used in Netcdf-4, use NC_UBYTE or NC_STRING.
legacy CHAR variables in HDF5 files. (NC_CHAR should not be used in Netcdf-4, use NC_UBYTE or NC_STRING.)
* Netcdf-4/HDF5 String variables may be fixed or variable length. For fixed Strings, we set the size of Datatype.STRING to
the fixed size. For both fixed and variable length Strings, the string will be truncated at the first zero byte, if any.

Check failure on line 208 in Readme.md

View workflow job for this annotation

GitHub Actions / Check for spelling errors

withh ==> with
* HDF4 does not have a STRING type, but does have signed and unsigned CHAR, and signed and unsigned BYTE.
We map both signed and unsigned to Datatype.CHAR and handle it as above (Attributes are Strings, Variables are UBytes).
* _Datatype.STRING_ is always variable length, regardless of whether the data in the file is variable or fixed length.
* _Datatype.STRING_ always appears to be variable length to the user, regardless of whether the data in the file is variable or fixed length.

#### Typedef
Unlike Netcdf-Java, we follow Netcdf-4 "user defined types" and add typedefs for Compound, Enum, Opaque, and Vlen.
Expand All @@ -226,6 +228,8 @@
* Opaque: hdf5 makes arrays of Opaque all the same size, which gives up some of its usefulness. If there's a need,
we will allow Opaque(*) indicating that the sizes can vary.
* Attributes can be of type REFERENCE, with value the full path name of the referenced dataset.
* Vlen Strings are stored on the heap. Fixed length Strings are kept in byte arrays.
This is more or less invisible to the User.

#### Compare with HDF4 data model
* All data access is unified under the netchdf API.
Expand Down
9 changes: 5 additions & 4 deletions core/src/commonMain/kotlin/com/sunya/cdm/api/Datatype.kt
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ import com.sunya.cdm.array.ArrayStructureData
* @param cdlName name in CDL
* @param size Size in bytes of one element of this data type.
* @param typedef used for ENUM, VLEN, OPAQUE, COMPOUND
* @param isVlen HDF5 needs to track if this is a Vlen or regular String.
* @param isVlen HDF5 needs to track if this is a Vlen or fixed length String.
*/
// TODO should this be an actual Enum?
data class Datatype<T>(val cdlName: String, val size: Int, val typedef : Typedef? = null, val isVlen : Boolean? = null) {

companion object {
Expand Down Expand Up @@ -99,6 +98,8 @@ data class Datatype<T>(val cdlName: String, val size: Int, val typedef : Typedef

fun withVlen(isVlen: Boolean): Datatype<T> = this.copy(isVlen = isVlen)

fun withSize(size: Int): Datatype<T> = this.copy(size = size)

// like enum, equals just compares the type, ignoring the "with" properties. TODO WHY?
override fun equals(other: Any?): Boolean {
if (this === other) return true
Expand All @@ -107,14 +108,14 @@ data class Datatype<T>(val cdlName: String, val size: Int, val typedef : Typedef
other as Datatype<*>

if (cdlName != other.cdlName) return false
if (size != other.size) return false
//if (size != other.size) return false

return true
}

override fun hashCode(): Int {
var result = cdlName.hashCode()
result = 31 * result + size
//result = 31 * result + size
return result
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class ArrayStructureData(shape : IntArray, val ba : ByteArray, val isBE: Boolean
}
is ArrayString -> {
if (value.values.size == 1)
append(value.values[0])
append("\"${value.values[0]}\"")
else
append("[${value.showValues()}]")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class ArrayUByte(shape: IntArray, datatype: Datatype<*>, val values: UByteArray)
}

/** Builds a new ArrayUByte with the shape of [section] and the values produced by sectionOf(section). */
override fun section(section: Section): ArrayUByte {
    // Pass this.datatype through so the result carries the same datatype as this array.
    return ArrayUByte(section.shape.toIntArray(), this.datatype, sectionOf(section))
}

private fun sectionOf(section: Section): UByteArray {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class ArrayUInt(shape : IntArray, datatype : Datatype<*>, val values: UIntArray)
}

/** Builds a new ArrayUInt with the shape of [section] and the values produced by sectionOf(section). */
override fun section(section: Section): ArrayUInt {
    // Pass this.datatype through so the result carries the same datatype as this array.
    return ArrayUInt(section.shape.toIntArray(), this.datatype, sectionOf(section))
}

private fun sectionOf(section: Section): UIntArray {
Expand Down
6 changes: 3 additions & 3 deletions core/src/commonMain/kotlin/com/sunya/cdm/array/ArrayULong.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import com.sunya.cdm.layout.IndexSpace
import com.sunya.cdm.layout.TransferChunk

@OptIn(ExperimentalUnsignedTypes::class)
class ArrayULong(shape : IntArray, val values: ULongArray) : ArrayTyped<ULong>(Datatype.ULONG, shape) {
class ArrayULong(shape : IntArray, datatype : Datatype<*>, val values: ULongArray) : ArrayTyped<ULong>(datatype, shape) {

override fun iterator(): Iterator<ULong> = BufferIterator()
private inner class BufferIterator : AbstractIterator<ULong>() {
Expand All @@ -17,7 +17,7 @@ class ArrayULong(shape : IntArray, val values: ULongArray) : ArrayTyped<ULong>(D
}

/** Builds a new ArrayULong with the shape of [section] and the values produced by sectionOf(section). */
override fun section(section: Section): ArrayULong {
    // Pass this.datatype through so the result carries the same datatype as this array.
    return ArrayULong(section.shape.toIntArray(), this.datatype, sectionOf(section))
}

private fun sectionOf(section: Section): ULongArray {
Expand All @@ -40,6 +40,6 @@ class ArrayULong(shape : IntArray, val values: ULongArray) : ArrayTyped<ULong>(D

companion object {
fun fromLongArray(shape : IntArray, values : LongArray): ArrayULong =
ArrayULong(shape, ULongArray(values.size) { values[it].toULong() } )
ArrayULong(shape, Datatype.ULONG, ULongArray(values.size) { values[it].toULong() } )
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class ArrayUShort(shape : IntArray, datatype : Datatype<*>, val values: UShortAr
}

/** Builds a new ArrayUShort with the shape of [section] and the values produced by sectionOf(section). */
override fun section(section: Section): ArrayUShort {
    // Pass this.datatype through so the result carries the same datatype as this array.
    return ArrayUShort(section.shape.toIntArray(), this.datatype, sectionOf(section))
}

private fun sectionOf(section: Section): UShortArray {
Expand Down
3 changes: 2 additions & 1 deletion core/src/commonMain/kotlin/com/sunya/cdm/array/ArrayVlen.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import com.sunya.cdm.api.*
import com.sunya.cdm.layout.IndexND
import com.sunya.cdm.layout.IndexSpace

// maybe should just return primitive array if only one ??
class ArrayVlen<T>(shape : IntArray, val values : List<Array<T>>, val baseType : Datatype<T>)
: ArrayTyped<Array<T>>(Datatype.VLEN as Datatype<Array<T>>, shape) {

Expand Down Expand Up @@ -78,7 +79,7 @@ class ArrayVlen<T>(shape : IntArray, val values : List<Array<T>>, val baseType :
Datatype.INT -> ArrayVlen(shape, arrays.map { it as Array<Int> }, baseType as Datatype<Int>)
Datatype.UINT, Datatype.ENUM4 -> ArrayVlen(shape, arrays.map { it as Array<UInt> }, baseType as Datatype<UInt>)
Datatype.LONG -> ArrayVlen(shape, arrays.map { it as Array<Long> }, baseType as Datatype<Long>)
Datatype.ULONG, Datatype.ENUM2 -> ArrayVlen(shape, arrays.map { it as Array<ULong> }, baseType as Datatype<ULong>)
Datatype.ULONG, Datatype.ENUM8 -> ArrayVlen(shape, arrays.map { it as Array<ULong> }, baseType as Datatype<ULong>)
Datatype.FLOAT -> ArrayVlen(shape, arrays.map { it as Array<Float> }, baseType as Datatype<Float>)
Datatype.DOUBLE -> ArrayVlen(shape, arrays.map { it as Array<Double> }, baseType as Datatype<Double>)
Datatype.STRING -> ArrayVlen(shape, arrays.map { it as Array<String> }, baseType as Datatype<String>)
Expand Down
88 changes: 74 additions & 14 deletions core/src/commonMain/kotlin/com/sunya/cdm/array/StructureMember.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package com.sunya.cdm.array
import com.sunya.cdm.api.Datatype
import com.sunya.cdm.api.EnumTypedef
import com.sunya.cdm.api.computeSize
import com.sunya.cdm.array.ArrayStructureData.StructureData
import com.sunya.cdm.util.makeValidCdmObjectName

// dim lengths here are ints; Hdf4,5 only supports ints.
Expand All @@ -18,43 +19,56 @@ class StructureMember<T>(orgName: String, val datatype : Datatype<T>, val offset
* Get the value of this member from the given StructureData.
* return T for nelems = 1, ArrayTyped<T> for nelems > 1
*/
fun value(sdata: ArrayStructureData.StructureData): Any {
fun value(sdata: StructureData): Any {
val offset = sdata.offset + this.offset

if (nelems > 1) { // && !datatype.isVlenString) {
if (nelems > 1 && !datatype.isVlenString && (datatype != Datatype.VLEN)) {
val tba = TypedByteArray(this.datatype, sdata.ba, offset, this.isBE)
return tba.convertToArrayTyped(shape)
}

val enumTypedef = if (datatype.isEnum) (this.datatype.typedef as EnumTypedef) else null
val enumTypedef = if (!datatype.isEnum || this.datatype.typedef == null) null
else this.datatype.typedef as EnumTypedef

return when (datatype) {
Datatype.BYTE -> sdata.ba.get(offset)
Datatype.SHORT -> convertToShort(sdata.ba, offset, this.isBE)
Datatype.INT -> convertToInt(sdata.ba, offset, this.isBE)
Datatype.LONG -> convertToLong(sdata.ba, offset, this.isBE)
Datatype.UBYTE, Datatype.CHAR -> sdata.ba.get(offset).toUByte()
Datatype.ENUM1 -> enumTypedef!!.convertEnum(sdata.ba.get(offset).toInt())
Datatype.ENUM1 -> {
val evalue = sdata.ba.get(offset).toInt()
enumTypedef?.convertEnum(evalue) ?: evalue
}
Datatype.USHORT -> convertToShort(sdata.ba, offset, this.isBE).toUShort()
Datatype.ENUM2 -> enumTypedef!!.convertEnum(convertToShort(sdata.ba, offset, this.isBE).toInt())
Datatype.ENUM2 -> {
val evalue = convertToShort(sdata.ba, offset, this.isBE).toInt()
enumTypedef?.convertEnum(evalue) ?: evalue
}
Datatype.UINT -> convertToInt(sdata.ba, offset, this.isBE).toUInt()
Datatype.ENUM4 -> enumTypedef!!.convertEnum(convertToInt(sdata.ba, offset, this.isBE))
Datatype.ENUM4 -> {
val evalue = convertToInt(sdata.ba, offset, this.isBE)
enumTypedef?.convertEnum(evalue) ?: evalue
}
Datatype.ULONG -> convertToLong(sdata.ba, offset, this.isBE).toULong()
Datatype.ENUM8 -> enumTypedef!!.convertEnum(convertToLong(sdata.ba, offset, this.isBE).toInt())
Datatype.ENUM8 -> {
val evalue = convertToLong(sdata.ba, offset, this.isBE).toInt()
enumTypedef?.convertEnum(evalue) ?: evalue
}
Datatype.FLOAT -> convertToFloat(sdata.ba, offset, this.isBE)
Datatype.DOUBLE -> convertToDouble(sdata.ba, offset, this.isBE)
Datatype.STRING -> {
if (datatype.isVlenString) {
val ret = sdata.getFromHeap(offset)
if (ret is List<*>) {
ArrayString(intArrayOf(ret.size), ret as List<String>)
if (ret.size == 1) ret[0]!! else ArrayString(intArrayOf(ret.size), ret as List<String>)
} else if (ret is String) {
ret
} else {
"unknown $ret"
}
} else {
makeStringZ(sdata.ba, offset, nelems) // nelems ??
makeStringZ(sdata.ba, offset, nelems * datatype.size) // TODO what about non-hdf5 ?
}
}
Datatype.VLEN -> {
Expand All @@ -67,13 +81,33 @@ class StructureMember<T>(orgName: String, val datatype : Datatype<T>, val offset
}
}

/** Same as value(sdata: ArrayStructureData.StructureData): Any when nelems > 1 */
fun values(sdata: ArrayStructureData.StructureData): ArrayTyped<*> {
val offset = sdata.offset + this.offset
val tba = TypedByteArray(this.datatype, sdata.ba, offset, this.isBE)
return tba.convertToArrayTyped(shape)
/**
 * Same as value(sdata: StructureData), except that a scalar result is wrapped in a one-element ArrayTyped.
 *
 * @param sdata the StructureData to read this member's value from
 * @return the ArrayTyped returned by value(sdata) unchanged, or a one-element array (shape [1]) holding the scalar
 * @throws RuntimeException if the scalar's datatype has no wrapping rule here
 */
fun values(sdata: StructureData): ArrayTyped<*> {
    val value = value(sdata)
    if (value is ArrayTyped<*>) return value

    // Any String result is wrapped as a one-element ArrayString, whatever the member's datatype is.
    if (value is String) return ArrayString(intArrayOf(1), listOf(value))

    val scalarShape = intArrayOf(1)
    return when (datatype) {
        Datatype.BYTE -> ArrayByte(scalarShape, ByteArray(1) { value as Byte })
        Datatype.SHORT -> ArrayShort(scalarShape, ShortArray(1) { value as Short })
        Datatype.INT -> ArrayInt(scalarShape, IntArray(1) { value as Int })
        Datatype.LONG -> ArrayLong(scalarShape, LongArray(1) { value as Long })
        // Unsigned arrays take the member's datatype explicitly, as ULONG already did, so that
        // e.g. a CHAR member is wrapped with its own datatype.
        Datatype.UBYTE, Datatype.CHAR -> ArrayUByte(scalarShape, datatype, UByteArray(1) { value as UByte })
        // Reached when value() returned the raw Int for an enum rather than a String.
        Datatype.ENUM1, Datatype.ENUM2, Datatype.ENUM4, Datatype.ENUM8 -> ArrayInt(scalarShape, IntArray(1) { value as Int })
        Datatype.USHORT -> ArrayUShort(scalarShape, datatype, UShortArray(1) { value as UShort })
        Datatype.UINT -> ArrayUInt(scalarShape, datatype, UIntArray(1) { value as UInt })
        Datatype.ULONG -> ArrayULong(scalarShape, datatype, ULongArray(1) { value as ULong })
        Datatype.FLOAT -> ArrayFloat(scalarShape, FloatArray(1) { value as Float })
        Datatype.DOUBLE -> ArrayDouble(scalarShape, DoubleArray(1) { value as Double })
        // String values are normally handled by the check above; kept as a safeguard.
        Datatype.STRING -> ArrayString(scalarShape, listOf(value as String))
        else -> throw RuntimeException("StructureMember.values datatype $datatype")
    }
}

// iterator over all the member values
fun values(arraysd: ArrayStructureData): Iterator<T> = DoubleIterator(arraysd.iterator(), this)

override fun toString(): String {
return "\nStructureMember(name='$name', datatype=$datatype, offset=$offset, dims=${shape.contentToString()}, nelems=$nelems)"
}
Expand All @@ -95,4 +129,30 @@ class StructureMember<T>(orgName: String, val datatype : Datatype<T>, val offset
result = 31 * result + nelems
return result
}
}

/**
 * Double iterator (iterator of iterators): for each StructureData supplied by [sdataIter], iterates over
 * the values that member.values(sdata) returns, and presents them as one flat sequence.
 *
 * The first StructureData is fetched lazily on the first hasNext()/next() call, so an empty [sdataIter]
 * gives an empty iteration instead of throwing from the constructor. A StructureData whose member yields
 * no values is skipped.
 *
 * @param sdataIter source of StructureData to walk through
 * @param member the member whose values are extracted from each StructureData
 */
class DoubleIterator<T>(val sdataIter: Iterator<StructureData>, val member: StructureMember<T>) : AbstractIterator<T>() {
    // Values of the StructureData currently being drained; starts empty so the first computeNext() fetches one.
    var valueIterator : Iterator<T> = emptyList<T>().iterator()

    override fun computeNext() {
        // Advance to the next StructureData until one has a value left, or the outer iterator runs out.
        while (!valueIterator.hasNext()) {
            if (!sdataIter.hasNext()) {
                done()
                return
            }
            valueIterator = member.values(sdataIter.next()).iterator() as Iterator<T>
        }
        setNext(valueIterator.next())
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,15 @@ class TypedByteArray<T>(val datatype: Datatype<T>, val ba: ByteArray, val offset
Datatype.INT -> ArrayInt(shape, IntArray(nelems) { this.get(it) as Int } )
Datatype.UINT, Datatype.ENUM4 -> ArrayUInt(shape, datatype, UIntArray(nelems) { this.get(it) as UInt })
Datatype.LONG -> ArrayLong(shape, LongArray(nelems) { this.get(it) as Long })
Datatype.ULONG, Datatype.ENUM8 -> ArrayULong(shape, ULongArray(nelems) { this.get(it) as ULong })
Datatype.ULONG, Datatype.ENUM8 -> ArrayULong(shape, datatype, ULongArray(nelems) { this.get(it) as ULong })
Datatype.DOUBLE -> ArrayDouble(shape, DoubleArray(nelems) { this.get(it) as Double })
Datatype.FLOAT -> ArrayFloat(shape, FloatArray(nelems) { this.get(it) as Float })
Datatype.STRING -> { // TODO kludge ?? maybe should be done in caller ??
Datatype.STRING -> { // TODO not dealing with vlen string; cant read out of ArrayStructureData heap
val useShape = if (elemSize == null) shape else (shape.toList() + listOf(elemSize)).toIntArray()
ArrayUByte.fromByteArray(useShape, ba).makeStringsFromBytes()
}
Datatype.REFERENCE -> ArrayLong(shape, LongArray(nelems) { this.get(it) as Long }) // TODO
else -> throw IllegalArgumentException("datatype ${datatype}")
else -> throw IllegalArgumentException("convertToArrayTyped cant handle datatype ${datatype}")
}
return result as ArrayTyped<T>
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,9 @@ class FractalHeapJ(val h5: H5builder, forWho: String, val fractalHeapAddress: Lo
}
}
}

//bb.rewind()
//ChecksumUtils.validateChecksum(bb)

logger.debug{"Read fractal heap at address $fractalHeapAddress, loaded ${directBlocks.size} direct blocks"}
}

/* // TODO
fun getFractalHeapId(heapId: ByteArray): ByteArray {
/* if (buffer.remaining() != idLength) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ internal data class H5TypeInfo(val isVlenString: Boolean, val isRefObject : Bool
}

Datatype5.Time -> Datatype.LONG.withSignedness(true) // LOOK use bitPrecision i suppose?
Datatype5.String -> if (isVlenString || elemSize > 1) Datatype.STRING.withVlen(isVlenString) else Datatype.CHAR
Datatype5.String -> {
if (isVlenString) Datatype.STRING.withVlen(isVlenString)
else if (elemSize > 1) Datatype.STRING.withSize(elemSize)
else Datatype.CHAR
}
Datatype5.Reference -> Datatype.REFERENCE // "object" gets converted to dataset path, "region" ignored

Datatype5.Opaque -> if (typedef != null) Datatype.OPAQUE.withTypedef(typedef) else Datatype.OPAQUE
Expand Down
15 changes: 10 additions & 5 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5cdmBuilder.kt
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,16 @@ internal fun H5builder.buildGroup(group5 : H5Group) : Group.Builder {
}

group5.variables.filter{ it.isVariable }.forEach {
val vb = buildVariable( groupb, it )
groupb.addVariable(vb)
val address = it.dataObject.address
// println("**H5builder vb.name=${vb.name} address=${it.dataObject.address}") // maybe there a byte order problem ??
if (address > 0) datasetMap[address] = Pair(groupb, vb)
try {
val vb = buildVariable( groupb, it )
groupb.addVariable(vb)
val address = it.dataObject.address
// println("**H5builder vb.name=${vb.name} address=${it.dataObject.address}") // maybe there a byte order problem ??
if (address > 0) datasetMap[address] = Pair(groupb, vb)
} catch (e: RuntimeException) {
e.printStackTrace()
// fall through
}
}

group5.nestedGroups.forEach { groupb.addGroup( buildGroup( it )) }
Expand Down
7 changes: 5 additions & 2 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5reader.kt
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,12 @@ internal fun <T> H5builder.readCompactData(v2 : Variable<T>, shape : IntArray):
is DataLayoutCompact3 -> vinfo.mdl.compactData
else -> throw RuntimeException("CompactData must be DataLayoutCompact or DataLayoutCompact3")
}
// bb.order(vinfo.h5type.isBE)

return this.processDataIntoArray(ba, vinfo.h5type.isBE, vinfo.h5type.datatype(), shape, vinfo.h5type, vinfo.elementSize) as ArrayTyped<T>
return if (vinfo.h5type.datatype5 == Datatype5.Vlen) {
this.processVlenIntoArray(vinfo.h5type, shape, ba, shape.computeSize(), vinfo.elementSize)
} else {
this.processDataIntoArray(ba, vinfo.h5type.isBE, vinfo.h5type.datatype(), shape, vinfo.h5type, vinfo.elementSize) as ArrayTyped<T>
}
}

// handles reading data with a Layout. LOOK: Fill Value ??
Expand Down
Loading
Loading