Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,16 +256,21 @@
#### Datatype
* _Datatype.ENUM_ returns an array of the corresponding UBYTE/USHORT/UINT. Call _data.convertEnums()_ to turn this into
an ArrayString of corresponding enum names.
* _Datatype.CHAR_: All Attributes of type CHAR are assumed to be Strings. All Variables of type CHAR return data as
ArrayUByte. Call _data.makeStringsFromBytes()_ to turn this into Strings with the array rank reduced by one.
* Netcdf-3 does not have STRING or UBYTE types. In practice, CHAR is used for either.
* Netcdf-4/HDF5 library encodes CHAR values as HDF5 string type with elemSize = 1, so we use that convention to detect
legacy CHAR variables in HDF5 files. (NC_CHAR should not be used in Netcdf-4, use NC_UBYTE or NC_STRING.)
* CHAR vs STRING:
* Attributes of type CHAR are always assumed to be Strings.
* Netcdf-3 does not have STRING or UBYTE types, and in practice, CHAR is used for either. Variables of type CHAR
return data as ArrayUByte.
* Netcdf-4 encodes CHAR values as HDF5 string type with elemSize = 1, so we use that convention to detect
legacy CHAR variables in HDF5 format. (NC_CHAR should not be used in new Netcdf-4 files, use NC_UBYTE or NC_STRING.)
Variables of type CHAR return data as STRING, since users can use UBYTE if that's what they intend.

Check failure on line 265 in Readme.md

View workflow job for this annotation

GitHub Actions / Check for spelling errors

thats ==> that's
* Netcdf-4/HDF5 String variables may be fixed or variable length. For fixed Strings, we set the size of Datatype.STRING to
the fixed size. For both fixed and variable length Strings, the string withh be truncated at the first zero byte, if any.
the fixed size. For both fixed and variable length Strings, the string will be truncated at the first zero byte, if any.
* HDF4 does not have a STRING type, but does have signed and unsigned CHAR, and signed and unsigned BYTE.
We map both signed and unsigned to Datatype.CHAR and handle it as above (Attributes are Strings, Variables are UBytes).
Both signed and unsigned CHAR are mapped to Datatype.CHAR, whose data is returned as Strings for Attributes,
and ArrayUByte for Variables.
* Call _data.makeStringsFromBytes()_ to turn ArrayUByte into ArrayString with the array rank reduced by one.
* _Datatype.STRING_ always appears to be variable length to the user, regardless of whether the data in the file is variable or fixed length.
* _Datatype.STRING_ is always encoded as UTF8. TODO add option to change encoding, probably when opening the file.

#### Typedef
Unlike Netcdf-Java, we follow Netcdf-4 "user defined types" and add typedefs for Compound, Enum, Opaque, and Vlen.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class StructureMember<T>(orgName: String, val datatype : Datatype<T>, val offset

if (nelems > 1 && !datatype.isVlenString && (datatype != Datatype.VLEN)) {
val tba = TypedByteArray(this.datatype, sdata.ba, offset, this.isBE)
return tba.convertToArrayTyped(shape)
return tba.convertToArrayTyped(shape) // TODO charToString = ??
}

val enumTypedef = if (!datatype.isEnum || this.datatype.typedef == null) null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ fun makeString(ba: ByteArray) = ba.decodeToString(charset = Charsets.UTF8)

// needed when setting fillValue from Attribute value
fun convertToBytes(datatype : Datatype<*>, value: Any?, isBE: Boolean, charset : Charset = Charsets.UTF8): ByteArray {
if ( value == null) return ByteArray(datatype.size)
if (value == null) return ByteArray(datatype.size)
return when (value) {
is Byte -> byteArrayOf(value)
// is Char -> byteArrayOf(value.toByte()) // avoid CHAR altogether
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,15 @@ class TypedByteArray<T>(val datatype: Datatype<T>, val ba: ByteArray, val offset
}
}

fun convertToArrayTyped(shape: IntArray, elemSize : Int? = null): ArrayTyped<T> {
fun convertToArrayTyped(shape: IntArray, elemSize : Int? = null, charToString : Boolean = false): ArrayTyped<T> {
val nelems = shape.computeSize()
val result = when (datatype) {
Datatype.BYTE -> ArrayByte(shape, ByteArray(nelems) { this.get(it) as Byte } )
Datatype.CHAR, Datatype.UBYTE, Datatype.ENUM1 -> ArrayUByte(shape, datatype, UByteArray(nelems) { this.get(it) as UByte })
Datatype.UBYTE, Datatype.ENUM1 -> ArrayUByte(shape, datatype, UByteArray(nelems) { this.get(it) as UByte })
Datatype.CHAR -> {
val ubytes = ArrayUByte(shape, datatype, UByteArray(nelems) { this.get(it) as UByte })
if (charToString) ubytes.makeStringsFromBytes() else ubytes
}
Datatype.SHORT -> ArrayShort(shape, ShortArray(nelems) { this.get(it) as Short })
Datatype.USHORT, Datatype.ENUM2 -> ArrayUShort(shape, datatype, UShortArray(nelems) { this.get(it) as UShort })
Datatype.INT -> ArrayInt(shape, IntArray(nelems) { this.get(it) as Int } )
Expand Down
11 changes: 0 additions & 11 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf4/Tag.kt
Original file line number Diff line number Diff line change
Expand Up @@ -378,17 +378,6 @@ internal class TagRasterImage(icode: Int, refno: Int, offset : Long, length : In
val shape = intArrayOf(tagID.ydim, tagID.xdim, tagID.nelems)
val tba = TypedByteArray(datatype, raw, 0, isBE = true)
raster = tba.convertToArrayTyped(shape)

/*
raster = when (datatype) {
Datatype.BYTE -> ArrayByte(shape, bb)
Datatype.UBYTE -> ArrayUByte(shape, bb)
Datatype.SHORT -> ArrayShort(shape, bb)
Datatype.USHORT -> ArrayUShort(shape, bb)
Datatype.INT -> ArrayInt(shape, bb)
Datatype.UINT -> ArrayUInt(shape, bb)
else -> throw RuntimeException("not supporting $datatype for TagRasterImage")
} */
}
}

Expand Down
28 changes: 13 additions & 15 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt
Original file line number Diff line number Diff line change
Expand Up @@ -85,21 +85,19 @@ internal class H5chunkReader(val h5 : H5builder) {
var transferChunks = 0
val state = OpenFileState(0L, vinfo.h5type.isBE)
for (dataChunk: DataChunkIF in tiledData.dataChunks(wantSpace)) { // : Iterable<BTree1New.DataChunkEntry>
if (!dataChunk.isMissing()) { // TODO fill value
val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims)
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
if (dataChunk.isMissing()) {
if (debugChunking) println(" missing ${dataChunk.show(tiledData.tiling)}")
chunker.transferMissing(vinfo.fillValue, elemSize, ba)
} else {
if (debugChunking) println(" chunk=${dataChunk.show(tiledData.tiling)}")
state.pos = dataChunk.childAddress()
val chunkData = h5.raf.readByteArray(state, dataChunk.chunkSize())
val filteredData = if (dataChunk.filterMask() == null) chunkData
else filters.apply(chunkData, dataChunk.filterMask()!!)
chunker.transferBA(filteredData, 0, elemSize, ba, 0)
transferChunks += chunker.transferChunks
}
val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims)
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
if (dataChunk.isMissing()) {
if (debugChunking) println(" missing ${dataChunk.show(tiledData.tiling)}")
chunker.transferMissing(vinfo.fillValue, elemSize, ba)
} else {
if (debugChunking) println(" chunk=${dataChunk.show(tiledData.tiling)}")
state.pos = dataChunk.childAddress()
val chunkData = h5.raf.readByteArray(state, dataChunk.chunkSize())
val filteredData = if (dataChunk.filterMask() == null) chunkData
else filters.apply(chunkData, dataChunk.filterMask()!!)
chunker.transferBA(filteredData, 0, elemSize, ba, 0)
transferChunks += chunker.transferChunks
}
}

Expand Down
3 changes: 2 additions & 1 deletion core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5group.kt
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,10 @@ internal class H5GroupBuilder(
// gather the H5typedef found in DataObjects
if (nested.isTypedef) {
val mdt = nested.dataObject!!.mdt!!
// if it's a typedef but not a variable, promote to shared
if (!nested.isVariable) mdt.isShared = true
val typename = if (mdt.isShared) nested.dataObject!!.name else null
val typedef = H5typedef(typename, mdt)

typedefs.add(typedef)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ internal fun <T> H5builder.processDataIntoArray(ba: ByteArray, isBE: Boolean, da
}

val tba = TypedByteArray(datatype, ba, 0, isBE = isBE)
return tba.convertToArrayTyped(shape)
return tba.convertToArrayTyped(shape, charToString = true)
}

// Put the variable length members (vlen, string) on the heap
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package com.sunya.netchdf.hdf5

import com.sunya.cdm.api.*
import com.sunya.cdm.api.Datatype.Companion.STRING
import com.sunya.cdm.api.Datatype.Companion.CHAR
import com.sunya.cdm.array.ArrayEmpty
import com.sunya.cdm.array.ArraySingle
import com.sunya.cdm.array.ArrayString
Expand Down Expand Up @@ -43,7 +44,11 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf {

val vinfo = v2.spObject as DataContainerVariable
if (vinfo.onlyFillValue) { // fill value only, no data
if (v2.datatype == STRING) return ArrayString(intArrayOf(1), listOf("")) as ArrayTyped<T>
if (v2.datatype == STRING) return ArrayString(v2.shape.toIntArray(), List(v2.nelems.toInt()) {""} ) as ArrayTyped<T>
if (v2.datatype == CHAR) {
val shapeMinus1 = if (v2.rank == 0) intArrayOf(1) else IntArray(v2.rank - 1) { v2.shape[it].toInt() }
return ArrayString(shapeMinus1, List(shapeMinus1.computeSize()) {""} ) as ArrayTyped<T>
}
val tba = TypedByteArray(v2.datatype, vinfo.fillValue, 0, isBE = vinfo.h5type.isBE)
return ArraySingle(wantSection.shape.toIntArray(), v2.datatype, tba.get(0))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import okio.FileSystem
import okio.Path
import okio.Path.Companion.toPath

const val testData = "core/src/commonTest/data/"
const val testData = "src/commonTest/data/"

fun testFilesIn(dirPath: String): TestFiles.SequenceBuilder {
return TestFiles.SequenceBuilder(dirPath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ internal fun <T> processDataIntoArray(ba: ByteArray, isBE: Boolean, datatype5 :

// convert to array of Strings by reducing rank by 1, tricky shape shifting for non-scalars
if (datatype5 == Datatype5.String) {
val extshape = IntArray(shape.size + 1) { if (it == shape.size) elemSize else shape[it] }
val extshape = if (elemSize == 1) shape else IntArray(shape.size + 1) { if (it == shape.size) elemSize else shape[it] }
val result = ArrayUByte.fromByteArray(extshape, ba)
return result.makeStringsFromBytes() as ArrayTyped<T>
}
Expand All @@ -816,7 +816,7 @@ internal fun <T> processDataIntoArray(ba: ByteArray, isBE: Boolean, datatype5 :
}

val tba = TypedByteArray(datatype, ba, 0, isBE = isBE)
return tba.convertToArrayTyped(shape)
return tba.convertToArrayTyped(shape, charToString = true)
}

// Put the variable length members (vlen, string) on the heap
Expand Down
Loading
Loading