Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .github/workflows/auto-jdk-matrix.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
name: Auto JDK Matrix Test & Install

on:
# push:
# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
# pull_request:
# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# # The branches below must be a subset of the branches above
# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
push:
branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
pull_request:
paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# The branches below must be a subset of the branches above
branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:

env:
Expand All @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
jdk: [ 21 ]
jdk: [ 24 ]

env:
JDK_VERSION: ${{ matrix.jdk }}
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/auto-os-matrix.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
name: Auto OS Matrix Test & Install

on:
# push:
# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
# pull_request:
# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# # The branches below must be a subset of the branches above
# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
push:
paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
pull_request:
paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# The branches below must be a subset of the branches above
branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:

env:
Expand All @@ -21,7 +21,7 @@ jobs:
fail-fast: false

matrix:
jdk: [ 21 ]
jdk: [ 24 ]
os: [ windows-latest, ubuntu-latest, macos-latest ]
include:
- os: windows-latest
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/check_cpp_files.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
name: CPP SerDe Compatibility Test

on:
# push:
# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
# pull_request:
# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# # The branches below must be a subset of the branches above
# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
push:
paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
pull_request:
paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# The branches below must be a subset of the branches above
branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:

jobs:
Expand All @@ -27,7 +27,7 @@ jobs:
- name: Setup Java
uses: actions/setup-java@v4
with:
java-version: '21'
java-version: '24'
distribution: 'temurin'

- name: Configure C++ build
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
name: "CodeQL"

on:
# push:
# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
# pull_request:
# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# # The branches below must be a subset of the branches above
# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
push:
paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
pull_request:
paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
# The branches below must be a subset of the branches above
branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:

jobs:
Expand Down Expand Up @@ -35,7 +35,7 @@ jobs:
with:
distribution: 'temurin'
cache: 'maven'
java-version: '21'
java-version: '24'

- name: Initialize CodeQL
uses: github/codeql-action/init@v3
Expand Down
11 changes: 7 additions & 4 deletions .github/workflows/javadoc.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
name: JavaDoc

on:
# push:
# branches: main
push:
branches: main
workflow_dispatch:

permissions:
contents: write

jobs:
javadoc:
runs-on: ubuntu-latest
Expand All @@ -16,7 +19,7 @@ jobs:
- name: Setup Java
uses: actions/setup-java@v4
with:
java-version: '21'
java-version: '24'
distribution: 'temurin'

- name: Echo Java Version
Expand All @@ -30,7 +33,7 @@ jobs:
run: mvn javadoc:javadoc

- name: Deploy JavaDoc
uses: JamesIves/github-pages-deploy-action@v4.6.8
uses: JamesIves/github-pages-deploy-action@881db5376404c5c8d621010bcbec0310b58d5e29
with:
token: ${{ secrets.GITHUB_TOKEN }}
folder: target/reports/apidocs
Expand Down
3 changes: 0 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,6 @@ under the License.
<docfilessubdirs>true</docfilessubdirs>
<show>public</show>
<doclint>all,-missing</doclint>
<additionalJOptions>
<additionalJOption>${jvm-arguments}</additionalJOption>
</additionalJOptions>
</configuration>
<executions>
<execution>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/apache/datasketches/common/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ public static long floorPowerOf2(final long n) {
}

/**
* This is a long integer equivalent to <i>Math.ceil(n / (double)(1 << k))</i>
* This is a long integer equivalent to <i>Math.ceil(n / (double)(1 &lt;&lt; k))</i>
* where: <i>0 &lt; k &le; 6</i> and <i>n</i> is a non-negative long.
* These limits are not checked for speed reasons.
* @param n the input dividend as a positive long greater than zero.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@
* arXiv 1708.06839, August 22, 2017, Yahoo Research.
*
* <p>[5] MemorySegment Component, See
* <a href="https://openjdk.org/jeps/454"><i>JEP 454: Foreign Function & Memory API</i></a>
* <a href="https://openjdk.org/jeps/454"><i>JEP 454: Foreign Function And Memory API</i></a>
*
* <p>[6] MacBook Pro 2.3 GHz 8-Core Intel Core i9
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ static void checkDirectSegCapacity(final int k, final long n, final long segCapB
/**
* Checks a sketch's serial version and flags to see if the sketch can be wrapped as a
* DirectCompactDoubleSketch. Throws an exception if the sketch is neither empty nor compact
* and ordered, unles the sketch uses serialization version 2.
* and ordered, unless the sketch uses serialization version 2.
* @param serVer the serialization version
* @param flags Flags from the sketch to evaluate
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ static void checkDirectSegCapacity(final int k, final long n, final long segCapB
static void checkCompact(final int serVer, final int flags) {
final boolean compact = (serVer == 2) || ((flags & COMPACT_FLAG_MASK) > 0);
if (compact) {
throw new SketchesArgumentException("Compact MemorySegment is not supported for Wrap Instance.");
throw new SketchesArgumentException("MemorySegment is in compact form and is not supported for this writableWrap Instance.");
}
}

Expand Down
52 changes: 43 additions & 9 deletions src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,29 +151,63 @@ public static DoublesSketch heapify(final MemorySegment srcSeg) {
}

/**
* Wrap this sketch around the given updatable MemorySegment image of a DoublesSketch, compact or updatable.
* Wrap this sketch around the given MemorySegment image of a compact, read-only DoublesSketch.
*
* @param srcSeg the given MemorySegment image of a DoublesSketch that may have data
* @return a sketch that wraps the given srcSeg in read-only mode.
* @param srcSeg the given MemorySegment image of a compact, read-only DoublesSketch.
* @return a compact, read-only sketch that wraps the given MemorySegment.
*/
public static DoublesSketch wrap(final MemorySegment srcSeg) {
  // This entry point accepts compact images only; anything else is rejected up front.
  if (!checkIsMemorySegmentCompact(srcSeg)) {
    // The hint is plain text on purpose: Javadoc inline tags such as {@link ...} are not
    // rendered in a runtime exception message and would be shown to the user verbatim.
    throw new SketchesArgumentException(
        "MemorySegment sketch image must be in compact form. "
        + "Use writableWrap(MemorySegment) for updatable sketches.");
  }
  // The image is compact, so wrap it with the compact direct implementation.
  return DirectCompactDoublesSketch.wrapInstance(srcSeg);
}

/**
* Wrap this sketch around the given MemorySegment image of an updatable DoublesSketch.
*
* <p>The given MemorySegment must be writable and it must contain an <i>UpdateDoublesSketch</i>.
* The sketch will be updated and managed entirely within the MemorySegment. If the given source
* MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.</p>
*
* <p><b>NOTE:</b> If, while being updated, the sketch requires more capacity than the given size of the MemorySegment, the sketch
* will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default implementation of
* this interface will return a new MemorySegment on the heap.</p>
*
* @param srcSeg the given MemorySegment image of an <i>UpdateDoublesSketch</i>.
* @return an updatable sketch that wraps the given MemorySegment.
*/
public static DoublesSketch writableWrap(final MemorySegment srcSeg) {
if (checkIsMemorySegmentCompact(srcSeg)) {
return DirectCompactDoublesSketch.wrapInstance(srcSeg);
throw new SketchesArgumentException(
"MemorySegment sketch image must be in updatable form. "
+ "Use {@link #wrap(MemorySegment wrap(...)} for compact sketches.");
}
return DirectUpdateDoublesSketch.wrapInstance(srcSeg, null);
}

/**
* Wrap this sketch around the given updatable MemorySegment image of a DoublesSketch, compact or updatable.
* Wrap this sketch around the given MemorySegment image of an updatable DoublesSketch.
*
* <p>The given MemorySegment must be writable and it must contain an <i>UpdateDoublesSketch</i>.
* The sketch will be updated and managed entirely within the MemorySegment. If the given source
* MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.</p>
*
* <p><b>NOTE:</b> If, while being updated, the sketch requires more capacity than the given size of the MemorySegment, the sketch
* will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default implementation of
* this interface will return a new MemorySegment on the heap. It is up to the user to optionally extend this interface if more
* flexible handling of requests for more capacity is required.</p>
*
* @param srcSeg the given MemorySegment image of a DoublesSketch that may have data.
* @param srcSeg the given MemorySegment image of a DoublesSketch.
* @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand.
* Otherwise, it can be null and the default MemorySegmentRequest will be used.
* @return a sketch that wraps the given srcSeg in read-only mode.
* @return a sketch that wraps the given MemorySegment.
*/
public static DoublesSketch wrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) {
public static DoublesSketch writableWrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) {
if (checkIsMemorySegmentCompact(srcSeg)) {
return DirectCompactDoublesSketch.wrapInstance(srcSeg);
throw new SketchesArgumentException("MemorySegment sketch image must be in updatable form.");
}
return DirectUpdateDoublesSketch.wrapInstance(srcSeg, mSegReq);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.datasketches.quantiles;

import static org.apache.datasketches.common.Util.LS;
import static org.apache.datasketches.quantiles.ClassicUtil.checkIsMemorySegmentCompact;
import static org.apache.datasketches.quantiles.DoublesUtil.copyToHeap;

import java.lang.foreign.MemorySegment;
Expand Down Expand Up @@ -111,8 +112,8 @@ static DoublesUnionImpl heapifyInstance(final MemorySegment srcSeg) {
}

/**
* Returns an updatable Union object that wraps the data of the given MemorySegment
* image of a updatable DoublesSketch. The data of the Union will remain in the MemorySegment.
* Returns a Union object that wraps the data of the given MemorySegment image of an UpdateDoublesSketch.
* The data of the Union will remain in the MemorySegment.
*
* @param srcSeg A MemorySegment image of an updatable DoublesSketch to be used as the data structure for the union and will be modified.
* @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand.
Expand All @@ -138,7 +139,12 @@ public void union(final DoublesSketch sketchIn) {
@Override
public void union(final MemorySegment seg) {
Objects.requireNonNull(seg);
gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.wrap(seg, null));
if (checkIsMemorySegmentCompact(seg)) {
gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.wrap(seg));
} else {
gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.writableWrap(seg, null));
}

gadget_.doublesSV = null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,40 @@ public abstract class UpdateDoublesSketch extends DoublesSketch {
}

/**
* Wrap this sketch around the given MemorySegment image of an UpdateDoublesSketch.
* Wrap a sketch around the given source MemorySegment containing sketch data that originated from this sketch.
*
* @param srcSeg the given MemorySegment image of an UpdateDoublesSketch and must not be null.
* @return a sketch that wraps the given srcSeg
* <p>The given MemorySegment must be writable and it must contain an <i>UpdateDoublesSketch</i>.
* The sketch will be updated and managed entirely within the MemorySegment. If the given source
* MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.</p>
*
* <p><b>NOTE:</b> If, while being updated, the sketch requires more capacity than the given size of the MemorySegment, the sketch
* will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default implementation of
* this interface will return a new MemorySegment on the heap.</p>
*
* @param srcSeg a MemorySegment that contains sketch data.
* @return an instance of this sketch that wraps the given MemorySegment.
*/
public static UpdateDoublesSketch wrap(final MemorySegment srcSeg) {
// Delegates with a null MemorySegmentRequest. Per the two-argument overload's contract,
// null means the default MemorySegmentRequest is used if the segment needs to expand.
return DirectUpdateDoublesSketch.wrapInstance(srcSeg, null);
}

/**
* Wrap this sketch around the given MemorySegment image of an UpdateDoublesSketch.
* Wrap a sketch around the given source MemorySegment containing sketch data that originated from this sketch and including an
* optional, user defined {@link MemorySegmentRequest MemorySegmentRequest}.
*
* <p>The given MemorySegment must be writable and it must contain an <i>UpdateDoublesSketch</i>.
* The sketch will be updated and managed entirely within the MemorySegment. If the given source
* MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.</p>
*
* <p><b>NOTE:</b> If, while being updated, the sketch requires more capacity than the given size of the MemorySegment, the sketch
* will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default implementation of
* this interface will return a new MemorySegment on the heap. It is up to the user to optionally extend this interface if more
* flexible handling of requests for more capacity is required.</p>
*
* @param srcSeg the given MemorySegment image of an UpdateDoublesSketch and must not be null.
* @param srcSeg a MemorySegment that contains sketch data.
* @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand.
* Otherwise, it can be null and the default MemorySegmentRequest will be used.
* @return a sketch that wraps the given srcSeg
* @return an instance of this sketch that wraps the given MemorySegment.
*/
public static UpdateDoublesSketch wrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) {
return DirectUpdateDoublesSketch.wrapInstance(srcSeg, mSegReq);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ static MemorySegment loadCompactMemorySegment(
}
if (preLongs > 1) {
insertCurCount(dstWSeg, curCount);
insertP(dstWSeg, (float) 1.0);
insertP(dstWSeg, (float) 0.0); //0.0 to be consistent with C++
}
if (preLongs > 2) {
insertThetaLong(dstWSeg, thetaLong);
Expand Down
30 changes: 30 additions & 0 deletions src/main/java/org/apache/datasketches/theta/PreambleUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,36 @@
* 3 ||----------------------Start of Compact Long Array----------------------------------|
* </pre>
*
* <p>The compressed CompactSketch has 8 bytes of preamble in exact mode because Theta can
* be assumed to be 1.0. In estimating mode, the 2nd 8 bytes is Theta as a Long. The following
* table assumes estimating mode. In any case the number of retained entries starts immediately
* after, followed immediately by the delta encoded compressed byte array.</p>
* Unique to this table:
* <ul><li>Byte 3: entryBits (entBits): max number of bits for any one 64 bit hash not
* including leading zeros. A value in the range [1,63].</li>
* <li>Byte 4: numEntriesBytes (numEB): number of bytes required to hold the integer of number
* of retained entries not including leading zero bytes. A value in the range [1,4].</li>
* <li>The number of retained entries is stored starting at byte 16 (assuming estimating mode)
* and may extend through bytes 17, 18 and 19. In any case, the delta encoded compressed array
* starts immediately after and could start at byte 17, 18, 19 or 20.</li>
* </ul>
*
* <pre>
* Long || Start Byte Adr:
* Adr:
* || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
* 0 || Seed Hash | Flags | numEB | entBits| FamID | SerVer | PreLongs = 3 |
*
* || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
* 1 ||------------------------------THETA_LONG-------------------------------------------|
*
* || | | | (20) | (19) | (18) | (17) | 16 |
* 2 ||----------------Retained Entries stored as 1 to 4 bytes----------------------------|
*
* || | | | | | | | |
* 3 ||------------------Delta encoded compressed byte array------------------------------|
* </pre>
*
* <p>The UpdateSketch and AlphaSketch require 24 bytes of preamble followed by a non-compact
* array of longs representing a hash table.</p>
*
Expand Down
Loading
Loading