Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions sjsonnet/src-js/sjsonnet/CharSWAR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ object CharSWAR {
false
}

/**
 * Returns true when every char of `s` lies in the range 32..127 and is neither '"' nor '\\'.
 * An empty string yields true.
 */
def isAsciiJsonSafe(s: String): Boolean = {
  val n = s.length
  var pos = 0
  var safe = true
  // Stop at the first char that falls outside the accepted set.
  while (safe && pos < n) {
    val ch = s.charAt(pos)
    safe = ch >= 32 && ch < 128 && ch != '"' && ch != '\\'
    pos += 1
  }
  safe
}

/** Scalar scan for byte[] — used by ByteRenderer for UTF-8 encoded data. */
def hasEscapeChar(arr: Array[Byte], from: Int, to: Int): Boolean = {
var i = from
Expand Down
3 changes: 3 additions & 0 deletions sjsonnet/src-js/sjsonnet/Platform.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ object Platform {
}
}

/** Platform entry point for the ASCII/JSON-safety check; forwards to `CharSWAR.isAsciiJsonSafe`. */
def isAsciiJsonSafe(s: String): Boolean = {
  CharSWAR.isAsciiJsonSafe(s)
}

private def nodeToJson(node: Node): ujson.Value = node match {
case _: Node.ScalarNode =>
YamlDecoder.forAny.construct(node).getOrElse("") match {
Expand Down
55 changes: 55 additions & 0 deletions sjsonnet/src-jvm/sjsonnet/CharSWAR.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ private CharSWAR() {}
/** Below this length, scalar charAt is faster than SWAR + byte[] conversion. */
private static final int SWAR_THRESHOLD = 128;

// Masks for scanning four UTF-16 chars packed into one long (one char per 16-bit lane).
// Low 15 bits of every lane; used by zero16 to test lanes for zero.
private static final long U16_HOLE = 0x7FFF_7FFF_7FFF_7FFFL;
// '"' (0x22) replicated into every lane.
private static final long U16_QUOTE = 0x0022_0022_0022_0022L;
// '\\' (0x5C) replicated into every lane.
private static final long U16_BSLAS = 0x005C_005C_005C_005CL;
// Bits 5..15 of every lane: a lane ANDed with this is zero exactly when its value is < 32.
private static final long U16_CTRL = 0xFFE0_FFE0_FFE0_FFE0L;
// Bits 7..15 of every lane: a lane ANDed with this is non-zero exactly when its value is >= 128.
private static final long U16_ASCII = 0xFF80_FF80_FF80_FF80L;

/**
* Check if any char in {@code str} needs JSON string escaping.
* Scan-first API: call on the String before copying to the output buffer.
Expand All @@ -80,6 +86,34 @@ static boolean hasEscapeChar(byte[] arr, int from, int to) {
return hasEscapeCharSWAR(arr, from, to);
}

/**
 * Reports whether {@code str} can be emitted as JSON string content with no escaping and no
 * UTF-8 encoding step: every char must lie in the range 0x20..0x7F and be neither
 * {@code '"'} nor {@code '\\'}.
 *
 * <p>Strings shorter than 8 chars are scanned one char at a time. Longer strings are scanned
 * four chars at a time by packing them into a single {@code long}; whatever is left over is
 * then checked char by char.
 */
static boolean isAsciiJsonSafe(String str) {
  final int length = str.length();
  if (length < 8) {
    return isAsciiJsonSafeScalar(str, length);
  }
  int pos = 0;
  // Four UTF-16 chars per 64-bit word, lowest index in the lowest 16 bits.
  for (final int lastFullWord = length - 4; pos <= lastFullWord; pos += 4) {
    long packed = (long) str.charAt(pos);
    packed |= (long) str.charAt(pos + 1) << 16;
    packed |= (long) str.charAt(pos + 2) << 32;
    packed |= (long) str.charAt(pos + 3) << 48;
    if (swarHasUnsafeAsciiChar(packed)) {
      return false;
    }
  }
  // Fewer than four chars remain at this point.
  for (; pos < length; pos++) {
    final char ch = str.charAt(pos);
    final boolean inRange = ch >= 32 && ch < 128;
    if (!inRange || ch == '"' || ch == '\\') {
      return false;
    }
  }
  return true;
}

/**
* Check if any char in {@code arr[from..to)} needs JSON string escaping.
*/
Expand Down Expand Up @@ -160,6 +194,19 @@ private static int firstMatchedByte(long mask) {
: Long.numberOfLeadingZeros(mask)) >>> 3;
}

/**
 * Tests a word of four packed 16-bit lanes and returns true when any lane is >= 128, is < 32,
 * or equals '"' or '\\'.
 */
private static boolean swarHasUnsafeAsciiChar(long word) {
  // Any bit at or above bit 7 of a lane puts that lane outside 0..127.
  if ((word & U16_ASCII) != 0L) {
    return true;
  }
  // XOR with a replicated char zeroes the lanes equal to it; AND with U16_CTRL zeroes lanes < 32.
  long hits = zero16(word ^ U16_QUOTE);
  hits |= zero16(word ^ U16_BSLAS);
  hits |= zero16(word & U16_CTRL);
  return hits != 0L;
}

/**
 * Returns a word holding 0x8000 in every 16-bit lane where {@code word} has a zero lane, and 0
 * in all other lanes.
 */
private static long zero16(long word) {
  // Adding 0x7FFF to a lane's low 15 bits sets bit 15 iff any of those bits was set; the sum is
  // at most 0xFFFE, so nothing carries into the neighbouring lane.
  long lowBitsSeen = (word & U16_HOLE) + U16_HOLE;
  return ~(lowBitsSeen | word | U16_HOLE);
}

/** Scalar scan for String (used for short strings). */
private static boolean hasEscapeCharScalar(String s, int len) {
for (int i = 0; i < len; i++) {
Expand All @@ -168,4 +215,12 @@ private static boolean hasEscapeCharScalar(String s, int len) {
}
return false;
}

/**
 * Char-by-char check of the first {@code len} chars of {@code s}: returns false as soon as a
 * char is below 32, at or above 128, or equal to '"' or '\\'; returns true otherwise.
 */
private static boolean isAsciiJsonSafeScalar(String s, int len) {
  int idx = 0;
  while (idx < len) {
    final char ch = s.charAt(idx++);
    final boolean inRange = ch >= 32 && ch < 128;
    if (!inRange || ch == '"' || ch == '\\') {
      return false;
    }
  }
  return true;
}
}
3 changes: 3 additions & 0 deletions sjsonnet/src-jvm/sjsonnet/Platform.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ object Platform {
/** Returns `s` concatenated `count` times, or the empty string when `count` is zero or negative. */
def repeatString(s: String, count: Int): String = {
  if (count > 0) s.repeat(count)
  else ""
}

/** Forwards the ASCII/JSON-safety check on `s` to `CharSWAR.isAsciiJsonSafe`. */
def isAsciiJsonSafe(s: String): Boolean = {
  CharSWAR.isAsciiJsonSafe(s)
}

def gzipBytes(b: Array[Byte]): String = {
val outputStream: ByteArrayOutputStream = new ByteArrayOutputStream(b.length)
val gzip: GZIPOutputStream = new GZIPOutputStream(outputStream)
Expand Down
49 changes: 49 additions & 0 deletions sjsonnet/src-native/sjsonnet/CharSWAR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ object CharSWAR {
private final val CTRL = 0xe0e0e0e0e0e0e0e0L
private final val LITTLE_ENDIAN =
java.nio.ByteOrder.nativeOrder() == java.nio.ByteOrder.LITTLE_ENDIAN
// Masks for scanning four UTF-16 chars packed into one Long (one char per 16-bit lane).
// Low 15 bits of every lane; used by zero16 to test lanes for zero.
private final val U16_HOLE = 0x7fff7fff7fff7fffL
// '"' (0x22) replicated into every lane.
private final val U16_QUOTE = 0x0022002200220022L
// '\\' (0x5c) replicated into every lane.
private final val U16_BSLAS = 0x005c005c005c005cL
// Bits 5..15 of every lane: a lane ANDed with this is zero exactly when its value is < 32.
private final val U16_CTRL = 0xffe0ffe0ffe0ffe0L
// Bits 7..15 of every lane: a lane ANDed with this is non-zero exactly when its value is >= 128.
private final val U16_ASCII = 0xff80ff80ff80ff80L

/**
* SWAR: returns a mask for byte lanes in `word` containing '"' (0x22), '\\' (0x5C), or a control
Expand Down Expand Up @@ -68,6 +73,29 @@ object CharSWAR {
false
}

/**
 * Reports whether every char of `s` lies in 0x20..0x7F and is neither '"' nor '\\'.
 *
 * Strings shorter than 8 chars go through the scalar scan. Longer strings are checked four chars
 * at a time by packing them into one Long; the chars left over are then checked one by one.
 */
def isAsciiJsonSafe(s: String): Boolean = {
  val n = s.length
  if (n < 8) isAsciiJsonSafeScalar(s, n)
  else {
    var pos = 0
    var safe = true
    val lastFullWord = n - 4
    // Four UTF-16 chars per 64-bit word, lowest index in the lowest 16 bits.
    while (safe && pos <= lastFullWord) {
      val packed =
        s.charAt(pos).toLong |
          (s.charAt(pos + 1).toLong << 16) |
          (s.charAt(pos + 2).toLong << 32) |
          (s.charAt(pos + 3).toLong << 48)
      safe = !swarHasUnsafeAsciiChar(packed)
      pos += 4
    }
    // Fewer than four chars remain at this point.
    while (safe && pos < n) {
      val ch = s.charAt(pos)
      safe = ch >= 32 && ch < 128 && ch != '"' && ch != '\\'
      pos += 1
    }
    safe
  }
}

/**
* SWAR scan for byte[] using Intrinsics.loadLong for zero-overhead bulk reads. Processes 8 bytes
* per iteration — same throughput as the JVM VarHandle path. UTF-8 multi-byte sequences never
Expand Down Expand Up @@ -127,6 +155,27 @@ object CharSWAR {
false
}

/**
 * Tests a word of four packed 16-bit lanes and returns true when any lane is >= 128, is < 32,
 * or equals '"' or '\\'.
 */
@inline private def swarHasUnsafeAsciiChar(word: Long): Boolean =
  // Any bit at or above bit 7 of a lane puts that lane outside 0..127.
  (word & U16_ASCII) != 0L || {
    // XOR with a replicated char zeroes the lanes equal to it; AND with U16_CTRL zeroes lanes < 32.
    val quoteHits = zero16(word ^ U16_QUOTE)
    val backslashHits = zero16(word ^ U16_BSLAS)
    val controlHits = zero16(word & U16_CTRL)
    (quoteHits | backslashHits | controlHits) != 0L
  }

/**
 * Returns a word holding 0x8000 in every 16-bit lane where `word` has a zero lane, and 0 in all
 * other lanes.
 */
@inline private def zero16(word: Long): Long = {
  // Adding 0x7fff to a lane's low 15 bits sets bit 15 iff any of those bits was set; the sum is
  // at most 0xfffe, so nothing carries into the neighbouring lane.
  val lowBitsSeen = (word & U16_HOLE) + U16_HOLE
  ~(lowBitsSeen | word | U16_HOLE)
}

/**
 * Char-by-char check of the first `len` chars of `s`: false as soon as a char is below 32, at or
 * above 128, or equal to '"' or '\\'; true otherwise.
 */
@inline private def isAsciiJsonSafeScalar(s: String, len: Int): Boolean = {
  var pos = 0
  var safe = true
  while (safe && pos < len) {
    val ch = s.charAt(pos)
    safe = ch >= 32 && ch < 128 && ch != '"' && ch != '\\'
    pos += 1
  }
  safe
}

@inline private def hasEscapeCharScalarBytes(arr: Array[Byte], from: Int, to: Int): Boolean = {
var i = from
while (i < to) {
Expand Down
3 changes: 3 additions & 0 deletions sjsonnet/src-native/sjsonnet/Platform.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ object Platform {
}
}

/** Delegates the ASCII/JSON-safety check on `s` to `CharSWAR.isAsciiJsonSafe`. */
def isAsciiJsonSafe(s: String): Boolean = {
  CharSWAR.isAsciiJsonSafe(s)
}

def gzipBytes(b: Array[Byte]): String = {
val outputStream: ByteArrayOutputStream = new ByteArrayOutputStream(b.length)
val gzip: GZIPOutputStream = new GZIPOutputStream(outputStream)
Expand Down
2 changes: 1 addition & 1 deletion sjsonnet/src/sjsonnet/BaseByteRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class BaseByteRenderer[T <: java.io.OutputStream](
visitObject(length, index)

protected val elemBuilder = new upickle.core.ByteBuilder
private[this] val unicodeCharBuilder = new upickle.core.CharBuilder
private val unicodeCharBuilder = new upickle.core.CharBuilder

def flushByteBuilder(): Unit = {
elemBuilder.writeOutToIfLongerThan(out, if (depth == 0) 0 else 8192)
Expand Down
4 changes: 2 additions & 2 deletions sjsonnet/src/sjsonnet/Evaluator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class Evaluator(
/** Passes `e` to `logger` with `true` as the first argument; does nothing when `logger` is null. */
def trace(e: String): Unit = {
  if (logger != null) {
    logger(true, e)
  }
}
/**
 * Formats `e` with `Error.formatError` and passes the result to `logger` with `false` as the
 * first argument; does nothing when `logger` is null.
 */
def warn(e: Error): Unit = {
  if (logger != null) {
    logger(false, Error.formatError(e))
  }
}

private[this] var stackDepth: Int = 0
private[this] val maxStack: Int = settings.maxStack
private var stackDepth: Int = 0
private val maxStack: Int = settings.maxStack
private[sjsonnet] var profiler: Profiler = _

@inline private[sjsonnet] final def checkStackDepth(pos: Position): Unit = {
Expand Down
73 changes: 60 additions & 13 deletions sjsonnet/src/sjsonnet/Val.scala
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,8 @@ object Val {
if (!isConcatView && !_reversed && (arr ne null)) arr.asInstanceOf[Array[Eval]]
else null

/**
 * The single Eval shared by every element when this array is known to be constant, or null when
 * that is not known. This base implementation always returns null; ConstArr and RepeatedArr
 * override it.
 */
private[sjsonnet] def constantEval: Eval = null

/**
* If both this and other are ConcatViews sharing the same left array, return the shared prefix
* length. Otherwise return 0. Used by compare/equal to skip identical prefix elements entirely,
Expand Down Expand Up @@ -761,13 +763,13 @@ object Val {
// Exceptions intentionally reset the slot to null rather than being cached. That matches the
// existing LazyFunc/LazyExpr behavior and avoids changing observable std.trace/error behavior
// on repeated failed evaluation.
private[this] var slots: Array[AnyRef] = null
private var slots: Array[AnyRef] = null

protected def computeValue(i: Int): Val

protected def errorScope: EvalErrorScope

private[this] def ensureSlots(): Array[AnyRef] = {
private def ensureSlots(): Array[AnyRef] = {
val current = slots
if (current != null) current
else {
Expand Down Expand Up @@ -835,12 +837,46 @@ object Val {
}
}

private final class RepeatedArr(pos0: Position, private[this] val source: Arr, count: Int)
/**
 * Array of `size` slots that all resolve to the same `elem`. It is constructed without a backing
 * array; one is only built, and cached in `arr`, when `asLazyArray` is called.
 */
private final class ConstArr(pos0: Position, size: Int, private val elem: Eval)
    extends Arr(pos0, null) {
  _length = size

  /** Throws ArrayIndexOutOfBoundsException for any index outside `[0, _length)`. */
  @inline private def requireInBounds(index: Int): Unit = {
    val inRange = index >= 0 && index < _length
    if (!inRange) throw new ArrayIndexOutOfBoundsException(index)
  }

  /** Value of the shared element, after bounds-checking `i`. */
  override def value(i: Int): Val = {
    requireInBounds(i)
    elem.value
  }

  /** The shared element itself, after bounds-checking `i`. */
  override def eval(i: Int): Eval = {
    requireInBounds(i)
    elem
  }

  override private[sjsonnet] def constantEval: Eval = elem

  /** Returns the cached backing array, building and caching a filled one on first use. */
  override def asLazyArray: Array[Eval] = {
    val cached = arr
    if (cached == null) {
      val filled = new Array[Eval](_length)
      java.util.Arrays.fill(filled.asInstanceOf[Array[AnyRef]], elem.asInstanceOf[AnyRef])
      arr = filled
      filled
    } else cached.asInstanceOf[Array[Eval]]
  }

  /** Every slot holds the same element, so the reversed array is another ConstArr at `newPos`. */
  override def reversed(newPos: Position): Arr =
    new ConstArr(newPos, _length, elem)
}

private final class RepeatedArr(pos0: Position, private val source: Arr, count: Int)
extends Arr(pos0, null) {
// Keep std.repeat(array, n) as an indexed view. The common consumers either index a subset or
// materialize later anyway; eagerly copying here multiplies thunks and stresses young-gen GC.
private[this] val sourceLen = source.length
private[this] val totalLen = sourceLen.toLong * count.toLong
private val sourceLen = source.length
private val totalLen = sourceLen.toLong * count.toLong
if (totalLen > Int.MaxValue) throw new IllegalArgumentException("array too large")
_length = totalLen.toInt

Expand All @@ -850,6 +886,9 @@ object Val {
override def eval(i: Int): Eval =
source.eval(i % sourceLen)

override private[sjsonnet] def constantEval: Eval =
if (sourceLen == 1) source.eval(0) else source.constantEval

override def asLazyArray: Array[Eval] = {
val sourceArr = source.asLazyArray
val sourceLen = this.sourceLen
Expand All @@ -869,14 +908,14 @@ object Val {

private final class SliceArr(
pos0: Position,
private[this] var source: Arr,
private[this] val start: Int,
private[this] val step: Int,
private var source: Arr,
private val start: Int,
private val step: Int,
size: Int)
extends Arr(pos0, null) {
_length = size

@inline private[this] def sourceIndex(i: Int): Int = start + i * step
@inline private def sourceIndex(i: Int): Int = start + i * step

override def value(i: Int): Val =
if ((arr ne null) || isConcatView) super.value(i)
Expand Down Expand Up @@ -919,9 +958,9 @@ object Val {
private abstract class LazyViewArr(pos0: Position, size: Int) extends Arr(pos0, null) {
_length = size

private[this] var values: Array[Val] = _
private[this] var evals: Array[Eval] = _
private[this] var computedCount: Int = 0
private var values: Array[Val] = _
private var evals: Array[Eval] = _
private var computedCount: Int = 0

protected def computeAt(index: Int): Val

Expand All @@ -936,7 +975,7 @@ object Val {
if (cache == null) null else cache(i)
}

private[this] def valueCache: Array[Val] = {
private def valueCache: Array[Val] = {
var cache = values
if (cache == null) {
cache = new Array[Val](_length)
Expand Down Expand Up @@ -1278,6 +1317,14 @@ object Val {
object Arr {
/** Wraps `arr` in a new `Arr` positioned at `pos`. */
def apply(pos: Position, arr: Array[? <: Eval]): Arr = {
  new Arr(pos, arr)
}

/**
 * Builds an array of `size` slots that all hold `elem`.
 *
 * A size of 0 yields an array over EMPTY_EVAL_ARRAY, sizes below LAZY_VIEW_THRESHOLD get an
 * eagerly filled backing array, and anything larger becomes a ConstArr.
 */
def constant(pos: Position, size: Int, elem: Eval): Arr = {
  if (size == 0) Arr(pos, EMPTY_EVAL_ARRAY)
  else if (size >= LAZY_VIEW_THRESHOLD) new ConstArr(pos, size, elem)
  else {
    val filled = new Array[Eval](size)
    java.util.Arrays.fill(filled.asInstanceOf[Array[AnyRef]], elem.asInstanceOf[AnyRef])
    Arr(pos, filled)
  }
}

/**
 * Builds the array for `source` repeated `count` times: an array over EMPTY_EVAL_ARRAY when
 * `count` is 0 or `source` is empty, otherwise a RepeatedArr over `source`.
 */
def repeated(pos: Position, source: Arr, count: Int): Arr = {
  val nothingToRepeat = count == 0 || source.length == 0
  if (nothingToRepeat) Arr(pos, EMPTY_EVAL_ARRAY)
  else new RepeatedArr(pos, source, count)
}
Expand Down
9 changes: 4 additions & 5 deletions sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -896,7 +896,9 @@ object ArrayModule extends AbstractFunctionModule {
}
val res: Val = what match {
case Val.Str(_, str) =>
Val.Str(pos, Platform.repeatString(str, count))
val repeated = Platform.repeatString(str, count)
if (Platform.isAsciiJsonSafe(str)) Val.Str.asciiSafe(pos, repeated)
else Val.Str(pos, repeated)
case a: Val.Arr =>
if (a.length.toLong * count.toLong > Int.MaxValue)
Error.fail("array too large", pos)(ev)
Expand All @@ -919,10 +921,7 @@ object ArrayModule extends AbstractFunctionModule {
val body = func.bodyExpr
if (func.params.names.length == 1 && body != null && body.isInstanceOf[Val.Literal]) {
// Function body is a constant (e.g. `function(_) 'x'`).
// Keep the eager shared-value array: it is smaller and faster than a lazy view here.
val a = new Array[Eval](sz)
java.util.Arrays.fill(a.asInstanceOf[Array[AnyRef]], body.asInstanceOf[Val])
Val.Arr(pos, a)
Val.Arr.constant(pos, sz, body.asInstanceOf[Val])
} else {
Val.Arr.makeArray(pos, sz, func, pos, pos.noOffset, ev)
}
Expand Down
Loading
Loading