Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions sjsonnet/src/sjsonnet/Renderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,41 @@ import java.io.{StringWriter, Writer}

import upickle.core.{ArrVisitor, ObjVisitor}

final class StringBuilderWriter(initialCapacity: Int = 16) extends Writer {
private[this] val builder = new java.lang.StringBuilder(initialCapacity)

override def write(c: Int): Unit =
builder.append(c.toChar)

override def write(cbuf: Array[Char], off: Int, len: Int): Unit =
builder.append(cbuf, off, len)

override def write(str: String): Unit =
builder.append(str)

override def write(str: String, off: Int, len: Int): Unit =
builder.append(str, off, off + len)

override def append(c: Char): Writer = {
builder.append(c)
this
}

override def append(csq: CharSequence): Writer = {
builder.append(if (csq == null) "null" else csq)
this
}

override def append(csq: CharSequence, start: Int, end: Int): Writer = {
builder.append(if (csq == null) "null" else csq, start, end)
this
}

override def flush(): Unit = ()
override def close(): Unit = ()
override def toString: String = builder.toString
}

/**
* Custom JSON renderer to try and match the behavior of google/jsonnet's render:
*
Expand Down Expand Up @@ -279,6 +314,93 @@ final case class MaterializeJsonRenderer(
}
}

private[sjsonnet] final class FastMaterializeJsonRenderer(
indent: Int = 4,
escapeUnicode: Boolean = false,
newline: String = "\n",
keyValueSeparator: String = ": ",
private val outWriter: StringBuilderWriter = new StringBuilderWriter())
extends BaseCharRenderer(
outWriter,
indent,
escapeUnicode,
newline.toCharArray
) {
private val newLineCharArray = newline.toCharArray
private val keyValueSeparatorCharArray = keyValueSeparator.toCharArray

private val reusableArrVisitor: ArrVisitor[StringBuilderWriter, StringBuilderWriter] {
def subVisitor: sjsonnet.FastMaterializeJsonRenderer
} = new ArrVisitor[StringBuilderWriter, StringBuilderWriter] {
def subVisitor: sjsonnet.FastMaterializeJsonRenderer = FastMaterializeJsonRenderer.this
def visitValue(v: StringBuilderWriter, index: Int): Unit = {
flushBuffer()
commaBuffered = true
}
def visitEnd(index: Int): StringBuilderWriter = {
commaBuffered = false
depth -= 1
renderIndent()
elemBuilder.append(']')
flushCharBuilder()
outWriter
}
}

private val reusableObjVisitor: ObjVisitor[StringBuilderWriter, StringBuilderWriter] {
def subVisitor: sjsonnet.FastMaterializeJsonRenderer
def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer
} = new ObjVisitor[StringBuilderWriter, StringBuilderWriter] {
def subVisitor: sjsonnet.FastMaterializeJsonRenderer = FastMaterializeJsonRenderer.this
def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer =
FastMaterializeJsonRenderer.this
def visitKeyValue(s: Any): Unit = {
elemBuilder.appendAll(keyValueSeparatorCharArray, keyValueSeparatorCharArray.length)
}
def visitValue(v: StringBuilderWriter, index: Int): Unit = {
commaBuffered = true
}
def visitEnd(index: Int): StringBuilderWriter = {
commaBuffered = false
depth -= 1
renderIndent()
elemBuilder.append('}')
flushCharBuilder()
outWriter
}
}

override def visitArray(
length: Int,
index: Int): upickle.core.ArrVisitor[StringBuilderWriter, StringBuilderWriter] {
def subVisitor: sjsonnet.FastMaterializeJsonRenderer
} = {
flushBuffer()
elemBuilder.append('[')

depth += 1
if (length == 0 && indent != -1)
elemBuilder.appendAll(newLineCharArray, newLineCharArray.length)
else renderIndent()
reusableArrVisitor
}

override def visitObject(
length: Int,
index: Int): upickle.core.ObjVisitor[StringBuilderWriter, StringBuilderWriter] {
def subVisitor: sjsonnet.FastMaterializeJsonRenderer
def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer
} = {
flushBuffer()
elemBuilder.append('{')
depth += 1
if (length == 0 && indent != -1)
elemBuilder.appendAll(newLineCharArray, newLineCharArray.length)
else renderIndent()
reusableObjVisitor
}
}

object RenderUtils {

// Pre-cached string representations of small integers (0-255)
Expand Down
12 changes: 8 additions & 4 deletions sjsonnet/src/sjsonnet/Util.scala
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ object Util {
while (i1 < n1 && i2 < n2) {
val c1 = s1.charAt(i1)
val c2 = s2.charAt(i2)
// Fast path: equal chars can be skipped without surrogate checks.
// Even for surrogate pairs, equal high surrogates at position i lead to
// comparing low surrogates at i+1, producing the correct codepoint ordering.
if (c1 == c2) {
// Fast path: equal non-surrogates can be skipped without codepoint checks.
// Equal surrogates still need codepoint decoding because a raw surrogate and
// a valid surrogate pair can share the same leading UTF-16 code unit.
if (c1 == c2 && !Character.isSurrogate(c1)) {
i1 += 1
i2 += 1
} else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) {
Expand All @@ -157,6 +157,10 @@ object Util {
override def compare(x: String, y: String): Int = compareStringsByCodepoint(x, y)
}

def sortStringsByCodepointInPlace(xs: Array[String]): Unit = {
java.util.Arrays.sort(xs, CodepointStringOrdering)
}

def compareJsonnetStd(v1: Val, v2: Val, ev: EvalScope): Int = {
val t1 = v1.prettyName
val t2 = v2.prettyName
Expand Down
3 changes: 2 additions & 1 deletion sjsonnet/src/sjsonnet/Val.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1822,7 +1822,8 @@ object Val {
private[sjsonnet] def sortedVisibleKeyNames: Array[String] = {
var r = _sortedVisibleKeyNames
if (r == null) {
r = visibleKeyNames.sorted(Util.CodepointStringOrdering)
r = visibleKeyNames.clone()
Util.sortStringsByCodepointInPlace(r)
_sortedVisibleKeyNames = r
}
r
Expand Down
6 changes: 3 additions & 3 deletions sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ object ManifestModule extends AbstractFunctionModule {
*/
private object ManifestJson extends Val.Builtin1("manifestJson", "v") {
def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val =
Val.Str(pos, Materializer.apply0(v.value, MaterializeJsonRenderer())(ev).toString)
Val.Str(pos, Materializer.apply0(v.value, new FastMaterializeJsonRenderer())(ev).toString)
}

/**
Expand All @@ -57,7 +57,7 @@ object ManifestModule extends AbstractFunctionModule {
Materializer
.apply0(
v.value,
MaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":")
new FastMaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":")
)(ev)
.toString
)
Expand Down Expand Up @@ -94,7 +94,7 @@ object ManifestModule extends AbstractFunctionModule {
Materializer
.apply0(
v.value,
MaterializeJsonRenderer(
new FastMaterializeJsonRenderer(
indent = i.value.asString.length,
newline = newline.value.asString,
keyValueSeparator = keyValSep.value.asString
Expand Down
7 changes: 6 additions & 1 deletion sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,12 @@ object ObjectModule extends AbstractFunctionModule {
maybeSortKeys(ev, v1.allKeyNames)

@inline private def maybeSortKeys(ev: EvalScope, keys: Array[String]): Array[String] =
if (ev.settings.preserveOrder) keys else keys.sorted(Util.CodepointStringOrdering)
if (ev.settings.preserveOrder) keys
else {
val sorted = keys.clone()
Util.sortStringsByCodepointInPlace(sorted)
sorted
}

def getObjValuesFromKeys(
pos: Position,
Expand Down
53 changes: 53 additions & 0 deletions sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,69 @@ object UnicodeHandlingTests extends TestSuite {
val codepointSorted = testStrings.sorted(sjsonnet.Util.CodepointStringOrdering).toList
codepointSorted ==> List("\uFFFF", "\uD800\uDC00")

val inPlaceSorted = testStrings.clone()
sjsonnet.Util.sortStringsByCodepointInPlace(inPlaceSorted)
inPlaceSorted.toList ==> codepointSorted

// These produce different results, demonstrating the bug that was fixed
assert(utf16Sorted != codepointSorted)
}

test("codepointInPlaceSortMatchesReferenceOrdering") {
val samples = Array(
"",
"a",
"b",
"aa",
"\u0000",
"\uD800\uDC00", // U+10000
"\uFFFF",
"😀",
"🌍",
"🚀",
"é",
"Ω",
"中"
)

val cases = Seq(
Array.empty[String],
Array("a"),
Array("b", "a"),
Array.fill(20)("same"),
samples,
samples.reverse,
Array.tabulate(64)(i => samples((i * 7 + 3) % samples.length))
)

for (c <- cases) {
val actual = c.clone()
sjsonnet.Util.sortStringsByCodepointInPlace(actual)
actual.toList ==> c.sorted(sjsonnet.Util.CodepointStringOrdering).toList
}
}

test("codepointOrderingInJsonnet") {
// Verify that Jsonnet operations use Unicode codepoint ordering
eval("'\\uFFFF' < '\\uD800\\uDC00'") ==> ujson.Bool(true)
eval("std.sort(['\\uD800\\uDC00', '\\uFFFF'])") ==> ujson.Arr("\uFFFF", "\uD800\uDC00")
}

test("rawSurrogatePrefixOrdering") {
val rawSurrogatePrefix = "\uD800\uFFFF" // codepoints [0xD800, 0xFFFF]
val validSurrogatePair = "\uD800\uDC00" // codepoint [0x10000]

assert(sjsonnet.Util.compareStringsByCodepoint(rawSurrogatePrefix, validSurrogatePair) < 0)
assert(sjsonnet.Util.compareStringsByCodepoint(validSurrogatePair, rawSurrogatePrefix) > 0)

eval("(std.char(55296) + std.char(65535)) < (std.char(55296) + std.char(56320))") ==>
ujson.Bool(true)

eval(
"std.sort([std.char(55296) + std.char(56320), std.char(55296) + std.char(65535)])"
) ==> ujson.Arr(rawSurrogatePrefix, validSurrogatePair)
}

// Unpaired surrogate handling - sjsonnet-specific behavior
//
// Note: This is an intentional divergence from go-jsonnet and C++ jsonnet:
Expand Down
Loading