Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion sjsonnet/src/sjsonnet/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,10 @@ class Parser(
// cost more than the potential memory savings for strings that are unlikely
// to repeat (e.g., 600KB text block literals)
val unique = if (s.length > 1024) s else internedStrings.getOrElseUpdate(s, s)
Val.Str(pos, unique)
val result = Val.Str(pos, unique)
if (unique.length > 1024 && CharSWAR.isAsciiJsonSafe(unique))
result._asciiSafe = true
result
}

// Any `expr` that isn't naively left-recursive
Expand Down
8 changes: 6 additions & 2 deletions sjsonnet/src/sjsonnet/Val.scala
Original file line number Diff line number Diff line change
Expand Up @@ -433,11 +433,15 @@ object Val {
if (ls != null && ls.isEmpty) return right
if (rs != null && rs.isEmpty) return left
// Small string eagerness: both flat and combined length <= 128
if (ls != null && rs != null && ls.length + rs.length <= 128)
return new Str(pos, ls + rs)
if (ls != null && rs != null && ls.length + rs.length <= 128) {
val result = new Str(pos, ls + rs)
if (left._asciiSafe && right._asciiSafe) result._asciiSafe = true
return result
}
// Rope node: O(1)
val node = new Str(pos, null)
node._children = Array(left, right)
if (left._asciiSafe && right._asciiSafe) node._asciiSafe = true
node
}
}
Expand Down
27 changes: 21 additions & 6 deletions sjsonnet/src/sjsonnet/stdlib/StringModule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,14 @@ object StringModule extends AbstractFunctionModule {
Val.cachedNum(
pos,
(x.value match {
case Val.Str(_, s) => s.codePointCount(0, s.length)
case a: Val.Arr => a.length
case o: Val.Obj => o.visibleKeyNames.length
case o: Val.Func => o.params.names.length
case x => Error.fail("Cannot get length of " + x.prettyName)
case v: Val.Str =>
val s = v.str
if (v._asciiSafe) s.length
else s.codePointCount(0, s.length)
case a: Val.Arr => a.length
case o: Val.Obj => o.visibleKeyNames.length
case o: Val.Func => o.params.names.length
case x => Error.fail("Cannot get length of " + x.prettyName)
}).toDouble
)
}
Expand Down Expand Up @@ -126,7 +129,9 @@ object StringModule extends AbstractFunctionModule {
*/
private object Substr extends Val.Builtin3("substr", "str", "from", "len") {
def evalRhs(_s: Eval, from: Eval, len: Eval, ev: EvalScope, pos: Position): Val = {
val str = _s.value.asString
val srcVal = _s.value
val str = srcVal.asString
val srcAsciiSafe = srcVal.isInstanceOf[Val.Str] && srcVal.asInstanceOf[Val.Str]._asciiSafe
val offset = from.value match {
case v: Val.Num => v.asPositiveInt
case _ => Error.fail("Expected a number for offset in substr, got " + from.value.prettyName)
Expand All @@ -138,6 +143,16 @@ object StringModule extends AbstractFunctionModule {

if (length <= 0) {
Val.Str(pos, "")
} else if (srcAsciiSafe) {
val strLen = str.length
val safeOffset = math.min(offset, strLen)
val safeLength = math.min(length, strLen - safeOffset)
if (safeLength <= 0) Val.Str(pos, "")
else {
val result = Val.Str(pos, str.substring(safeOffset, safeOffset + safeLength))
result._asciiSafe = true
result
}
} else {
val requestedEnd = offset.toLong + length.toLong
if (
Expand Down
10 changes: 10 additions & 0 deletions sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ object UnicodeHandlingTests extends TestSuite {
test("stringLength") {
eval("std.length('🌍')") ==> ujson.Num(1)
eval("std.length('Hello 🌍')") ==> ujson.Num(7)
eval("std.length('ASCII only')") ==> ujson.Num(10)
// Jsonnet strings are defined over codepoints, not grapheme clusters, so the
// following "family" emoji has a length of 7 (because it has 7 codepoints):
eval("std.length('👨‍👩‍👧‍👦')") ==> ujson.Num(7)
Expand Down Expand Up @@ -53,10 +54,19 @@ object UnicodeHandlingTests extends TestSuite {
eval("std.substr('A🌍B', 0, 1)") ==> ujson.Str("A")
eval("std.substr('A🌍B', 1, 1)") ==> ujson.Str("🌍")
eval("std.substr('A🌍B', 2, 1)") ==> ujson.Str("B")
eval("std.substr('ASCII only', 6, 4)") ==> ujson.Str("only")
eval("std.substr('Hello 🌍 World', 6, 100)") ==> ujson.Str("🌍 World")
eval("std.substr('🌍', 1, 5)") ==> ujson.Str("") // Beyond string length
}

test("longAsciiLengthAndSubstr") {
  // 1030 characters: longer than the 1024-character threshold that Parser.scala
  // checks before flagging a string literal as ASCII-safe.
  val longAscii = "a" * 1030
  // The same text wrapped in single quotes, ready to splice into Jsonnet source.
  val literal = s"'$longAscii'"

  // Length of a long ASCII-only literal equals its character count.
  val measured = eval(s"std.length($literal)")
  measured ==> ujson.Num(1030)

  // Requested length runs past the end: only the final two characters come back.
  val clippedTail = eval(s"std.substr($literal, 1028, 20)")
  clippedTail ==> ujson.Str("aa")

  // Offset beyond the end of the string yields the empty string.
  val pastEnd = eval(s"std.substr($literal, 1031, 20)")
  pastEnd ==> ujson.Str("")

  // Two long literals concatenated (2060 characters): substr near the end is
  // still clipped to the last two characters.
  val joinedTail = eval(s"std.substr($literal + $literal, 2058, 10)")
  joinedTail ==> ujson.Str("aa")
}

test("stringSlice") {
eval("'A🌍B'[0:1]") ==> ujson.Str("A")
eval("'A🌍B'[1:2]") ==> ujson.Str("🌍")
Expand Down
Loading