Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
[![Release](https://github.com/toon-format/toon-java/actions/workflows/release.yml/badge.svg)](https://github.com/toon-format/toon-java/actions/workflows/release.yml)
[![Maven Central](https://img.shields.io/maven-central/v/dev.toonformat/jtoon.svg)](https://central.sonatype.com/artifact/dev.toonformat/jtoon)
![Coverage](.github/badges/jacoco.svg)
[![SPEC v3.1](https://img.shields.io/badge/spec-v3.1-fef3c0?labelColor=1b1b1f)](https://github.com/toon-format/spec)
[![SPEC v3.3](https://img.shields.io/badge/spec-v3.3-fef3c0?labelColor=1b1b1f)](https://github.com/toon-format/spec)
[![License: MIT](https://img.shields.io/badge/license-MIT-fef3c0?labelColor=1b1b1f)](./LICENSE)

> **⚠️ Beta Status (v1.x.x):** This library is in active development and working towards spec compliance. Beta published to Maven Central. API may change before 2.0.0 release.
> **⚠️ Beta Status (v1.x.x):** This library is in active development. Beta published to Maven Central. API may change before 2.0.0 release.

Compact, human-readable serialization format for LLM contexts with **30-60% token reduction** vs JSON. Combines YAML-like indentation with CSV-like tabular arrays. Working towards full compatibility with the [official TOON specification](https://github.com/toon-format/spec).

Expand Down Expand Up @@ -90,7 +90,7 @@ Some Java-specific types are automatically normalized for LLM-safe output:
| `LocalDateTime` | ISO date-time string in quotes |
| `LocalDate` | ISO date string in quotes |
| `LocalTime` | ISO time string in quotes |
| `ZonedDateTime` | ISO zoned date-time string in quotes |
| `ZonedDateTime` | ISO offset date-time string in quotes |
| `OffsetDateTime` | ISO offset date-time string in quotes |
| `Instant` | ISO instant string in quotes |
| `java.util.Date` | ISO instant string in quotes |
Expand Down
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Some Java-specific types are automatically normalized for LLM-safe output:
| `LocalDateTime` | ISO date-time string in quotes |
| `LocalDate` | ISO date string in quotes |
| `LocalTime` | ISO time string in quotes |
| `ZonedDateTime` | ISO zoned date-time string in quotes |
| `ZonedDateTime` | ISO offset date-time string in quotes |
| `OffsetDateTime` | ISO offset date-time string in quotes |
| `Instant` | ISO instant string in quotes |
| `java.util.Date` | ISO instant string in quotes |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ <h3 id="temporal-types-heading">Temporal Types</h3>
<li>LocalDateTime → "2025-01-15T10:30:00"</li>
<li>LocalDate → "2025-01-15"</li>
<li>LocalTime → "10:30:00"</li>
<li>ZonedDateTime → "2025-01-15T10:30:00+01:00[Europe/Paris]"</li>
<li>ZonedDateTime → "2025-01-15T10:30:00+01:00"</li>
<li>OffsetDateTime → "2025-01-15T10:30:00+01:00"</li>
<li>Instant → "2025-01-15T09:30:00Z"</li>
<li>java.util.Date → converted to Instant then formatted</li>
Expand Down
16 changes: 12 additions & 4 deletions src/main/java/dev/toonformat/jtoon/DecodeOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
* @param indent Number of spaces per indentation level (default: 2)
* @param delimiter Delimiter expected in tabular array rows and inline
* primitive arrays (default: COMMA)
* @param strict Strict validation mode. When true, throws
* IllegalArgumentException on invalid input. When false,
* uses best-effort parsing and returns null on errors
* (default: true)
* @param strict Strict validation mode (default: true). When true,
* throws IllegalArgumentException on invalid input.
* When false, uses best-effort parsing and top-level
* decode errors return null instead of throwing.
* @param expandPaths Path expansion mode for dotted keys (default: OFF)
* @param maxDepth Maximum allowed nesting depth during decoding (default: 512).
* Prevents StackOverflowError from deeply nested input.
Expand Down Expand Up @@ -64,6 +64,14 @@ public DecodeOptions() {

/**
* Compact constructor with validation.
*
* @param indent number of spaces per indentation level
* @param delimiter delimiter for tabular array rows and inline arrays
* @param strict strict validation mode flag
* @param expandPaths path expansion mode for dotted keys
* @param maxDepth maximum nesting depth
* @param maxArraySize maximum array elements
* @param maxStringLength maximum string length
*/
public DecodeOptions {
if (indent < 0) {
Expand Down
12 changes: 10 additions & 2 deletions src/main/java/dev/toonformat/jtoon/EncodeOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
* Configuration options for encoding data to JToon format.
*
* @param indent Number of spaces per indentation level (default: 2)
* @param delimiter Delimiter to use for tabular array rows and inline
* primitive arrays (default: COMMA)
* @param delimiter Delimiter used for both document delimiter and active
* array delimiter. Controls quoting for object field values
* (document delimiter) and inline array values / tabular
* rows (active delimiter). (default: COMMA)
* @param lengthMarker Optional marker to prefix array lengths in headers. When
* true, arrays render as [#N] instead of [N] (default:
* false)
Expand Down Expand Up @@ -42,6 +44,12 @@ public EncodeOptions() {

/**
* Compact constructor with validation.
*
* @param indent number of spaces per indentation level
* @param delimiter delimiter for tabular array rows and inline arrays
* @param lengthMarker whether to prefix array lengths with {@code #}
* @param flatten key folding mode for nested objects
* @param flattenDepth maximum depth of key folding
*/
public EncodeOptions {
if (indent < 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ static List<Object> parseArrayWithDelimiter(final String header, final int depth
}

if (arrayMatcher.find()) {
// In strict mode, reject bracket lengths with leading zeros (e.g. [03])
// unless the length is exactly "0".
if (context.options.strict()) {
final String lengthStr = arrayMatcher.group(2);
if (lengthStr.length() > 1 && lengthStr.charAt(0) == '0') {
throw new IllegalArgumentException(
"Invalid array length with leading zeros: [" + lengthStr + "]");
}
}
final int headerEndIdx = arrayMatcher.end();
final String afterHeader = header.substring(headerEndIdx).trim();

Expand Down
45 changes: 43 additions & 2 deletions src/main/java/dev/toonformat/jtoon/decoder/DecodeHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,26 @@ static void checkPathExpansionConflict(final Map<String, Object> map, final Stri
checkFinalValueConflict(key, existing, value, context);
}

/**
 * Rejects a repeated key while decoding in strict mode.
 * <p>
 * Nothing happens when strict mode is disabled or when the key is not yet
 * present in the map. The line number in the error message is the context's
 * {@code currentLine} plus one.
 *
 * @param map     the map that is about to receive the key
 * @param key     the key that is about to be inserted
 * @param context decode context supplying the strict flag and the current line
 * @throws IllegalArgumentException if strict mode is enabled and the map
 *                                  already contains {@code key}
 */
static void checkDuplicateKey(final Map<String, Object> map, final String key, final DecodeContext context) {
    // Lenient mode never reports duplicates; the caller simply overwrites.
    if (!context.options.strict()) {
        return;
    }
    if (!map.containsKey(key)) {
        return;
    }
    final String message = "Duplicate key '" + key + "' at line " + (context.currentLine + 1);
    throw new IllegalArgumentException(message);
}

/**
* Finds the depth of the next non-blank line, skipping blank lines.
*
* @param context decode an object to deal with lines, delimiter, and options
* @param context decode context providing the lines, delimiter, and options
* @return the depth of the next non-blank line, or null if none exists
*/
static Integer findNextNonBlankLineDepth(final DecodeContext context) {
Expand All @@ -192,10 +208,35 @@ static Integer findNextNonBlankLineDepth(final DecodeContext context) {
return getDepth(context.lines[nextLineIdx], context);
}

/**
 * Reports whether a line contains a {@code [} or {@code ]} that sits outside
 * a double-quoted section.
 * <p>
 * The scan is a single left-to-right pass. A backslash causes the character
 * that follows it to be skipped (whether or not the scan is currently inside
 * quotes), and every unskipped double quote toggles the quoted state.
 *
 * @param line the line to scan
 * @return {@code true} if at least one bracket is found outside quotes,
 *         {@code false} otherwise
 */
static boolean hasUnquotedBrackets(final String line) {
    boolean quoted = false;
    boolean skipNext = false;
    for (final char current : line.toCharArray()) {
        if (skipNext) {
            // Character following a backslash: consume it without inspection.
            skipNext = false;
            continue;
        }
        if (current == BACKSLASH) {
            skipNext = true;
            continue;
        }
        if (current == DOUBLE_QUOTE) {
            quoted = !quoted;
            continue;
        }
        if (quoted) {
            continue;
        }
        if (current == '[' || current == ']') {
            return true;
        }
    }
    return false;
}

/**
* Validates that there are no multiple primitives at root level in strict mode.
*
* @param context decode an object to deal with lines, delimiter, and options
* @param context decode context providing the lines, delimiter, and options
* @throws IllegalArgumentException in case the next depth is equal to 0
*/
static void validateNoMultiplePrimitivesAtRoot(final DecodeContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ static void processKeyedArrayLine(final Map<String, Object> result, final String
} else {
// Check for conflicts with existing expanded paths
DecodeHelper.checkPathExpansionConflict(result, key, arrayValue, context);
DecodeHelper.checkDuplicateKey(result, key, context);
result.put(key, arrayValue);
}
}
Expand Down Expand Up @@ -236,6 +237,7 @@ private static void putKeyValueIntoMap(final Map<String, Object> map, final Stri
expandPathIntoMap(map, unescapedKey, value, context);
} else {
DecodeHelper.checkPathExpansionConflict(map, unescapedKey, value, context);
DecodeHelper.checkDuplicateKey(map, unescapedKey, context);
map.put(unescapedKey, value);
}
}
Expand Down Expand Up @@ -327,6 +329,7 @@ static boolean parseKeyedArrayField(final String fieldContent, final Map<String,
if (shouldExpandKey(originalKey, context)) {
expandPathIntoMap(item, key, arrayValue, context);
} else {
DecodeHelper.checkDuplicateKey(item, key, context);
item.put(key, arrayValue);
}

Expand All @@ -344,7 +347,7 @@ static boolean parseKeyedArrayField(final String fieldContent, final Map<String,
* @return true if the field was processed as a key-value pair, false otherwise
*/
static boolean parseKeyValueField(final String fieldContent, final Map<String, Object> item, final int depth,
final DecodeContext context) {
final DecodeContext context) {
final int colonIdx = DecodeHelper.findUnquotedColon(fieldContent);
if (colonIdx <= 0) {
return false;
Expand All @@ -359,6 +362,7 @@ static boolean parseKeyValueField(final String fieldContent, final Map<String, O
if (shouldExpandKey(fieldKey, context)) {
expandPathIntoMap(item, fieldKey, parsedValue, context);
} else {
DecodeHelper.checkDuplicateKey(item, fieldKey, context);
item.put(fieldKey, parsedValue);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ static Object parseListItem(final String content, final int depth, final DecodeC
);

final Map<String, Object> item = new LinkedHashMap<>();
DecodeHelper.checkDuplicateKey(item, key, context);
item.put(key, arrayValue);

// parseArrayWithDelimiter manages currentLine correctly:
Expand Down Expand Up @@ -130,6 +131,7 @@ static Object parseListItem(final String content, final int depth, final DecodeC
// List item is at depth + 1, so pass depth + 1 to parseObjectItemValue
parsedValue = ObjectDecoder.parseObjectItemValue(value, depth + 1, context);
}
DecodeHelper.checkDuplicateKey(item, key, context);
item.put(key, parsedValue);
parseListItemFields(item, depth, context);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ private static void processRootKeyedArrayLine(final Map<String, Object> objectMa
} else {
// Check for conflicts with existing expanded paths
DecodeHelper.checkPathExpansionConflict(objectMap, key, arrayValue, context);
DecodeHelper.checkDuplicateKey(objectMap, key, context);
objectMap.put(key, arrayValue);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,20 @@ static Object parse(final String value, final int maxStringLength) {
return StringEscaper.unescape(value);
}

// Check for leading zeros (treat as string, except for "0", "-0", "0.0", etc.)
// Check for forbidden leading zeros (treat as string, except for "0", "-0", "0.0", etc.)
// Per spec §4: tokens like "05", "0001", "-05", "-0001" must be treated as strings.
// But "0.5", "0e1", "-0.5", "-0e1" are valid numbers.
final String trimmed = value.trim();
if (trimmed.length() > 1 && trimmed.matches("^-?0+[0-7].*")) {
return value;
if (trimmed.length() > 1) {
// Match forbidden leading zeros: starts with optional '-', then one or more zeros,
// then another digit (0-9) — meaning it's a multi-digit number with leading zeros.
// Exclude cases where the zero is part of a fractional/exponent form like "0.5", "0e1".
final boolean hasLeadingZeros = trimmed.matches("^-?0+\\d.*");
// But we must NOT match "0.5" style numbers (single zero integer part)
final boolean isLikelyFractionalOrExponent = trimmed.matches("^-?0[.eE].*");
if (hasLeadingZeros && !isLikelyFractionalOrExponent) {
return value; // treat as string
}
}

// Try parsing as number
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,13 @@ private static boolean handleBlankLineInTabularArray(final int expectedRowDepth,

/**
* Determines if tabular array parsing should terminate based on line depth.
* Implements the full disambiguation algorithm per spec §9.3:
* - Compute the first unquoted occurrence of the active delimiter and the first unquoted colon.
* - If a same-depth line has no unquoted colon → row.
* - If both appear, compare first-unquoted positions:
* - Delimiter before colon → row.
* - Colon before delimiter → key-value line (end of rows).
* - If a line has an unquoted colon but no unquoted active delimiter → key-value line.
*
* @param line the line to check
* @param lineDepth the depth of the line
Expand All @@ -214,7 +221,6 @@ private static boolean handleBlankLineInTabularArray(final int expectedRowDepth,
*/
private static boolean shouldTerminateTabularArray(final String line, final int lineDepth,
final int expectedRowDepth, final DecodeContext context) {
// Header depth is one level above the expected row depth
final int headerDepth = expectedRowDepth - 1;

if (lineDepth < expectedRowDepth) {
Expand All @@ -228,14 +234,47 @@ private static boolean shouldTerminateTabularArray(final String line, final int
return true; // Line depth is less than expected - terminate
}

// Check for a key-value pair at the expected row depth
if (lineDepth == expectedRowDepth) {
final String rowContent = line.substring(expectedRowDepth * context.options.indent());
final int colonIdx = DecodeHelper.findUnquotedColon(rowContent);
return colonIdx > 0; // Key-value pair at the same depth as rows - terminate an array
if (lineDepth != expectedRowDepth) {
return false;
}

return false;
// Spec §9.3 disambiguation at row depth
final String rowContent = line.substring(expectedRowDepth * context.options.indent());
final char delimChar = context.delimiter.getValue();
final int delimIdx = findFirstUnquoted(rowContent, delimChar);
final int colonIdx = DecodeHelper.findUnquotedColon(rowContent);

if (colonIdx < 0) {
return false; // No colon → this is a row
}

if (delimIdx < 0) {
return true; // Colon present, no delimiter → key-value line
}

// Both colon and delimiter present: compare positions
return colonIdx < delimIdx; // Colon first → key-value; delimiter first → row
}

/**
 * Locates the first occurrence of a character that lies outside any
 * double-quoted section of the given text.
 * <p>
 * The text is scanned once from left to right. A backslash makes the scan
 * skip the character that follows it, and each unskipped double quote flips
 * the quoted state. Backslash and double quote are handled before the target
 * comparison, so they are never reported as matches themselves.
 *
 * @param content the text to scan
 * @param target  the character to look for
 * @return the zero-based index of the first unquoted match, or {@code -1}
 *         if there is none
 */
private static int findFirstUnquoted(final String content, final char target) {
    boolean quoted = false;
    boolean skipNext = false;
    final int length = content.length();
    for (int pos = 0; pos < length; pos++) {
        final char current = content.charAt(pos);
        if (skipNext) {
            // Character following a backslash: consume it without inspection.
            skipNext = false;
            continue;
        }
        if (current == '\\') {
            skipNext = true;
            continue;
        }
        if (current == '"') {
            quoted = !quoted;
            continue;
        }
        if (current == target && !quoted) {
            return pos;
        }
    }
    return -1;
}

/**
Expand Down
Loading
Loading