Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
*/
package com.marklogic.client.datamovement.filter;

import com.fasterxml.jackson.core.JsonPointer;
import com.marklogic.client.datamovement.DocumentWriteSetFilter;
import com.marklogic.client.document.DocumentWriteOperation;
import com.marklogic.client.document.DocumentWriteSet;
import com.marklogic.client.impl.DocumentWriteOperationImpl;
import com.marklogic.client.impl.HandleAccessor;
import com.marklogic.client.impl.XmlFactories;
import com.marklogic.client.io.BaseHandle;
import com.marklogic.client.io.DocumentMetadataHandle;
import com.marklogic.client.io.Format;
Expand All @@ -16,6 +18,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
Expand Down Expand Up @@ -114,11 +118,53 @@ public Builder xmlExclusions(String... xpathExpressions) {
}

/**
 * Builds the filter, validating any configured exclusion expressions first so that a bad
 * JSON Pointer or XPath expression fails fast at configuration time rather than while
 * documents are being processed.
 *
 * @return an eval-based or Optic-based filter, depending on the builder's settings
 * @throws IllegalArgumentException if any configured exclusion expression is invalid
 */
public IncrementalWriteFilter build() {
    validateJsonExclusions();
    validateXmlExclusions();
    return useEvalQuery ?
        new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions) :
        new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
}

/**
 * Verifies that every configured JSON Pointer exclusion is non-blank and syntactically
 * valid, throwing a descriptive error otherwise. A blank pointer is rejected explicitly
 * because it would select the whole document, leaving nothing to hash.
 *
 * @throws IllegalArgumentException if an expression is blank or fails to compile
 */
private void validateJsonExclusions() {
    if (jsonExclusions != null) {
        for (String expression : jsonExclusions) {
            if (expression == null || expression.trim().isEmpty()) {
                throw new IllegalArgumentException(
                    "Empty JSON Pointer expression is not valid for excluding content from incremental write hash calculation; " +
                        "it would exclude the entire document. JSON Pointer expressions must start with '/'.");
            }
            try {
                // Compile solely for validation; the compiled pointer is discarded here.
                JsonPointer.compile(expression);
            } catch (IllegalArgumentException e) {
                throw new IllegalArgumentException(
                    String.format("Invalid JSON Pointer expression '%s' for excluding content from incremental write hash calculation. " +
                        "JSON Pointer expressions must start with '/'; cause: %s", expression, e.getMessage()), e);
            }
        }
    }
}

/**
 * Verifies that every configured XPath exclusion is non-blank and compiles successfully,
 * throwing a descriptive error otherwise.
 *
 * @throws IllegalArgumentException if an expression is blank or fails to compile
 */
private void validateXmlExclusions() {
    if (xmlExclusions != null) {
        // One XPath instance suffices for validating all expressions.
        XPath xpath = XmlFactories.getXPathFactory().newXPath();
        for (String expression : xmlExclusions) {
            if (expression == null || expression.trim().isEmpty()) {
                throw new IllegalArgumentException(
                    "Empty XPath expression is not valid for excluding content from incremental write hash calculation.");
            }
            try {
                xpath.compile(expression);
            } catch (XPathExpressionException e) {
                throw new IllegalArgumentException(
                    String.format("Invalid XPath expression '%s' for excluding content from incremental write hash calculation; cause: %s",
                        expression, e.getMessage()), e);
            }
        }
    }
}
}

protected final String hashKeyName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,107 @@ void xmlExclusions() {
assertEquals(10, writtenCount.get(), "Documents should be written since non-excluded content changed");
assertEquals(5, skippedCount.get(), "Skip count should remain at 5");
}

/**
 * JSON Pointer exclusions must apply only to JSON documents. An XML document written
 * alongside a JSON document has no XML exclusions configured here, so its full content
 * is hashed and any change to it — including the timestamp — triggers a write, while the
 * JSON document's timestamp change is excluded from hashing and the document is skipped.
 */
@Test
void jsonExclusionsIgnoredForXmlDocuments() {
    filter = IncrementalWriteFilter.newBuilder()
        .jsonExclusions("/timestamp")
        .onDocumentsSkipped(docs -> skippedCount.addAndGet(docs.length))
        .build();

    // First batch: one JSON doc and one XML doc, each carrying a timestamp.
    ObjectNode firstJson = objectMapper.createObjectNode();
    firstJson.put("id", 1);
    firstJson.put("timestamp", "2025-01-01T10:00:00Z");
    String firstXml = "<doc><id>1</id><timestamp>2025-01-01T10:00:00Z</timestamp></doc>";
    docs = new ArrayList<>();
    docs.add(new DocumentWriteOperationImpl("/incremental/test/mixed-doc.json", METADATA, new JacksonHandle(firstJson)));
    docs.add(new DocumentWriteOperationImpl("/incremental/test/mixed-doc.xml", METADATA, new StringHandle(firstXml).withFormat(Format.XML)));

    writeDocs(docs);
    assertEquals(2, writtenCount.get());
    assertEquals(0, skippedCount.get());

    // Second batch: only the timestamp values differ from the first batch.
    ObjectNode secondJson = objectMapper.createObjectNode();
    secondJson.put("id", 1);
    secondJson.put("timestamp", "2026-01-02T15:30:00Z");
    String secondXml = "<doc><id>1</id><timestamp>2026-01-02T15:30:00Z</timestamp></doc>";
    docs = new ArrayList<>();
    docs.add(new DocumentWriteOperationImpl("/incremental/test/mixed-doc.json", METADATA, new JacksonHandle(secondJson)));
    docs.add(new DocumentWriteOperationImpl("/incremental/test/mixed-doc.xml", METADATA, new StringHandle(secondXml).withFormat(Format.XML)));

    writeDocs(docs);
    assertEquals(3, writtenCount.get(), "XML doc should be written since its timestamp changed and no XML exclusions are configured");
    assertEquals(1, skippedCount.get(), "JSON doc should be skipped since only the excluded timestamp field changed");
}

/**
 * With canonicalizeJson(false), the raw document bytes are hashed, so two documents with
 * the same logical content but different key ordering hash differently and the second
 * write is not skipped.
 */
@Test
void jsonNotCanonicalizedCausesDifferentHashForReorderedKeys() {
    filter = IncrementalWriteFilter.newBuilder()
        .canonicalizeJson(false)
        .onDocumentsSkipped(docs -> skippedCount.addAndGet(docs.length))
        .build();

    // Initial write establishes the stored hash for this URI.
    String originalOrder = "{\"name\":\"Test\",\"id\":1,\"value\":100}";
    docs = new ArrayList<>();
    docs.add(new DocumentWriteOperationImpl("/incremental/test/non-canonical.json", METADATA,
        new StringHandle(originalOrder).withFormat(Format.JSON)));

    writeDocs(docs);
    assertEquals(1, writtenCount.get());
    assertEquals(0, skippedCount.get());

    // Same logical content, different key order — bytes differ, so the hash differs.
    String reorderedKeys = "{\"id\":1,\"value\":100,\"name\":\"Test\"}";
    docs = new ArrayList<>();
    docs.add(new DocumentWriteOperationImpl("/incremental/test/non-canonical.json", METADATA,
        new StringHandle(reorderedKeys).withFormat(Format.JSON)));

    writeDocs(docs);
    assertEquals(2, writtenCount.get(), "Document should be written because key order differs and JSON is not canonicalized");
    assertEquals(0, skippedCount.get(), "No documents should be skipped");
}

/**
 * With the default canonicalizeJson(true), JSON is normalized before hashing, so two
 * documents with the same logical content but different key ordering hash identically
 * and the second write is skipped.
 */
@Test
void jsonCanonicalizedProducesSameHashForReorderedKeys() {
    filter = IncrementalWriteFilter.newBuilder()
        .onDocumentsSkipped(docs -> skippedCount.addAndGet(docs.length))
        .build();

    // Initial write establishes the stored hash for this URI.
    String originalOrder = "{\"name\":\"Test\",\"id\":1,\"value\":100}";
    docs = new ArrayList<>();
    docs.add(new DocumentWriteOperationImpl("/incremental/test/canonical.json", METADATA,
        new StringHandle(originalOrder).withFormat(Format.JSON)));

    writeDocs(docs);
    assertEquals(1, writtenCount.get());
    assertEquals(0, skippedCount.get());

    // Same logical content, different key order — canonicalization yields the same hash.
    String reorderedKeys = "{\"id\":1,\"value\":100,\"name\":\"Test\"}";
    docs = new ArrayList<>();
    docs.add(new DocumentWriteOperationImpl("/incremental/test/canonical.json", METADATA,
        new StringHandle(reorderedKeys).withFormat(Format.JSON)));

    writeDocs(docs);
    assertEquals(1, writtenCount.get(), "Document should be skipped because canonicalized JSON produces the same hash");
    assertEquals(1, skippedCount.get(), "One document should be skipped");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
*/
package com.marklogic.client.datamovement.filter;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

class ApplyInvalidExclusionsToIncrementalWriteTest extends AbstractIncrementalWriteTest {

    /**
     * An invalid JSON Pointer (missing leading slash) must fail at build() time so the
     * user can fix the configuration before any documents are processed.
     */
    @Test
    void invalidJsonPointerExpression() {
        IncrementalWriteFilter.Builder builder = IncrementalWriteFilter.newBuilder()
            .jsonExclusions("timestamp"); // Invalid - missing leading slash

        IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, builder::build);

        assertMessageContains(ex, "Invalid JSON Pointer expression 'timestamp'",
            "Error message should include the invalid expression.");
        assertMessageContains(ex, "incremental write",
            "Error message should mention incremental write context.");
        assertMessageContains(ex, "must start with '/'",
            "Error message should hint at the fix.");
    }

    /**
     * An empty JSON Pointer must be rejected: it would exclude the entire document,
     * leaving nothing to hash.
     */
    @Test
    void emptyJsonPointerExpression() {
        IncrementalWriteFilter.Builder builder = IncrementalWriteFilter.newBuilder()
            .jsonExclusions(""); // Invalid - would exclude entire document

        IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, builder::build);

        assertMessageContains(ex, "Empty JSON Pointer expression",
            "Error message should indicate empty expression.");
        assertMessageContains(ex, "would exclude the entire document",
            "Error message should explain why it's invalid.");
    }

    /**
     * An invalid XPath expression must fail at build() time so the user can fix the
     * configuration before any documents are processed.
     */
    @Test
    void invalidXPathExpression() {
        IncrementalWriteFilter.Builder builder = IncrementalWriteFilter.newBuilder()
            .xmlExclusions("[[[invalid xpath"); // Invalid XPath syntax

        IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, builder::build);

        assertMessageContains(ex, "Invalid XPath expression '[[[invalid xpath'",
            "Error message should include the invalid expression.");
        assertMessageContains(ex, "incremental write",
            "Error message should mention incremental write context.");
    }

    // Asserts the exception message contains the expected fragment, echoing the actual
    // message on failure to make diagnosing a changed message straightforward.
    private static void assertMessageContains(IllegalArgumentException ex, String fragment, String reason) {
        assertTrue(ex.getMessage().contains(fragment), reason + " Actual: " + ex.getMessage());
    }
}