NIFI-15047 - Initial JSON Schemas support for NiFi Records #10380
NIFI-15047 - Initial JSON Schemas support for NiFi Records #10380
pvillard31 wants to merge 3 commits into apache:main from
Conversation
31e90be to
0724b08
Compare
exceptionfactory
left a comment
There was a problem hiding this comment.
Thanks for the work on putting this together @pvillard31. I think general support for JSON Schemas would be a very helpful addition, but unfortunately available review cycles have been lacking to pick this up.
Although it can be helpful to have the various elements together, I think it would be helpful to consider ways to introduce this in a more incremental fashion. For example, focusing on the nifi-record changes individually would be helpful to address questions there. Having the shared JSON Schema changes would help consider those clearly. Decoupling the Confluent Schema Registry updates would be helpful as a follow-on, after the initial support is implemented.
That's just one option to consider for making some incremental progress; otherwise, I think it may be best to revisit this as a whole down the road.
Summary
NIFI-15047 - Initial JSON Schemas support for NiFi Records
Note - the PR is currently in DRAFT given this is a very large change that will likely require many rounds of discussions and review cycles.
- Add nifi-json-schema-shared, plus test coverage for conversion and keyword handling
- Extend RecordField / SimpleRecordSchema with validator support and invoke them from StandardSchemaValidator
- Add JsonSchemaRegistryService with JSON schema registration, retrieval, and definition exposure; register service and allow JSON schema type
- Update ValidateRecord to capture field- and record-level validation issues with richer provenance messaging; add integration tests using the new registry

In addition to the tests added in the code, I used a flow in the form of:
To confirm the handling of valid/invalid records with the below scenarios:
Minimal Person
Schema
{ "$id": "https://example.com/person.schema.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Person", "type": "object", "properties": { "firstName": { "type": "string", "description": "The person's first name." }, "lastName": { "type": "string", "description": "The person's last name." }, "age": { "description": "Age in years which must be equal to or greater than zero.", "type": "integer", "minimum": 0 } } }
Payload
[{ "firstName": "John", "lastName": "Doe", "age": 21 },{ "firstName": "Jane", "lastName": "Doe", "age": -1 }]
Object Constraints
Schema
{ "$id": "urn:nifi:test:object-constraints", "title": "ObjectConstraints", "type": "object", "additionalProperties": true, "minProperties": 4, "maxProperties": 4, "required": [ "id", "nickname", "metadata" ], "properties": { "id": { "type": "string", "minLength": 3 }, "nickname": { "type": [ "string", "null" ], "maxLength": 10 }, "metadata": { "type": "object", "required": [ "active" ], "additionalProperties": false, "properties": { "active": { "type": "boolean" }, "level": { "type": [ "integer", "null" ], "minimum": 1 } } }, "note": { "type": [ "string", "null" ] } } }
Payload
[ { "testCase": "valid", "id": "abc123", "nickname": null, "metadata": { "active": true, "level": 5 } }, { "testCase": "invalid-missing-nickname", "id": "short", "metadata": { "active": true } }, { "testCase": "invalid-too-many-properties", "id": "abc123", "nickname": "ally", "metadata": { "active": false, "level": 3 }, "note": "extra info" }, { "testCase": "invalid-additional-property", "id": "abc123", "nickname": "ally", "metadata": { "active": true }, "unexpected": "not allowed" } ]
String Constraints
Schema
{ "$id": "urn:nifi:test:string-constraints", "title": "StringConstraints", "type": "object", "additionalProperties": true, "required": [ "sku", "releaseDate", "releaseTime", "lastUpdated", "itemUuid" ], "properties": { "sku": { "type": "string", "minLength": 7, "maxLength": 7, "pattern": "^[A-Z]{3}-[0-9]{3}$" }, "releaseDate": { "type": "string", "format": "date" }, "releaseTime": { "type": "string", "format": "time" }, "lastUpdated": { "type": "string", "format": "date-time" }, "itemUuid": { "type": "string", "format": "uuid" }, "description": { "type": [ "string", "null" ], "maxLength": 20 } } }
Payload
[ { "testCase": "valid", "sku": "ABC-123", "releaseDate": "2024-01-15", "releaseTime": "13:45:00", "lastUpdated": "2024-01-15T13:45:00Z", "itemUuid": "123e4567-e89b-12d3-a456-426614174000", "description": "Launch batch" }, { "testCase": "invalid-pattern-and-format", "sku": "ab-12", "releaseDate": "2024-13-01", "releaseTime": "25:61:00", "lastUpdated": "not-a-timestamp", "itemUuid": "not-a-uuid", "description": "This description is way too long to be allowed" } ]
Numeric Constraints
Schema
{ "$id": "urn:nifi:test:numeric-constraints", "title": "NumericConstraints", "type": "object", "additionalProperties": true, "required": [ "quantity", "ratio", "step" ], "properties": { "quantity": { "type": "integer", "minimum": 1, "maximum": 100 }, "ratio": { "type": "number", "exclusiveMinimum": 0, "exclusiveMaximum": 1 }, "step": { "type": "number", "multipleOf": 0.25 } } }
Payload
[ { "testCase": "valid", "quantity": 10, "ratio": 0.5, "step": 1.75 }, { "testCase": "invalid-range-and-step", "quantity": 0, "ratio": 1, "step": 0.3 } ]
Enum and Constants
Schema
{ "$id": "urn:nifi:test:enum-const", "title": "EnumAndConst", "type": "object", "additionalProperties": true, "required": [ "status", "region" ], "properties": { "status": { "type": "string", "enum": [ "OPEN", "CLOSED", "ON_HOLD" ] }, "region": { "const": "NA" }, "priority": { "type": "integer", "enum": [ 1, 2, 3 ] }, "alias": { "type": [ "string", "null" ] } } }
Payload
[ { "testCase": "valid", "status": "OPEN", "region": "NA", "priority": 2, "alias": null }, { "testCase": "invalid-enum-and-const", "status": "INVALID", "region": "EU", "priority": 5 } ]
Arrays and Nested Objects
Schema
{ "$id": "urn:nifi:test:array-constraints", "title": "ArrayConstraints", "type": "object", "additionalProperties": true, "required": [ "tags", "participants" ], "properties": { "tags": { "type": "array", "items": { "type": "string", "minLength": 2 }, "minItems": 1, "maxItems": 4, "uniqueItems": true }, "participants": { "type": "array", "minItems": 1, "items": { "type": "object", "required": [ "name" ], "additionalProperties": false, "properties": { "name": { "type": "string", "minLength": 1 }, "age": { "type": [ "integer", "null" ], "minimum": 0 } } } } } }
Payload
[ { "testCase": "valid", "tags": [ "alpha", "beta" ], "participants": [ { "name": "Alice", "age": 30 }, { "name": "Bob", "age": null } ] }, { "testCase": "invalid-duplicate-tags", "tags": [ "alpha", "alpha" ], "participants": [ { "name": "Carol", "age": 25 } ] }, { "testCase": "invalid-empty-tags", "tags": [], "participants": [ { "name": "Dana", "age": 28 } ] }, { "testCase": "invalid-too-many-tags", "tags": [ "aa", "bb", "cc", "dd", "ee" ], "participants": [ { "name": "Evan", "age": 32 } ] }, { "testCase": "invalid-missing-participant-name", "tags": [ "alpha", "delta" ], "participants": [ { "age": 22 } ] }, { "testCase": "invalid-empty-participants", "tags": [ "alpha", "gamma" ], "participants": [] } ]
Dynamic Headers 1
Schema
{ "$id": "urn:test:dynamic-headers", "type": "object", "additionalProperties": false, "properties": { "fixed": { "type": "string" } }, "patternProperties": { "^x-": { "type": "string", "minLength": 2 } }, "required": [ "fixed" ] }
Payload
[ { "fixed": "value", "x-trace": "abc123", "x-user": "js" }, { "fixed": "value", "x-short": "a", "z-other": "not allowed" } ]
Dynamic Headers 2
Schema
{ "$id": "urn:test:device-metrics", "type": "object", "additionalProperties": false, "minProperties": 2, "patternProperties": { "^device_[0-9]+$": { "type": "object", "required": [ "temperature", "status" ], "properties": { "temperature": { "type": "number", "minimum": -40, "maximum": 85 }, "status": { "type": "string", "enum": [ "OK", "WARN", "FAIL" ] } }, "additionalProperties": false } }, "properties": { "batchId": { "type": "string", "minLength": 8 } }, "required": [ "batchId" ] }
Payload
[ { "batchId": "2024-09-30", "device_101": { "temperature": 21.5, "status": "OK" }, "device_202": { "temperature": 42, "status": "WARN" } }, { "batchId": "2024-09-30", "device_A12": { "temperature": 20, "status": "OK" }, "device_303": { "temperature": 120, "status": "OK" } } ]