Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions schema_analysis/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ serde = { version = "1.0", features = [ "serde_derive" ] }
regex = "1.5" # Used to detect interesting strings
once_cell = "1.8" # For global constants that require allocation
ordered-float = { version = "3.4", features = [ "serde" ] } # To save sets of floats
ordermap = { version = "0.5", features = [ "serde" ] } # To preserve the order of fields in the schema

# These are used to allow the users of the library to run
# custom analysis on the nodes. Check src/context/aggregators.rs
Expand Down
5 changes: 2 additions & 3 deletions schema_analysis/src/analysis/schema.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::collections::BTreeMap;

use ordermap::OrderMap;
use serde::de::{Error, Visitor};

use crate::{Aggregate, Field, Schema};
Expand Down Expand Up @@ -173,7 +172,7 @@ impl<'de> Visitor<'de> for SchemaVisitor<'_> {
A: serde::de::MapAccess<'de>,
{
let mut keys = Vec::new();
let mut fields: BTreeMap<String, Field> = BTreeMap::new();
let mut fields: OrderMap<String, Field> = OrderMap::new();

while let Some(key) = map.next_key::<String>()? {
match fields.get_mut(&key) {
Expand Down
152 changes: 90 additions & 62 deletions schema_analysis/src/schema.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::collections::BTreeMap;

use ordermap::OrderMap;
use serde::{Deserialize, Serialize};

use crate::{
Expand Down Expand Up @@ -47,7 +46,7 @@ pub enum Schema {
Struct {
/// Each [String] key gets assigned a [Field].
/// Currently we are using an [OrderMap] (to preserve the order of the fields), but that might change in the future.
fields: BTreeMap<String, Field>,
fields: OrderMap<String, Field>,
/// The context aggregates information about the struct.
/// It is passed a vector of the key names.
context: MapStructContext,
Expand Down Expand Up @@ -101,6 +100,59 @@ pub struct FieldStatus {
//
// Schema implementations
//
impl Schema {
    /// Recursively orders the keys of every [Schema::Struct] by name
    /// (using [String::cmp]).
    ///
    /// While walking the tree, the variants of any [Schema::Union] are also
    /// put into the order defined by `schema_cmp`.
    pub fn sort_fields(&mut self) {
        match self {
            // Descend into the element description of the sequence.
            Self::Sequence { field, .. } => field.sort_fields(),
            Self::Struct { fields, .. } => {
                // Order this level first, then every nested value.
                fields.sort_keys();
                fields.values_mut().for_each(|entry| entry.sort_fields());
            }
            Self::Union { variants } => {
                variants.sort_by(schema_cmp);
                variants.iter_mut().for_each(|member| member.sort_fields());
            }
            // Leaf schemas have nothing nested to sort.
            Self::Null(_)
            | Self::Boolean(_)
            | Self::Integer(_)
            | Self::Float(_)
            | Self::String(_)
            | Self::Bytes(_) => {}
        }
    }

    /// Recursively sorts/normalises the order of [Schema::Union] variants.
    ///
    /// The order of [Schema::Struct] fields is left untouched; only their
    /// values are visited.
    pub fn sort_variants(&mut self) {
        match self {
            // Descend into the element description of the sequence.
            Self::Sequence { field, .. } => field.sort_variants(),
            Self::Struct { fields, .. } => {
                fields.values_mut().for_each(|entry| entry.sort_variants());
            }
            Self::Union { variants } => {
                variants.sort_by(schema_cmp);
                variants.iter_mut().for_each(|member| member.sort_variants());
            }
            // Leaf schemas have no variants to sort.
            Self::Null(_)
            | Self::Boolean(_)
            | Self::Integer(_)
            | Self::Float(_)
            | Self::String(_)
            | Self::Bytes(_) => {}
        }
    }
}
impl StructuralEq for Schema {
fn structural_eq(&self, other: &Self) -> bool {
use Schema::*;
Expand All @@ -123,7 +175,15 @@ impl StructuralEq for Schema {
Struct {
fields: fields_2, ..
},
) => fields_1.structural_eq(fields_2),
) => {
fields_1.len() == fields_2.len()
&& fields_1.iter().all(|(sk, sv)| {
let Some(ov) = fields_2.get(sk) else {
return false;
};
sv.structural_eq(ov)
})
}

(Union { variants: s }, Union { variants: o }) => {
let mut s = s.clone();
Expand Down Expand Up @@ -378,6 +438,17 @@ impl Field {
schema: Some(schema),
}
}

/// Forwards to `sort_fields` on the held schema; does nothing when this
/// field has no schema.
fn sort_fields(&mut self) {
    let Some(inner) = self.schema.as_mut() else {
        return;
    };
    inner.sort_fields();
}
/// Forwards to `sort_variants` on the held schema; does nothing when this
/// field has no schema.
fn sort_variants(&mut self) {
    let Some(inner) = self.schema.as_mut() else {
        return;
    };
    inner.sort_variants();
}
}
impl Coalesce for Field {
fn coalesce(&mut self, other: Self)
Expand Down Expand Up @@ -437,63 +508,20 @@ impl Coalesce for FieldStatus {
/// Since a [Schema::Union] should never hold two schemas of the same type, it is enough to
/// just compare the top level without recursion.
fn schema_cmp(first: &Schema, second: &Schema) -> std::cmp::Ordering {
use std::cmp::Ordering::*;
use Schema::*;
match first {
Null(_) => match second {
Null(_) => Equal,
_ => Less,
},
Boolean(_) => match second {
Null(_) | Boolean(_) => Equal,
_ => Less,
},
Integer(_) => match second {
Null(_) | Boolean(_) => Greater,
Integer(_) => Equal,
_ => Less,
},
Float(_) => match second {
Null(_) | Boolean(_) | Integer(_) => Greater,
Float(_) => Equal,
_ => Less,
},
String(_) => match second {
Null(_) | Boolean(_) | Integer(_) | Float(_) => Greater,
String(_) => Equal,
_ => Less,
},
Bytes(_) => match second {
Null(_) | Boolean(_) | Integer(_) | Float(_) | String(_) => Greater,
Bytes(_) => Equal,
_ => Less,
},
Sequence { .. } => match second {
Null(_) | Boolean(_) | Integer(_) | Float(_) | String(_) | Bytes(_) => Greater,
Sequence { .. } => Equal,
_ => Less,
},
Struct { .. } => match second {
Null(_)
| Boolean(_)
| Integer(_)
| Float(_)
| String(_)
| Bytes(_)
| Sequence { .. } => Greater,
Struct { .. } => Equal,
_ => Less,
},
Union { .. } => match second {
Null(_)
| Boolean(_)
| Integer(_)
| Float(_)
| String(_)
| Bytes(_)
| Sequence { .. }
| Struct { .. } => Greater,
Union { .. } => Equal,
},
fn ordering(v: &Schema) -> u8 {
use Schema::*;

match v {
Null(_) => 0,
Boolean(_) => 1,
Integer(_) => 2,
Float(_) => 3,
String(_) => 4,
Bytes(_) => 5,
Sequence { .. } => 6,
Struct { .. } => 7,
Union { .. } => 8,
}
}
Ord::cmp(&ordering(first), &ordering(second))
}
11 changes: 1 addition & 10 deletions schema_analysis/src/traits.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! A module holding the crate's public traits.

use std::{any::Any, collections::BTreeMap, fmt::Debug};
use std::{any::Any, fmt::Debug};

use downcast_rs::Downcast;

Expand Down Expand Up @@ -120,12 +120,3 @@ impl<T: StructuralEq> StructuralEq for Option<T> {
}
}
}
impl<K: StructuralEq, V: StructuralEq> StructuralEq for BTreeMap<K, V> {
    /// Two maps are structurally equal when they hold the same number of
    /// entries and, walking both in iteration order, each pair of keys and
    /// each pair of values is structurally equal.
    fn structural_eq(&self, other: &Self) -> bool {
        if self.len() != other.len() {
            return false;
        }
        for ((left_key, left_value), (right_key, right_value)) in self.iter().zip(other.iter()) {
            // Keys are compared before values, and the value check is skipped
            // as soon as the keys differ.
            if !left_key.structural_eq(right_key) || !left_value.structural_eq(right_value) {
                return false;
            }
        }
        true
    }
}
64 changes: 30 additions & 34 deletions schema_analysis/tests/shared/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,7 @@ macro_rules! test_format {
}

mod targets {
use std::collections::BTreeMap;

use maplit::btreemap;

use ordermap::OrderMap;
use schema_analysis::{Field, FieldStatus, Schema};

pub fn null() -> Schema {
Expand Down Expand Up @@ -272,7 +269,7 @@ mod targets {
}

pub fn empty_map_struct() -> Schema {
let field_schemas: BTreeMap<String, Field> = BTreeMap::new();
let field_schemas: OrderMap<String, Field> = OrderMap::new();
Schema::Struct {
fields: field_schemas,
context: Default::default(),
Expand All @@ -285,9 +282,7 @@ mod targets {
schema: Some(Schema::Integer(Default::default())),
};
hello_field.status.may_be_normal = true;
btreemap! {
"hello".into() => hello_field
}
[("hello".into(), hello_field)].into()
};
Schema::Struct {
fields,
Expand All @@ -306,10 +301,7 @@ mod targets {
schema: Some(Schema::String(Default::default())),
};
world_field.status.may_be_normal = true;
btreemap! {
"hello".into() => hello_field,
"world".into() => world_field,
}
[("hello".into(), hello_field), ("world".into(), world_field)].into()
};
Schema::Struct {
fields,
Expand All @@ -332,11 +324,12 @@ mod targets {
});
mixed_field.status.may_be_normal = true;

btreemap! {
"hello".into() => hello_field,
"world".into() => world_field,
"mixed".into() => mixed_field,
}
[
("hello".into(), hello_field),
("world".into(), world_field),
("mixed".into(), mixed_field),
]
.into()
};

let mut element_field = Field::with_schema(Schema::Struct {
Expand Down Expand Up @@ -367,12 +360,13 @@ mod targets {
null_or_missing_field.status.may_be_null = true;
null_or_missing_field.status.may_be_missing = true;

btreemap! {
"hello".into() => hello_field,
"possibly_null".into() => possibly_null_field,
"possibly_missing".into() => possibly_missing_field,
"null_or_missing".into() => null_or_missing_field,
}
[
("hello".into(), hello_field),
("possibly_null".into(), possibly_null_field),
("possibly_missing".into(), possibly_missing_field),
("null_or_missing".into(), null_or_missing_field),
]
.into()
};

let mut element_field = Field::with_schema(Schema::Struct {
Expand Down Expand Up @@ -406,11 +400,12 @@ mod targets {
};
sequence_field.status.may_be_normal = true;

btreemap! {
"hello".into() => hello_field,
"world".into() => world_field,
"sequence".into() => sequence_field,
}
[
("hello".into(), hello_field),
("world".into(), world_field),
("sequence".into(), sequence_field),
]
.into()
};
Schema::Struct {
fields,
Expand Down Expand Up @@ -441,12 +436,13 @@ mod targets {
};
sequence_field.status.may_be_normal = true;

btreemap! {
"hello".into() => hello_field,
"world".into() => world_field,
"optional".into() => optional_field,
"sequence".into() => sequence_field,
}
[
("hello".into(), hello_field),
("world".into(), world_field),
("optional".into(), optional_field),
("sequence".into(), sequence_field),
]
.into()
};
Schema::Struct {
fields,
Expand Down
Loading
Loading