Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .changeset/add-jsonb-operators.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
'@core/sync-service': patch
---

Add JSONB operators for where clauses:
- `->` and `->>` for field access (returns JSONB and text respectively)
- `@>` and `<@` for containment checks
- `?`, `?|`, and `?&` for key existence checks

Example shape requests:

```
# Filter by nested field value
GET /v1/shapes?table=users&where=(metadata ->> 'status') = 'active'

# Filter by JSON containment
GET /v1/shapes?table=orders&where=data @> '{"type": "premium"}'

# Filter by key existence
GET /v1/shapes?table=events&where=payload ? 'user_id'
Comment on lines +12 to +20
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Consider adding a language specifier for the code block.

The fenced code block could benefit from a language specifier for better syntax highlighting. Since these are HTTP requests, consider using http or bash.

-```
+```http
 # Filter by nested field value
 GET /v1/shapes?table=users&where=(metadata ->> 'status') = 'active'
🧰 Tools
🪛 markdownlint-cli2 (0.18.1)

12-12: Fenced code blocks should have a language specified

(MD040, fenced-code-language)

🤖 Prompt for AI Agents
In @.changeset/add-jsonb-operators.md around lines 12 - 20, Add an HTTP language
specifier to the fenced code block in .changeset/add-jsonb-operators.md so the
examples render with proper syntax highlighting; replace the opening fence "```"
with "```http" (or "```bash") for the block containing the three GET examples
that reference JSONB operators like (metadata ->> 'status'), @>, and ? to ensure
clients and reviewers see correct highlighting.

```
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ defmodule Electric.Replication.Eval.Env.BasicTypes do
| unknown | unknown | |
| bytea | user | |
| uuid | user | |
| jsonb | user | |
| anyarray | pseudo | |
| anycompatible | pseudo | |
| anycompatiblearray | pseudo | |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,178 @@ defmodule Electric.Replication.Eval.Env.KnownFunctions do
defpostgres("anyenum = anyenum -> bool", delegate: &Kernel.==/2)
defpostgres("anyenum <> anyenum -> bool", delegate: &Kernel.!=/2)

## JSONB functions

# Parse text to jsonb
defpostgres "jsonb(text) -> jsonb" do
def parse_jsonb(text) when is_binary(text), do: Jason.decode!(text)
end

# Output jsonb as text
defpostgres "jsonbout(jsonb) -> text" do
def jsonb_out(nil), do: nil
def jsonb_out(value), do: Jason.encode!(value)
end

# Get object field or array element as jsonb
defpostgres "jsonb -> text -> jsonb" do
def jsonb_get_by_key(json, key) when is_map(json) and is_binary(key) do
Map.get(json, key)
end

def jsonb_get_by_key(_, _), do: nil
end

defpostgres "jsonb -> int4 -> jsonb" do
def jsonb_get_by_index(json, index) when is_list(json) and is_integer(index) do
# PostgreSQL uses 0-based indexing for JSON arrays
# Negative indices count from the end (e.g., -1 is last element)
Enum.at(json, index)
end

def jsonb_get_by_index(_, _), do: nil
end
Comment on lines +176 to +184
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Negative index handling does not match PostgreSQL semantics.

PostgreSQL's jsonb -> int4 operator returns NULL for negative indices, but Enum.at/2 in Elixir supports negative indexing (e.g., -1 returns the last element). The PR summary confirms the intended behavior is "negative indices return NULL."

🐛 Proposed fix
 defpostgres "jsonb -> int4 -> jsonb" do
   def jsonb_get_by_index(json, index) when is_list(json) and is_integer(index) do
-    # PostgreSQL uses 0-based indexing for JSON arrays
-    # Negative indices count from the end (e.g., -1 is last element)
-    Enum.at(json, index)
+    # PostgreSQL uses 0-based indexing for JSON arrays
+    # Negative indices return NULL (unlike Elixir's Enum.at)
+    if index >= 0, do: Enum.at(json, index), else: nil
   end

   def jsonb_get_by_index(_, _), do: nil
 end
🤖 Prompt for AI Agents
In `@packages/sync-service/lib/electric/replication/eval/env/known_functions.ex`
around lines 176 - 184, The jsonb_get_by_index/2 implementation uses Enum.at/2
which treats negative indices as offsets from the end, but PostgreSQL semantics
require returning NULL for any negative index; update jsonb_get_by_index/2 (the
clause inside defpostgres "jsonb -> int4 -> jsonb") to explicitly return nil
when index is negative (e.g., guard or conditional index < 0), and only call
Enum.at(json, index) for non-negative indices while keeping the fallback clause
def jsonb_get_by_index(_, _), do: nil unchanged.


# Get object field or array element as text
defpostgres "jsonb ->> text -> text" do
def jsonb_get_text_by_key(json, key) when is_map(json) and is_binary(key) do
case Map.get(json, key) do
nil -> nil
value -> jsonb_value_to_text(value)
end
end

def jsonb_get_text_by_key(_, _), do: nil

defp jsonb_value_to_text(value) when is_binary(value), do: value
defp jsonb_value_to_text(value) when is_integer(value), do: Integer.to_string(value)
defp jsonb_value_to_text(value) when is_float(value), do: Float.to_string(value)
defp jsonb_value_to_text(true), do: "true"
defp jsonb_value_to_text(false), do: "false"
defp jsonb_value_to_text(nil), do: nil
# For nested objects/arrays, return JSON string
defp jsonb_value_to_text(value), do: Jason.encode!(value)
end
Comment on lines +186 to +205
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check all occurrences of jsonb_value_to_text in the file
rg -n "jsonb_value_to_text" packages/sync-service/lib/electric/replication/eval/env/known_functions.ex -B2 -A2

Repository: electric-sql/electric

Length of output: 798


🏁 Script executed:

#!/bin/bash
# Look at the jsonb ->> int4 operator implementation to see if it also uses jsonb_value_to_text
rg -n "jsonb.*int4|int4.*jsonb" packages/sync-service/lib/electric/replication/eval/env/known_functions.ex -A20

Repository: electric-sql/electric

Length of output: 1521


🏁 Script executed:

#!/bin/bash
# Get broader context around lines 190-209 to understand the defpostgres structure
sed -n '180,220p' packages/sync-service/lib/electric/replication/eval/env/known_functions.ex

Repository: electric-sql/electric

Length of output: 1481


Extract the value-to-text conversion logic as a shared module-level helper to avoid duplication.

The jsonb_value_to_text helper (lines 201–208) handles the conversion logic, but it's scoped privately within the first defpostgres block and cannot be accessed by the second operator (jsonb ->> int4 -> text at line 211). Consequently, the same conversion logic is duplicated inline in lines 216–221 of the second operator. Extract this as a module-level private function to eliminate duplication and simplify maintenance.

The PostgreSQL null behavior is correctly implemented: jsonb_value_to_text(nil) returns Elixir nil, which correctly maps to SQL NULL.

🤖 Prompt for AI Agents
In `@packages/sync-service/lib/electric/replication/eval/env/known_functions.ex`
around lines 190 - 209, Extract the jsonb_value_to_text clauses into a single
module-level private helper and have both PostgreSQL operator implementations
call it: move the defp jsonb_value_to_text(...) definitions out of the first
defpostgres block to the module scope as a private function, then in
jsonb_get_text_by_key (the "jsonb ->> text -> text" implementation) and in the
second operator implementation ("jsonb ->> int4 -> text") replace the duplicated
inline conversion logic with calls to jsonb_value_to_text/1; ensure the function
signatures and nil handling remain identical so SQL NULL behavior is preserved.


defpostgres "jsonb ->> int4 -> text" do
def jsonb_get_text_by_index(json, index) when is_list(json) and is_integer(index) do
# Negative indices count from the end (e.g., -1 is last element)
json |> Enum.at(index) |> jsonb_value_to_text()
end

def jsonb_get_text_by_index(_, _), do: nil
end
Comment on lines +207 to +214
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Undefined function: jsonb_value_to_text/1 is not defined in this defpostgres block.

Line 210 calls jsonb_value_to_text(), but this helper is privately defined inside a separate defpostgres block (lines 197-204). Since each defpostgres block is isolated, this will cause an undefined function error at runtime.

Additionally, the same negative index issue exists here - the implementation should return NULL for negative indices per PostgreSQL semantics.

🐛 Proposed fix - inline the helper and fix negative index handling
 defpostgres "jsonb ->> int4 -> text" do
   def jsonb_get_text_by_index(json, index) when is_list(json) and is_integer(index) do
-    # Negative indices count from the end (e.g., -1 is last element)
-    json |> Enum.at(index) |> jsonb_value_to_text()
+    # Negative indices return NULL (PostgreSQL semantics)
+    if index >= 0 do
+      case Enum.at(json, index) do
+        nil -> nil
+        value -> jsonb_value_to_text(value)
+      end
+    else
+      nil
+    end
   end

   def jsonb_get_text_by_index(_, _), do: nil
+
+  defp jsonb_value_to_text(value) when is_binary(value), do: value
+  defp jsonb_value_to_text(value) when is_integer(value), do: Integer.to_string(value)
+  defp jsonb_value_to_text(value) when is_float(value), do: Float.to_string(value)
+  defp jsonb_value_to_text(true), do: "true"
+  defp jsonb_value_to_text(false), do: "false"
+  defp jsonb_value_to_text(nil), do: nil
+  defp jsonb_value_to_text(value), do: Jason.encode!(value)
 end

Alternatively, consider extracting jsonb_value_to_text/1 as a module-level private function outside the defpostgres blocks so both operators can share it, as suggested in previous reviews.

🤖 Prompt for AI Agents
In `@packages/sync-service/lib/electric/replication/eval/env/known_functions.ex`
around lines 207 - 214, The jsonb_get_text_by_index defpostgres block calls
jsonb_value_to_text/1 which is defined in a different defpostgres block and
therefore unavailable; inline the helper logic (or move jsonb_value_to_text/1 to
a module-level private function) into the same scope as jsonb_get_text_by_index
so the function exists at runtime, and adjust the index handling in
jsonb_get_text_by_index to return nil for negative indices (Postgres treats
negative indexes as NULL) instead of attempting Enum.at with negative values;
ensure you update the pattern-matching clause for jsonb_get_text_by_index and/or
add a guard to reject negative indices and call the inlined/shared
jsonb_value_to_text logic.


# JSONB equality
defpostgres("jsonb = jsonb -> bool", delegate: &Kernel.==/2)
defpostgres("jsonb <> jsonb -> bool", delegate: &Kernel.!=/2)

# JSONB containment operators
# @> checks if left contains right, <@ checks if left is contained by right
defpostgres "jsonb @> jsonb -> bool" do
# Top-level: Arrays may contain primitive values (special-case in Postgres)
# e.g., '["foo", "bar"]'::jsonb @> '"bar"'::jsonb returns true
# This only applies at the top level, not in recursive array element checks
def jsonb_contains?(left, right)
when is_list(left) and not is_list(right) and not is_map(right) do
Enum.member?(left, right)
end

def jsonb_contains?(left, right), do: do_jsonb_contains?(left, right)

# Objects: all key-value pairs in right must exist in left
defp do_jsonb_contains?(left, right) when is_map(left) and is_map(right) do
Enum.all?(right, fn {key, right_value} ->
case Map.fetch(left, key) do
{:ok, left_value} -> do_jsonb_contains?(left_value, right_value)
:error -> false
end
end)
end

# Arrays: all elements in right must exist somewhere in left
defp do_jsonb_contains?(left, right) when is_list(left) and is_list(right) do
Enum.all?(right, fn right_elem ->
Enum.any?(left, fn left_elem -> do_jsonb_contains?(left_elem, right_elem) end)
end)
end

# Scalars: must be equal
defp do_jsonb_contains?(left, right), do: left == right
end

# <@ is the inverse of @>: x <@ y is equivalent to y @> x
defpostgres "jsonb <@ jsonb -> bool" do
# Top-level: Arrays may contain primitive values (special-case in Postgres)
# For <@, we check if right (container) is an array containing left (primitive)
def jsonb_contained_by?(left, right)
when is_list(right) and not is_list(left) and not is_map(left) do
Enum.member?(right, left)
end

def jsonb_contained_by?(left, right), do: do_jsonb_contained_by?(right, left)

defp do_jsonb_contained_by?(left, right) when is_map(left) and is_map(right) do
Enum.all?(right, fn {key, right_value} ->
case Map.fetch(left, key) do
{:ok, left_value} -> do_jsonb_contained_by?(left_value, right_value)
:error -> false
end
end)
end

defp do_jsonb_contained_by?(left, right) when is_list(left) and is_list(right) do
Enum.all?(right, fn right_elem ->
Enum.any?(left, fn left_elem -> do_jsonb_contained_by?(left_elem, right_elem) end)
end)
end

defp do_jsonb_contained_by?(left, right), do: left == right
end
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# JSONB key existence operators
# ? checks if key exists in object or string exists in array
defpostgres "jsonb ? text -> bool" do
def jsonb_key_exists?(json, key) when is_map(json) and is_binary(key) do
Map.has_key?(json, key)
end

# For arrays, check if the string exists as a top-level element
def jsonb_key_exists?(json, key) when is_list(json) and is_binary(key) do
Enum.member?(json, key)
end

def jsonb_key_exists?(_, _), do: false
end

# ?| checks if any of the keys exist
defpostgres "jsonb ?| text[] -> bool" do
def jsonb_any_key_exists?(json, keys) when is_map(json) and is_list(keys) do
Enum.any?(keys, &Map.has_key?(json, &1))
end

def jsonb_any_key_exists?(json, keys) when is_list(json) and is_list(keys) do
key_set = MapSet.new(keys)
Enum.any?(json, &(&1 in key_set))
end

def jsonb_any_key_exists?(_, _), do: false
end

# ?& checks if all of the keys exist
defpostgres "jsonb ?& text[] -> bool" do
def jsonb_all_keys_exist?(json, keys) when is_map(json) and is_list(keys) do
Enum.all?(keys, &Map.has_key?(json, &1))
end

def jsonb_all_keys_exist?(json, keys) when is_list(json) and is_list(keys) do
key_set = MapSet.new(keys)
Enum.all?(key_set, &Enum.member?(json, &1))
end

def jsonb_all_keys_exist?(_, _), do: false
end
Comment on lines +312 to +324
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Performance inconsistency in ?& for arrays compared to ?|.

The array implementation at lines 292-294 iterates over key_set and performs Enum.member?(json, &1) which is O(n) for each key, resulting in O(keys × json_length) complexity.

For consistency with ?| (lines 278-280), which achieves O(json_length) by iterating over json and checking against a MapSet, consider refactoring:

♻️ Proposed performance improvement
   def jsonb_all_keys_exist?(json, keys) when is_list(json) and is_list(keys) do
-    key_set = MapSet.new(keys)
-    Enum.all?(key_set, &Enum.member?(json, &1))
+    json_set = MapSet.new(json)
+    Enum.all?(keys, &MapSet.member?(json_set, &1))
   end
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# ?& checks if all of the keys exist
defpostgres "jsonb ?& text[] -> bool" do
def jsonb_all_keys_exist?(json, keys) when is_map(json) and is_list(keys) do
Enum.all?(keys, &Map.has_key?(json, &1))
end
def jsonb_all_keys_exist?(json, keys) when is_list(json) and is_list(keys) do
key_set = MapSet.new(keys)
Enum.all?(key_set, &Enum.member?(json, &1))
end
def jsonb_all_keys_exist?(_, _), do: false
end
# ?& checks if all of the keys exist
defpostgres "jsonb ?& text[] -> bool" do
def jsonb_all_keys_exist?(json, keys) when is_map(json) and is_list(keys) do
Enum.all?(keys, &Map.has_key?(json, &1))
end
def jsonb_all_keys_exist?(json, keys) when is_list(json) and is_list(keys) do
json_set = MapSet.new(json)
Enum.all?(keys, &MapSet.member?(json_set, &1))
end
def jsonb_all_keys_exist?(_, _), do: false
end


## Array functions
defpostgres("anyarray = anyarray -> bool", delegate: &Kernel.==/2)
defpostgres("anyarray <> anyarray -> bool", delegate: &Kernel.!=/2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,19 +184,28 @@ defmodule Electric.Replication.Eval.KnownDefinition do
|> Enum.unzip()
end

# Type pattern supports both simple types (int4) and array types (text[])
# Note: [[:alnum:]*_]+ matches alphanumeric, *, and _ (for type categories like *numeric_type*)
# The (?:\[\])? makes the [] suffix optional for array types
@type_pattern "[[:alnum:]*_]+(?:\\[\\])?"

# e.g.: + int4 -> int4
defp unary_op_regex,
do:
~r/^(?<operator>[+\-*\/<>=~!@#%^&|`?]+) (?<type>[[:alnum:]*_]+) -> (?<return_type>[[:alnum:]*_]+)$/
@unary_op_regex Regex.compile!(
"^(?<operator>[+\\-*\\/<>=~!@#%^&|`?]+) (?<type>#{@type_pattern}) -> (?<return_type>#{@type_pattern})$"
)
defp unary_op_regex, do: @unary_op_regex

# e.g.: int4 + int4 -> int4
defp binary_op_regex,
do:
~r/^(?<type1>[[:alnum:]*_]+) (?<operator>[+\-*\/<>=~!@#%^&|`?]+) (?<type2>[[:alnum:]*_]+) -> (?<return_type>[[:alnum:]*_]+)$/
# e.g.: int4 + int4 -> int4, jsonb ?| text[] -> bool
@binary_op_regex Regex.compile!(
"^(?<type1>#{@type_pattern}) (?<operator>[+\\-*\\/<>=~!@#%^&|`?]+) (?<type2>#{@type_pattern}) -> (?<return_type>#{@type_pattern})$"
)
defp binary_op_regex, do: @binary_op_regex

# e.g.: ceil(float4) -> int4
defp func_regex,
do: ~r/^(?<name>[[:alnum:]*_]+)\((?<args>[^\)]*)\) -> (?<return_type>[[:alnum:]*_]+)/
@func_regex Regex.compile!(
"^(?<name>[[:alnum:]*_]+)\\((?<args>[^\\)]*)\\) -> (?<return_type>#{@type_pattern})"
)
defp func_regex, do: @func_regex

defp parse_definition(operator_or_func, caller) do
cond do
Expand Down Expand Up @@ -225,8 +234,8 @@ defmodule Electric.Replication.Eval.KnownDefinition do
kind: :operator,
name: ~s|"#{operator}"|,
arity: 1,
args: [String.to_atom(type)],
returns: String.to_atom(return)
args: [parse_type_string(type)],
returns: parse_type_string(return)
}
end

Expand All @@ -238,11 +247,27 @@ defmodule Electric.Replication.Eval.KnownDefinition do
kind: :operator,
name: ~s|"#{operator}"|,
arity: 2,
args: [String.to_atom(type1), String.to_atom(type2)],
returns: String.to_atom(return)
args: [parse_type_string(type1), parse_type_string(type2)],
returns: parse_type_string(return)
}
end

# Convert type string to type representation
# "text" -> :text, "text[]" -> {:array, :text}
defp parse_type_string(type_str) do
if String.ends_with?(type_str, "[]") do
base_type = type_str |> String.slice(0..-3//1) |> String.to_atom()
{:array, base_type}
else
String.to_atom(type_str)
end
end

# Convert type representation to string (inverse of parse_type_string)
# :text -> "text", {:array, :text} -> "text[]"
defp type_to_string({:array, base_type}), do: "#{base_type}[]"
defp type_to_string(type) when is_atom(type), do: to_string(type)

defp parse_function(function) do
%{"name" => name, "args" => args, "return_type" => return} =
Regex.named_captures(func_regex(), function)
Expand All @@ -252,15 +277,15 @@ defmodule Electric.Replication.Eval.KnownDefinition do
args
|> String.split(",", trim: true)
|> Enum.map(fn arg ->
String.to_atom(String.trim(arg))
arg |> String.trim() |> parse_type_string()
end)

%{
kind: :function,
name: name,
arity: length(arg_types),
args: arg_types,
returns: String.to_atom(return)
returns: parse_type_string(return)
}
end

Expand Down Expand Up @@ -357,7 +382,7 @@ defmodule Electric.Replication.Eval.KnownDefinition do

defp validate_at_most_one_category(%{defined_at: line, args: args} = map) do
args
|> Enum.map(&to_string/1)
|> Enum.map(&type_to_string/1)
|> Enum.filter(&Regex.match?(~r/\*[[:alnum:]_]+\*/, &1))
|> Enum.uniq()
|> case do
Expand Down
2 changes: 2 additions & 0 deletions packages/sync-service/lib/electric/shapes/filter.ex
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ defmodule Electric.Shapes.Filter do
:where_cond_table,
:eq_index_table,
:incl_index_table,
:key_exists_index_table,
:refs_fun
]

Expand All @@ -45,6 +46,7 @@ defmodule Electric.Shapes.Filter do
where_cond_table: :ets.new(:filter_where, [:set, :private]),
eq_index_table: :ets.new(:filter_eq, [:set, :private]),
incl_index_table: :ets.new(:filter_incl, [:set, :private]),
key_exists_index_table: :ets.new(:filter_key_exists, [:set, :private]),
refs_fun: Keyword.get(opts, :refs_fun, fn _shape -> %{} end)
}
end
Expand Down
2 changes: 2 additions & 0 deletions packages/sync-service/lib/electric/shapes/filter/index.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ defmodule Electric.Shapes.Filter.Index do
alias Electric.Shapes.Filter
alias Electric.Shapes.Filter.Indexes.EqualityIndex
alias Electric.Shapes.Filter.Indexes.InclusionIndex
alias Electric.Shapes.Filter.Indexes.KeyExistenceIndex

defp module_for("="), do: EqualityIndex
defp module_for("@>"), do: InclusionIndex
defp module_for("?"), do: KeyExistenceIndex

def add_shape(%Filter{} = filter, where_cond_id, shape_id, %{operation: op} = optimisation) do
module_for(op).add_shape(filter, where_cond_id, shape_id, optimisation)
Expand Down
Loading
Loading