Skip to content

Commit 02e5b0d

Browse files
committed
fix sql parsing, add tests
1 parent 6497d40 commit 02e5b0d

6 files changed

Lines changed: 73 additions & 21 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ A minimal Python SDK to use Microsoft Dataverse as a database for Azure AI Found
1414
## Features
1515

1616
- Simple `DataverseClient` facade for CRUD, SQL (read-only), and table metadata.
17-
- SQL-over-API: Constrained T-SQL (single SELECT with limited WHERE/TOP/ORDER BY) via native Web API `?sql=` parameter.
17+
- SQL-over-API: Constrained SQL (single SELECT with limited WHERE/TOP/ORDER BY) via native Web API `?sql=` parameter.
1818
- Table metadata ops: create simple custom tables with primitive columns (string/int/decimal/float/datetime/bool) and delete them.
1919
- Bulk create via `CreateMultiple` (collection-bound) by passing `list[dict]` to `create(entity_set, payloads)`; returns list of created IDs.
2020
- Bulk update via `UpdateMultiple` by calling `update_multiple(entity_set, records)` with primary key attribute present in each record; returns nothing.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ requires = ["setuptools>=61.0"]
33
build-backend = "setuptools.build_meta"
44

55
[project]
6-
name = "dataverse-sdk-poc"
6+
name = "dataverse-python-client"
77
version = "0.1.0"
8-
description = "POC: Dataverse Python SDK with TDS reads and OData CRUD via SQL router"
8+
description = "Dataverse Python client"
99
authors = [{ name = "POC" }]
1010
readme = "README.md"
1111
requires-python = ">=3.10"

src/dataverse_sdk/client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class DataverseClient:
1414
1515
This client exposes a simple, stable surface for:
1616
- OData CRUD: create, get, update, delete records
17-
- SQL (read-only): execute T-SQL via Dataverse Custom API (no ODBC/TDS driver)
17+
- SQL (read-only): run a constrained SELECT through the Web API `?sql=` parameter
1818
- Table metadata: create, inspect, and delete simple custom tables
1919
2020
The client owns authentication (Azure Identity) and configuration, and delegates
@@ -183,7 +183,7 @@ def get_multiple(
183183
)
184184

185185
# SQL via Web API sql parameter
186-
def query_sql(self, tsql: str):
186+
def query_sql(self, sql: str):
187187
"""Execute a read-only SQL query using the Dataverse Web API `?sql=` capability.
188188
189189
The query must follow the currently supported subset: single SELECT with optional WHERE,
@@ -192,15 +192,15 @@ def query_sql(self, tsql: str):
192192
193193
Parameters
194194
----------
195-
tsql : str
195+
sql : str
196196
Supported single SELECT statement.
197197
198198
Returns
199199
-------
200200
list[dict]
201201
Result rows (empty list if none).
202202
"""
203-
return self._get_odata().query_sql(tsql)
203+
return self._get_odata().query_sql(sql)
204204

205205
# Table metadata helpers
206206
def get_table_info(self, tablename: str) -> Optional[Dict[str, Any]]:

src/dataverse_sdk/odata.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ def _do_request(url: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[st
376376
next_link = data.get("@odata.nextLink") or data.get("odata.nextLink") if isinstance(data, dict) else None
377377

378378
# --------------------------- SQL via Web API (?sql=) ----------------
379-
def query_sql(self, tsql: str) -> list[dict[str, Any]]:
379+
def query_sql(self, sql: str) -> list[dict[str, Any]]:
380380
"""Execute a read-only SQL query using the Dataverse Web API `?sql=` capability.
381381
382382
The platform supports a constrained subset of SQL SELECT statements directly on entity set endpoints:
@@ -387,7 +387,7 @@ def query_sql(self, tsql: str) -> list[dict[str, Any]]:
387387
388388
Parameters
389389
----------
390-
tsql : str
390+
sql : str
391391
Single SELECT statement within supported subset.
392392
393393
Returns
@@ -402,17 +402,12 @@ def query_sql(self, tsql: str) -> list[dict[str, Any]]:
402402
RuntimeError
403403
If metadata lookup for the logical name fails.
404404
"""
405-
if not isinstance(tsql, str) or not tsql.strip():
406-
raise ValueError("tsql must be a non-empty string")
407-
sql = tsql.strip()
405+
if not isinstance(sql, str) or not sql.strip():
406+
raise ValueError("sql must be a non-empty string")
407+
sql = sql.strip()
408408

409-
# Naive parse: find token after FROM (ignore brackets); stop at whitespace/newline
410-
# Example: SELECT name FROM account AS a WHERE a.name LIKE 'Acme%'
411-
m = re.search(r"from\s+([a-zA-Z0-9_]+)\b", sql, flags=re.IGNORECASE)
412-
if not m:
413-
raise ValueError("Unable to determine table logical name from SQL (expected 'FROM <logical>').")
414-
logical_candidate = m.group(1)
415-
logical = logical_candidate.lower()
409+
# Extract logical table name via helper (robust to identifiers ending with 'from')
410+
logical = self._extract_logical_table(sql)
416411

417412
entity_set = self._entity_set_from_logical(logical)
418413
# Issue GET /{entity_set}?sql=<query>
@@ -445,6 +440,25 @@ def query_sql(self, tsql: str) -> list[dict[str, Any]]:
445440
return [row for row in body if isinstance(row, dict)]
446441
return []
447442

443+
@staticmethod
444+
def _extract_logical_table(sql: str) -> str:
445+
"""Extract the logical table name after the first standalone FROM.
446+
447+
Examples:
448+
SELECT * FROM account
449+
SELECT col1, startfrom FROM new_sampleitem WHERE col1 = 1
450+
451+
"""
452+
if not isinstance(sql, str):
453+
raise ValueError("sql must be a string")
454+
# Mask out single-quoted string literals to avoid matching FROM inside them.
455+
masked = re.sub(r"'([^']|'')*'", "'x'", sql)
456+
pattern = r"\bfrom\b\s+([A-Za-z0-9_]+)" # minimal, single-line regex
457+
m = re.search(pattern, masked, flags=re.IGNORECASE)
458+
if not m:
459+
raise ValueError("Unable to determine table logical name from SQL (expected 'FROM <name>').")
460+
return m.group(1).lower()
461+
448462
# ---------------------- Entity set resolution -----------------------
449463
def _entity_set_from_logical(self, logical: str) -> str:
450464
"""Resolve entity set name (plural) from a logical (singular) name using metadata.

src/dataverse_sdk/odata_pandas_wrappers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,13 @@ def get_ids(self, entity_set: str, ids: Sequence[str] | pd.Series | pd.Index, se
139139
return pd.DataFrame(rows)
140140

141141
# --------------------------- Query SQL -------------------------------
142-
def query_sql_df(self, tsql: str) -> pd.DataFrame:
142+
def query_sql_df(self, sql: str) -> pd.DataFrame:
143143
"""Execute a SQL query via the Dataverse Web API `?sql=` parameter and return a DataFrame.
144144
145145
The statement must adhere to the supported subset (single SELECT, optional WHERE/TOP/ORDER BY, no joins).
146146
Empty result -> empty DataFrame (columns inferred only if rows present).
147147
"""
148-
rows: Any = self._c.query_sql(tsql)
148+
rows: Any = self._c.query_sql(sql)
149149

150150
# If API returned a JSON string, parse it
151151
if isinstance(rows, str):

tests/test_sql_parse.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Unit tests for ``ODataClient._extract_logical_table``.

The helper is a static method that only inspects the SQL text, so the tests
call it on the class directly instead of constructing a client with a fake
auth object. This keeps them independent of the constructor signature.
"""
import pytest

from dataverse_sdk.odata import ODataClient

# Bound once so each test reads as a plain function call.
_extract = ODataClient._extract_logical_table


def test_basic_from():
    assert _extract("SELECT a FROM account") == "account"


def test_underscore_name():
    assert _extract("select x FROM new_sampleitem where x=1") == "new_sampleitem"


def test_startfrom_identifier():
    # The 'from' suffix of the 'startfrom' column must not be taken as the keyword.
    assert _extract("SELECT col, startfrom FROM case") == "case"


def test_case_insensitive_keyword():
    assert _extract("SeLeCt 1 FrOm ACCOUNT") == "account"


def test_missing_from_raises():
    with pytest.raises(ValueError):
        _extract("SELECT 1")


def test_from_as_value_not_table():
    # A 'from' inside a string literal must not be taken as the FROM clause.
    sql = "SELECT 'from something', col FROM incident"
    assert _extract(sql) == "incident"


def test_from_inside_literal_with_doubled_quote():
    # A doubled single quote ('') stays inside the literal being masked.
    sql = "SELECT 'it''s from somewhere', col FROM incident"
    assert _extract(sql) == "incident"

0 commit comments

Comments
 (0)