Skip to content

Commit 656ed26

Browse files
author
Abel Milash
committed
Fix rebase artifacts: remove duplicate methods from _ODataClient, add missing imports to _ODataBase
- Remove _SQL_* patterns and _sql_guardrails from _ODataClient (now inherited from _ODataBase)
- Add warnings import to _odata_base.py (needed by _sql_guardrails)
- Add VALIDATION_SQL_WRITE_BLOCKED and VALIDATION_SQL_UNSUPPORTED_SYNTAX imports to _odata_base.py
- Add missing table name lowercasing logic to _ODataBase._build_lookup_field_models
1 parent: 14a5900 · commit: 656ed26

2 files changed

Lines changed: 17 additions & 163 deletions

File tree

src/PowerPlatform/Dataverse/data/_odata.py

Lines changed: 0 additions & 159 deletions
Original file line number | Diff line number | Diff line change
@@ -614,165 +614,6 @@ def _do_request(url: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[st
614614
yield [x for x in items if isinstance(x, dict)]
615615
next_link = data.get("@odata.nextLink") or data.get("odata.nextLink") if isinstance(data, dict) else None
616616

617-
# ----------------------- SQL guardrail patterns --------------------
618-
_SQL_WRITE_RE = re.compile(
619-
r"^\s*(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b",
620-
re.IGNORECASE,
621-
)
622-
_SQL_COMMENT_RE = re.compile(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/|--[^\n]*", re.DOTALL)
623-
_SQL_LEADING_WILDCARD_RE = re.compile(r"\bLIKE\s+'%[^']", re.IGNORECASE)
624-
_SQL_IMPLICIT_CROSS_JOIN_RE = re.compile(
625-
r"\bFROM\s+[A-Za-z0-9_]+(?:\s+[A-Za-z0-9_]+)?\s*,\s*[A-Za-z0-9_]+",
626-
re.IGNORECASE,
627-
)
628-
# Server-blocked SQL patterns (save the round-trip by catching early)
629-
_SQL_UNSUPPORTED_JOIN_RE = re.compile(
630-
r"\b(?:CROSS\s+JOIN|RIGHT\s+(?:OUTER\s+)?JOIN|FULL\s+(?:OUTER\s+)?JOIN)\b",
631-
re.IGNORECASE,
632-
)
633-
_SQL_UNION_RE = re.compile(r"\bUNION\b", re.IGNORECASE)
634-
_SQL_HAVING_RE = re.compile(r"\bHAVING\b", re.IGNORECASE)
635-
_SQL_CTE_RE = re.compile(r"^\s*WITH\b", re.IGNORECASE)
636-
_SQL_SUBQUERY_RE = re.compile(
637-
r"\bIN\s*\(\s*SELECT\b|\bEXISTS\s*\(\s*SELECT\b|\(\s*SELECT\b.*\bFROM\b",
638-
re.IGNORECASE,
639-
)
640-
# SELECT * is intentionally rejected -- not a technical limitation but a
641-
# deliberate design decision. Wide entities (e.g. account has 307 columns)
642-
# make SELECT * extremely expensive on shared database infrastructure.
643-
# COUNT(*) is NOT matched because COUNT appears before the *.
644-
_SQL_SELECT_STAR_RE = re.compile(
645-
r"\bSELECT\b\s+(?:DISTINCT\s+)?(?:TOP\s+\d+(?:\s+PERCENT)?\s+)?\*\s",
646-
re.IGNORECASE,
647-
)
648-
649-
def _sql_guardrails(self, sql: str) -> str:
650-
"""Apply safety guardrails to a SQL query before sending to the server.
651-
652-
Checks split into two categories:
653-
654-
**Blocked** (``ValidationError`` -- saves a server round-trip):
655-
656-
1. Write statements (INSERT/UPDATE/DELETE/DROP/etc.)
657-
2. CROSS JOIN, RIGHT JOIN, FULL OUTER JOIN (server rejects these)
658-
3. UNION / UNION ALL (server rejects)
659-
4. HAVING clause (server rejects)
660-
5. CTE / WITH clause (server rejects)
661-
6. Subqueries -- IN (SELECT ...), EXISTS (SELECT ...) (server rejects)
662-
7. SELECT * -- intentional design decision, not a technical limitation.
663-
Wide entities make wildcard selects extremely expensive on shared
664-
database infrastructure. ``COUNT(*)`` is not affected.
665-
666-
**Warned** (``UserWarning`` -- query still executes):
667-
668-
8. Leading-wildcard LIKE (full table scan)
669-
9. Implicit cross join FROM a, b (cartesian product)
670-
671-
All blocked patterns are also blocked by the server, but catching
672-
them here saves the network round-trip and provides clearer error
673-
messages. To bypass a specific check (e.g., if the server adds
674-
support in the future), all checks are in this single method.
675-
676-
:param sql: The SQL string (already stripped).
677-
:return: The SQL string (unchanged).
678-
:raises ValidationError: If the SQL contains a blocked pattern.
679-
"""
680-
# --- BLOCKED (save server round-trip) ---
681-
682-
# 1. Block writes (strip SQL comments first to catch comment-prefixed writes)
683-
sql_no_comments = self._SQL_COMMENT_RE.sub(" ", sql).strip()
684-
if self._SQL_WRITE_RE.search(sql_no_comments):
685-
raise ValidationError(
686-
"SQL endpoint is read-only. Use client.records or "
687-
"client.dataframe for write operations "
688-
"(INSERT/UPDATE/DELETE are not supported).",
689-
subcode=VALIDATION_SQL_WRITE_BLOCKED,
690-
)
691-
692-
# 2. Block unsupported JOIN types
693-
m = self._SQL_UNSUPPORTED_JOIN_RE.search(sql)
694-
if m:
695-
raise ValidationError(
696-
f"Unsupported JOIN type: '{m.group(0).strip()}'. "
697-
"Only INNER JOIN and LEFT JOIN are supported by the "
698-
"Dataverse SQL endpoint.",
699-
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
700-
)
701-
702-
# 3. Block UNION
703-
if self._SQL_UNION_RE.search(sql):
704-
raise ValidationError(
705-
"UNION is not supported by the Dataverse SQL endpoint. "
706-
"Execute separate queries and combine results in Python "
707-
"(e.g. pd.concat([df1, df2])).",
708-
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
709-
)
710-
711-
# 4. Block HAVING
712-
if self._SQL_HAVING_RE.search(sql):
713-
raise ValidationError(
714-
"HAVING is not supported by the Dataverse SQL endpoint. "
715-
"Use WHERE to filter before GROUP BY instead.",
716-
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
717-
)
718-
719-
# 5. Block CTE / WITH
720-
if self._SQL_CTE_RE.search(sql):
721-
raise ValidationError(
722-
"CTE (WITH ... AS) is not supported by the Dataverse SQL "
723-
"endpoint. Use separate queries and combine in Python.",
724-
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
725-
)
726-
727-
# 6. Block subqueries
728-
if self._SQL_SUBQUERY_RE.search(sql):
729-
raise ValidationError(
730-
"Subqueries are not supported by the Dataverse SQL "
731-
"endpoint. Use separate SQL calls and combine results "
732-
"in Python (e.g. step 1: get IDs, step 2: WHERE IN).",
733-
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
734-
)
735-
736-
# 7. Block SELECT * -- intentional design decision.
737-
# Wide entities (e.g. account has 307 columns) make wildcard selects
738-
# extremely expensive on shared database infrastructure.
739-
# COUNT(*) is NOT matched: _SQL_SELECT_STAR_RE requires * to be the
740-
# first token after SELECT/DISTINCT/TOP N, so COUNT appears before *.
741-
if self._SQL_SELECT_STAR_RE.search(sql):
742-
raise ValidationError(
743-
"SELECT * is not supported. Specify column names explicitly "
744-
"(e.g. SELECT name, revenue FROM account). "
745-
"Use client.query.sql_columns('account') to discover available columns.",
746-
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
747-
)
748-
749-
# --- WARNED (query still executes) ---
750-
751-
# 8. Warn on leading-wildcard LIKE
752-
if self._SQL_LEADING_WILDCARD_RE.search(sql):
753-
warnings.warn(
754-
"Query contains a leading-wildcard LIKE pattern "
755-
"(e.g. LIKE '%value'). This forces a full table scan "
756-
"and may degrade performance on large tables. "
757-
"Prefer trailing wildcards (LIKE 'value%') when possible.",
758-
UserWarning,
759-
stacklevel=4,
760-
)
761-
762-
# 9. Warn on implicit cross joins (server allows but risky)
763-
if self._SQL_IMPLICIT_CROSS_JOIN_RE.search(sql):
764-
warnings.warn(
765-
"Query uses an implicit cross join (FROM table1, table2). "
766-
"This produces a cartesian product that can generate "
767-
"millions of intermediate rows and degrade shared database "
768-
"performance. Use explicit JOIN...ON syntax instead: "
769-
"FROM table1 a JOIN table2 b ON a.column = b.column",
770-
UserWarning,
771-
stacklevel=4,
772-
)
773-
774-
return sql
775-
776617
# --------------------------- SQL Custom API -------------------------
777618
def _query_sql(self, sql: str) -> list[dict[str, Any]]:
778619
"""Execute a read-only SQL SELECT using the Dataverse Web API ``?sql=`` capability.

src/PowerPlatform/Dataverse/data/_odata_base.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
import re
1414
import unicodedata
1515
import uuid
16+
import warnings
1617
from contextlib import contextmanager
1718
from contextvars import ContextVar
1819
from dataclasses import dataclass, field
@@ -26,6 +27,8 @@
2627
from ..core._error_codes import (
2728
VALIDATION_UNSUPPORTED_COLUMN_TYPE,
2829
VALIDATION_UNSUPPORTED_CACHE_KIND,
30+
VALIDATION_SQL_WRITE_BLOCKED,
31+
VALIDATION_SQL_UNSUPPORTED_SYNTAX,
2932
)
3033
from ..models.relationship import (
3134
LookupAttributeMetadata,
@@ -654,7 +657,17 @@ def _build_lookup_field_models(
654657
Returns ``(LookupAttributeMetadata, OneToManyRelationshipMetadata)``.
655658
Used by both the batch resolver and ``TableOperations.create_lookup_field``
656659
to avoid duplicating the metadata assembly logic.
660+
661+
Note: ``referencing_table`` and ``referenced_table`` are lowercased
662+
automatically because Dataverse stores entity logical names in
663+
lowercase. ``lookup_field_name`` is kept as-is (it is a SchemaName).
657664
"""
665+
# Dataverse logical names are always lowercase. Callers may pass
666+
# SchemaName-cased values (e.g. "new_SQLTeam"); normalise here so
667+
# the relationship metadata uses valid logical names.
668+
referencing_lower = referencing_table.lower()
669+
referenced_lower = referenced_table.lower()
670+
658671
lookup = LookupAttributeMetadata(
659672
schema_name=lookup_field_name,
660673
display_name=Label(
@@ -671,12 +684,12 @@ def _build_lookup_field_models(
671684
lookup.description = Label(
672685
localized_labels=[LocalizedLabel(label=description, language_code=language_code)]
673686
)
674-
rel_name = f"{referenced_table}_{referencing_table}_{lookup_field_name}"
687+
rel_name = f"{referenced_lower}_{referencing_lower}_{lookup_field_name}"
675688
relationship = OneToManyRelationshipMetadata(
676689
schema_name=rel_name,
677-
referenced_entity=referenced_table,
678-
referencing_entity=referencing_table,
679-
referenced_attribute=f"{referenced_table}id",
690+
referenced_entity=referenced_lower,
691+
referencing_entity=referencing_lower,
692+
referenced_attribute=f"{referenced_lower}id",
680693
cascade_configuration=CascadeConfiguration(delete=cascade_delete),
681694
)
682695
return lookup, relationship

0 commit comments

Comments
 (0)