@@ -614,165 +614,6 @@ def _do_request(url: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[st
614614 yield [x for x in items if isinstance (x , dict )]
615615 next_link = data .get ("@odata.nextLink" ) or data .get ("odata.nextLink" ) if isinstance (data , dict ) else None
616616
# ----------------------- SQL guardrail patterns --------------------
# Pre-compiled patterns consulted by ``_sql_guardrails``. All are
# case-insensitive because SQL keywords are.

# Statement-leading keywords that modify data, schema or permissions.
# Anchored at the start, so it only fires when the statement itself is a write.
_SQL_WRITE_RE = re.compile(
    r"^\s*(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b",
    re.IGNORECASE,
)
# Block comments (/* ... */) and line comments (-- ... to end of line).
_SQL_COMMENT_RE = re.compile(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/|--[^\n]*", re.DOTALL)
# LIKE whose literal starts with '%' followed by at least one more character.
_SQL_LEADING_WILDCARD_RE = re.compile(r"\bLIKE\s+'%[^']", re.IGNORECASE)
# Comma-separated tables in FROM (optionally with an alias on the first one).
_SQL_IMPLICIT_CROSS_JOIN_RE = re.compile(
    r"\bFROM\s+[A-Za-z0-9_]+(?:\s+[A-Za-z0-9_]+)?\s*,\s*[A-Za-z0-9_]+",
    re.IGNORECASE,
)
# Server-blocked SQL patterns (save the round-trip by catching early)
_SQL_UNSUPPORTED_JOIN_RE = re.compile(
    r"\b(?:CROSS\s+JOIN|RIGHT\s+(?:OUTER\s+)?JOIN|FULL\s+(?:OUTER\s+)?JOIN)\b",
    re.IGNORECASE,
)
_SQL_UNION_RE = re.compile(r"\bUNION\b", re.IGNORECASE)
_SQL_HAVING_RE = re.compile(r"\bHAVING\b", re.IGNORECASE)
_SQL_CTE_RE = re.compile(r"^\s*WITH\b", re.IGNORECASE)
# IN (SELECT ...), EXISTS (SELECT ...), or any parenthesised SELECT ... FROM.
# DOTALL lets ``.*`` cross line breaks, so a derived table whose FROM sits on
# the line after its SELECT is still recognised.
_SQL_SUBQUERY_RE = re.compile(
    r"\bIN\s*\(\s*SELECT\b|\bEXISTS\s*\(\s*SELECT\b|\(\s*SELECT\b.*\bFROM\b",
    re.IGNORECASE | re.DOTALL,
)
# SELECT * is intentionally rejected -- not a technical limitation but a
# deliberate design decision. Wide entities (e.g. account has 307 columns)
# make SELECT * extremely expensive on shared database infrastructure.
# COUNT(*) is NOT matched because COUNT appears before the *.
# A ``*`` that is the first select-list token (after optional DISTINCT /
# TOP n [PERCENT]) has no left operand and is therefore always the wildcard,
# so no trailing whitespace is required: ``SELECT *FROM t`` is caught too.
_SQL_SELECT_STAR_RE = re.compile(
    r"\bSELECT\b\s*(?:DISTINCT\b\s*)?(?:TOP\s+\d+(?:\s+PERCENT)?\s*)?\*",
    re.IGNORECASE,
)
648-
def _sql_guardrails(self, sql: str) -> str:
    """Apply safety guardrails to a SQL query before sending to the server.

    Checks split into two categories:

    **Blocked** (``ValidationError`` -- saves a server round-trip):

    1. Write statements (INSERT/UPDATE/DELETE/DROP/etc.)
    2. CROSS JOIN, RIGHT JOIN, FULL OUTER JOIN
    3. UNION / UNION ALL
    4. HAVING clause
    5. CTE / WITH clause
    6. Subqueries -- IN (SELECT ...), EXISTS (SELECT ...)
    7. SELECT * -- intentional design decision, not a technical limitation.
       Wide entities make wildcard selects extremely expensive on shared
       database infrastructure. ``COUNT(*)`` is not affected.

    **Warned** (``UserWarning`` -- query still executes):

    8. Leading-wildcard LIKE (full table scan)
    9. Implicit cross join FROM a, b (cartesian product)

    Keyword checks are evaluated against a copy of the statement in which
    comments are replaced by a space and string literals by ``''``, so a
    keyword that only appears inside a comment or a quoted value (for
    example ``-- no UNION needed`` or ``name = 'HAVING fun'``) neither
    blocks the query nor triggers a warning, and a comment placed before
    a write or ``WITH`` keyword cannot hide it. The LIKE check needs the
    literal text and therefore runs on a copy with only comments removed.

    All checks live in this single method so an individual check can be
    relaxed in one place if server support changes.

    :param sql: The SQL string (already stripped).
    :return: The SQL string (unchanged).
    :raises ValidationError: If the SQL contains a blocked pattern.
    """
    # Literals and comments are matched in ONE left-to-right scan so that
    # "--" or "/*" inside a quoted value is not mistaken for a comment and
    # a quote inside a comment does not open a literal. '' is the SQL
    # escape for a quote inside a literal. (``re`` caches the compiled
    # pattern, so compiling here is cheap on repeated calls.)
    scanner = re.compile(r"'(?:[^']|'')*'|/\*.*?\*/|--[^\n]*", re.DOTALL)

    def _drop_comments(match):
        # Keep literals verbatim, replace comments with a single space.
        text = match.group(0)
        return text if text[0] == "'" else " "

    def _drop_comments_and_literals(match):
        # Collapse literals to '' so their content cannot match a keyword.
        return "''" if match.group(0)[0] == "'" else " "

    sql_no_comments = scanner.sub(_drop_comments, sql).strip()
    sql_code = scanner.sub(_drop_comments_and_literals, sql).strip()

    # --- BLOCKED (save server round-trip) ---

    # 1. Block writes (comments are already gone, so a comment-prefixed
    #    write statement is still caught by the start-anchored pattern).
    if self._SQL_WRITE_RE.search(sql_code):
        raise ValidationError(
            "SQL endpoint is read-only. Use client.records or "
            "client.dataframe for write operations "
            "(INSERT/UPDATE/DELETE are not supported).",
            subcode=VALIDATION_SQL_WRITE_BLOCKED,
        )

    # 2. Block unsupported JOIN types
    join_match = self._SQL_UNSUPPORTED_JOIN_RE.search(sql_code)
    if join_match:
        raise ValidationError(
            f"Unsupported JOIN type: '{join_match.group(0).strip()}'. "
            "Only INNER JOIN and LEFT JOIN are supported by the "
            "Dataverse SQL endpoint.",
            subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
        )

    # 3. Block UNION
    if self._SQL_UNION_RE.search(sql_code):
        raise ValidationError(
            "UNION is not supported by the Dataverse SQL endpoint. "
            "Execute separate queries and combine results in Python "
            "(e.g. pd.concat([df1, df2])).",
            subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
        )

    # 4. Block HAVING
    if self._SQL_HAVING_RE.search(sql_code):
        raise ValidationError(
            "HAVING is not supported by the Dataverse SQL endpoint. "
            "Use WHERE to filter before GROUP BY instead.",
            subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
        )

    # 5. Block CTE / WITH
    if self._SQL_CTE_RE.search(sql_code):
        raise ValidationError(
            "CTE (WITH ... AS) is not supported by the Dataverse SQL "
            "endpoint. Use separate queries and combine in Python.",
            subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
        )

    # 6. Block subqueries
    if self._SQL_SUBQUERY_RE.search(sql_code):
        raise ValidationError(
            "Subqueries are not supported by the Dataverse SQL "
            "endpoint. Use separate SQL calls and combine results "
            "in Python (e.g. step 1: get IDs, step 2: WHERE IN).",
            subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
        )

    # 7. Block SELECT * -- intentional design decision.
    #    Wide entities (e.g. account has 307 columns) make wildcard selects
    #    extremely expensive on shared database infrastructure.
    #    COUNT(*) is NOT matched: _SQL_SELECT_STAR_RE requires * to be the
    #    first token after SELECT/DISTINCT/TOP N, so COUNT appears before *.
    if self._SQL_SELECT_STAR_RE.search(sql_code):
        raise ValidationError(
            "SELECT * is not supported. Specify column names explicitly "
            "(e.g. SELECT name, revenue FROM account). "
            "Use client.query.sql_columns('account') to discover available columns.",
            subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
        )

    # --- WARNED (query still executes) ---
    # NOTE(review): stacklevel=4 presumably points the warning at the
    # user's call site through the public wrappers -- confirm against the
    # actual call chain if those wrappers change.

    # 8. Warn on leading-wildcard LIKE (needs the literal, so use the
    #    comments-only-stripped text).
    if self._SQL_LEADING_WILDCARD_RE.search(sql_no_comments):
        warnings.warn(
            "Query contains a leading-wildcard LIKE pattern "
            "(e.g. LIKE '%value'). This forces a full table scan "
            "and may degrade performance on large tables. "
            "Prefer trailing wildcards (LIKE 'value%') when possible.",
            UserWarning,
            stacklevel=4,
        )

    # 9. Warn on implicit cross joins (server allows but risky)
    if self._SQL_IMPLICIT_CROSS_JOIN_RE.search(sql_code):
        warnings.warn(
            "Query uses an implicit cross join (FROM table1, table2). "
            "This produces a cartesian product that can generate "
            "millions of intermediate rows and degrade shared database "
            "performance. Use explicit JOIN...ON syntax instead: "
            "FROM table1 a JOIN table2 b ON a.column = b.column",
            UserWarning,
            stacklevel=4,
        )

    # The caller always receives the original text, never a stripped copy.
    return sql
775-
776617 # --------------------------- SQL Custom API -------------------------
777618 def _query_sql (self , sql : str ) -> list [dict [str , Any ]]:
778619 """Execute a read-only SQL SELECT using the Dataverse Web API ``?sql=`` capability.
0 commit comments