Skip to content

Commit 62d1659

Browse files
author
Samson Gebre
committed
Improve migration tool; fix bugs
1 parent 91ada39 commit 62d1659

10 files changed

Lines changed: 2485 additions & 61 deletions

File tree

src/PowerPlatform/Dataverse/client.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,16 @@ class DataverseClient:
5050
:type context: ~PowerPlatform.Dataverse.core.config.OperationContext or None
5151
5252
:raises ValueError: If ``base_url`` is missing or empty after trimming.
53+
:raises ValueError: If ``base_url`` does not use HTTPS. Dataverse only
54+
serves over HTTPS; constructing with ``http://`` would send the OAuth
55+
bearer token unencrypted, so the check fails fast at construction.
5356
:raises ValueError: If both ``config`` and ``context`` are provided.
57+
:raises AttributeError: If a v0 beta shortcut (``create``, ``update``,
58+
``delete``, ``get``, ``query_sql``, ``create_table``, ``delete_table``,
59+
``list_tables``, ``get_table_info``, ``create_columns``,
60+
``delete_columns``, ``upload_file``) is accessed. The error message
61+
names the GA replacement and the codemod command. See
62+
:attr:`_REMOVED_BETA_METHODS`.
5463
5564
.. note::
5665
The client lazily initializes its internal OData client on first use, allowing lightweight construction without immediate network calls.
@@ -70,6 +79,12 @@ class DataverseClient:
7079
- ``client.dataframe`` -- pandas DataFrame wrappers for record CRUD
7180
- ``client.batch`` -- batch multiple operations into a single HTTP request
7281
82+
v0 beta methods (``client.create``, ``client.query_sql``, etc.) were removed
83+
in 1.0 GA. Calling one now raises :class:`AttributeError` with a message
84+
naming the GA replacement and the codemod command -- previously these calls
85+
raised a bare ``AttributeError`` with no migration hint, so debugging
86+
half-migrated code was painful. See :attr:`_REMOVED_BETA_METHODS`.
87+
7388
The client supports Python's context manager protocol for automatic resource
7489
cleanup and HTTP connection pooling:
7590
@@ -95,6 +110,26 @@ class DataverseClient:
95110
client.close()
96111
"""
97112

113+
# v0 beta methods removed in 1.0 GA -> human-readable GA replacement.
114+
# Kept in sync with ``migrate_v0_to_v1._CLIENT_SHORTCUTS``: any addition
115+
# there should land here too so the runtime error and the codemod's
116+
# rewrite always agree. ``__getattr__`` reads this to turn what used to
117+
# be a bare ``AttributeError`` into one with an actionable hint.
118+
_REMOVED_BETA_METHODS = {
119+
"create": "client.records.create(table, data)",
120+
"update": "client.records.update(table, record_id, data)",
121+
"delete": "client.records.delete(table, record_id)",
122+
"get": "client.records.get(table, ...)",
123+
"query_sql": "client.query.sql(sql)",
124+
"get_table_info": "client.tables.get(table)",
125+
"create_table": "client.tables.create(...)",
126+
"delete_table": "client.tables.delete(table)",
127+
"list_tables": "client.tables.list()",
128+
"create_columns": "client.tables.add_columns(table, ...)",
129+
"delete_columns": "client.tables.remove_columns(table, ...)",
130+
"upload_file": "client.files.upload(...)",
131+
}
132+
98133
def __init__(
99134
self,
100135
base_url: str,
@@ -111,6 +146,15 @@ def __init__(
111146
self._base_url = (base_url or "").rstrip("/")
112147
if not self._base_url:
113148
raise ValueError("base_url is required.")
149+
# Dataverse never serves over plaintext. A typo or copy-paste that leaves
150+
# the scheme as ``http://`` would send the bearer token unencrypted on the
151+
# very first request -- by then the credential is already on the wire.
152+
# Fail-fast at construction so the leak can't happen.
153+
if not self._base_url.lower().startswith("https://"):
154+
raise ValueError(
155+
f"base_url must use HTTPS (got {base_url!r}). Dataverse only serves "
156+
f"over HTTPS; plaintext would send OAuth bearer tokens unencrypted."
157+
)
114158
if config is not None:
115159
self._config = config
116160
elif context is not None:
@@ -215,6 +259,28 @@ def _check_closed(self) -> None:
215259
if self._closed:
216260
raise RuntimeError("DataverseClient is closed")
217261

262+
def __getattr__(self, name: str):
    """Raise :class:`AttributeError`, with a migration hint for removed v0 beta shortcuts.

    Python only calls this hook after normal attribute lookup has failed, so
    any attribute that resolves on the instance or its class never reaches
    this method.

    Names starting with ``_`` always get a plain :class:`AttributeError`, so
    protocol lookups (pickling, ``copy.deepcopy``, IDE introspection, and
    similar) behave as they would without this hook. Public names listed in
    ``_REMOVED_BETA_METHODS`` get an error naming the GA replacement and the
    codemod command; every other name gets the plain error.

    :param name: The attribute name whose normal lookup failed.
    :type name: :class:`str`

    :raises AttributeError: Always. This hook never returns a value.
    """
    # Resolve the class name once so both message forms agree, including for
    # subclasses (the hint used to hard-code 'DataverseClient').
    owner = type(self).__name__

    # Check the underscore guard BEFORE touching ``self._REMOVED_BETA_METHODS``:
    # private/dunder names must never consult the mapping.
    replacement = None if name.startswith("_") else self._REMOVED_BETA_METHODS.get(name)
    if replacement is None:
        raise AttributeError(f"{owner!r} object has no attribute {name!r}")

    raise AttributeError(
        f"{owner!r} has no attribute {name!r}. This was a v0 beta "
        f"method removed in 1.0 GA. Use {replacement} instead. To migrate a "
        f"codebase automatically, run: "
        f"python -m PowerPlatform.Dataverse.migration.migrate_v0_to_v1 <path>"
    )
283+
218284
# ---------------- Cache utilities ----------------
219285

220286
def flush_cache(self, kind) -> int:

src/PowerPlatform/Dataverse/core/_auth.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from azure.core.credentials import TokenCredential
1717

1818

19-
@dataclass
19+
@dataclass(repr=False)
2020
class _TokenPair:
2121
"""
2222
Container for an OAuth2 access token and its associated resource scope.
@@ -25,11 +25,22 @@ class _TokenPair:
2525
:type resource: :class:`str`
2626
:param access_token: The access token string.
2727
:type access_token: :class:`str`
28+
29+
.. note::
30+
``repr()`` / ``str()`` redact ``access_token`` to ``[REDACTED]``. Python's
31+
default dataclass ``__repr__`` would otherwise emit the full bearer
32+
JWT, so any accidental ``print()``, ``logging.debug(self)``, or
33+
``traceback`` dump that includes locals would leak the token. The redaction
34+
mirrors the ``Authorization``-header treatment in
35+
:mod:`~PowerPlatform.Dataverse.core._http_logger`.
2836
"""
2937

3038
resource: str
3139
access_token: str
3240

41+
def __repr__(self) -> str:
42+
return f"_TokenPair(resource={self.resource!r}, access_token='[REDACTED]')"
43+
3344

3445
class _AuthManager:
3546
"""

src/PowerPlatform/Dataverse/core/log_config.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,20 @@
1717

1818
__all__ = ["LogConfig"]
1919

20-
# Headers whose values must never appear in log files
20+
# Headers whose values must never appear in log files.
21+
#
22+
# ``set-cookie`` / ``cookie`` are session-bearing: Dataverse responses include
23+
# session identifiers (``ReqClientId``, ``orgId``, ...) with very long expiries.
24+
# Captured in a log file they enable session-replay against the same environment
25+
# for the lifetime of the cookie, so they get the same treatment as bearer tokens.
2126
_DEFAULT_REDACTED_HEADERS: FrozenSet[str] = frozenset(
2227
{
2328
"authorization",
2429
"proxy-authorization",
2530
"x-ms-authorization-auxiliary",
2631
"ocp-apim-subscription-key",
32+
"set-cookie",
33+
"cookie",
2734
}
2835
)
2936

@@ -51,7 +58,11 @@ class LogConfig:
5158
sessions — bodies may contain PII and sensitive business data.
5259
:param redacted_headers: Header names (case-insensitive) whose values are
5360
replaced with ``"[REDACTED]"`` in logs. Defaults include
54-
``Authorization``, ``Proxy-Authorization``, etc.
61+
``Authorization``, ``Proxy-Authorization``,
62+
``X-MS-Authorization-Auxiliary``, ``Ocp-Apim-Subscription-Key``,
63+
``Set-Cookie``, and ``Cookie``. Cookie headers are redacted because
64+
Dataverse responses include session-bearing values
65+
(``ReqClientId``, ``orgId``) that enable session replay if leaked.
5566
:param log_level: Python logging level name. Default: ``"DEBUG"``.
5667
:param max_file_bytes: Max size per log file before rotation (bytes).
5768
Default: ``10_485_760`` (10 MB).

0 commit comments

Comments
 (0)