Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def __init__(
self,
*,
endpoint: str,
api_version: str = "2024-07-31-preview",
api_version: str | None = None,
credential: AzureKeyCredential | TokenCredential | None = None,
file_types: List[DocumentIntelligenceFileType] = [
DocumentIntelligenceFileType.DOCX,
Expand All @@ -152,7 +152,7 @@ def __init__(

Args:
endpoint (str): The endpoint for the Document Intelligence service.
api_version (str): The API version to use. Defaults to "2024-07-31-preview".
api_version (str | None): The API version to use. Defaults to the Azure SDK default.
credential (AzureKeyCredential | TokenCredential | None): The credential to use for authentication.
file_types (List[DocumentIntelligenceFileType]): The file types to accept. Defaults to all supported file types.
"""
Expand Down Expand Up @@ -180,11 +180,14 @@ def __init__(

self.endpoint = endpoint
self.api_version = api_version
self.doc_intel_client = DocumentIntelligenceClient(
endpoint=self.endpoint,
api_version=self.api_version,
credential=credential,
)
client_args: dict[str, Any] = {
"endpoint": self.endpoint,
"credential": credential,
}
if self.api_version is not None:
client_args["api_version"] = self.api_version

self.doc_intel_client = DocumentIntelligenceClient(**client_args)

def accepts(
self,
Expand Down
46 changes: 46 additions & 0 deletions packages/markitdown/tests/test_docintel_html.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import markitdown.converters._doc_intel_converter as docintel
from markitdown.converters._doc_intel_converter import (
DocumentIntelligenceConverter,
DocumentIntelligenceFileType,
Expand All @@ -24,3 +25,48 @@ def test_docintel_accepts_html_mimetype():
assert conv.accepts(io.BytesIO(b""), stream_info)
stream_info = StreamInfo(mimetype="application/xhtml+xml", extension=None)
assert conv.accepts(io.BytesIO(b""), stream_info)


def test_docintel_uses_sdk_default_api_version(monkeypatch):
    """Check that omitting ``api_version`` keeps it out of the client kwargs.

    The converter is built with only an endpoint and a credential. The test
    then verifies that ``conv.api_version`` is ``None``, that no
    ``api_version`` keyword reached the (stubbed) client constructor, and
    that the endpoint and credential were forwarded unchanged.
    """
    seen_kwargs = {}

    class RecordingClient:
        """Stand-in client that records the keyword arguments it is given."""

        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    # Clear the module's recorded dependency error and swap in the stub.
    # NOTE(review): presumably a non-None `_dependency_exc_info` makes the
    # converter refuse to construct -- confirm against the converter module.
    monkeypatch.setattr(docintel, "_dependency_exc_info", None)
    monkeypatch.setattr(docintel, "DocumentIntelligenceClient", RecordingClient)

    endpoint = "https://example.cognitiveservices.azure.com/"
    credential = object()  # opaque sentinel; checked by identity below
    conv = DocumentIntelligenceConverter(endpoint=endpoint, credential=credential)

    assert conv.api_version is None
    assert "api_version" not in seen_kwargs
    assert seen_kwargs["endpoint"] == endpoint
    assert seen_kwargs["credential"] is credential


def test_docintel_passes_explicit_api_version(monkeypatch):
    """Check that an explicit ``api_version`` is forwarded to the client.

    The converter is built with ``api_version="2024-07-31-preview"`` and the
    test verifies that the (stubbed) client constructor received exactly
    that value under the ``api_version`` keyword.
    """
    seen_kwargs = {}

    class RecordingClient:
        """Stand-in client that records the keyword arguments it is given."""

        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    # Clear the module's recorded dependency error and swap in the stub.
    # NOTE(review): presumably a non-None `_dependency_exc_info` makes the
    # converter refuse to construct -- confirm against the converter module.
    monkeypatch.setattr(docintel, "_dependency_exc_info", None)
    monkeypatch.setattr(docintel, "DocumentIntelligenceClient", RecordingClient)

    requested_version = "2024-07-31-preview"
    DocumentIntelligenceConverter(
        endpoint="https://example.cognitiveservices.azure.com/",
        credential=object(),
        api_version=requested_version,
    )

    assert seen_kwargs["api_version"] == requested_version