Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def begin_analyze_binary(
analyzer_id: str,
binary_input: bytes,
*,
content_range: Optional[Union[str, _models.ContentRange]] = None,
content_range: Optional[str] = None,
content_type: str = "application/octet-stream",
processing_location: Optional[Union[str, _models.ProcessingLocation]] = None,
**kwargs: Any,
Expand All @@ -247,11 +247,10 @@ def begin_analyze_binary(
:type analyzer_id: str
:param binary_input: The binary content of the document to analyze. Required.
:type binary_input: bytes
:keyword content_range: Range of the input to analyze. Accepts a
:class:`~azure.ai.contentunderstanding.models.ContentRange` or a raw string
:keyword content_range: Range of the input to analyze. Accepts a raw string
(ex. ``"1-3,5,9-"``). Document content uses 1-based page numbers,
while audio visual content uses integer milliseconds. Default value is None.
:paramtype content_range: str or ~azure.ai.contentunderstanding.models.ContentRange
:paramtype content_range: str
:keyword content_type: Body Parameter content-type. Content type parameter for binary body.
Default value is "application/octet-stream".
:paramtype content_type: str
Expand All @@ -268,16 +267,13 @@ def begin_analyze_binary(
matches Python's native string indexing behavior (len() and str[i] use code points).
This ensures ContentSpan offsets work correctly with Python string slicing.
"""
# Convert ContentRange to string if needed
content_range_str = str(content_range) if content_range is not None else None

# Call parent implementation with string_encoding set to "codePoint"
# (matches Python's string indexing)
poller = super().begin_analyze_binary(
analyzer_id=analyzer_id,
binary_input=binary_input,
string_encoding="codePoint",
content_range=content_range_str,
content_range=content_range,
content_type=content_type,
processing_location=processing_location,
**kwargs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ async def begin_analyze_binary(
analyzer_id: str,
binary_input: bytes,
*,
content_range: Optional[Union[str, _models.ContentRange]] = None,
content_range: Optional[str] = None,
content_type: str = "application/octet-stream",
processing_location: Optional[Union[str, _models.ProcessingLocation]] = None,
**kwargs: Any,
Expand All @@ -248,11 +248,10 @@ async def begin_analyze_binary(
:type analyzer_id: str
:param binary_input: The binary content of the document to analyze. Required.
:type binary_input: bytes
:keyword content_range: Range of the input to analyze. Accepts a
:class:`~azure.ai.contentunderstanding.models.ContentRange` or a raw string
:keyword content_range: Range of the input to analyze. Accepts a raw string
(ex. ``"1-3,5,9-"``). Document content uses 1-based page numbers,
while audio visual content uses integer milliseconds. Default value is None.
:paramtype content_range: str or ~azure.ai.contentunderstanding.models.ContentRange
:paramtype content_range: str
:keyword content_type: Body Parameter content-type. Content type parameter for binary body.
Default value is "application/octet-stream".
:paramtype content_type: str
Expand All @@ -269,16 +268,13 @@ async def begin_analyze_binary(
matches Python's native string indexing behavior (len() and str[i] use code points).
This ensures ContentSpan offsets work correctly with Python string slicing.
"""
# Convert ContentRange to string if needed
content_range_str = str(content_range) if content_range is not None else None

# Call parent implementation with string_encoding set to "codePoint"
# (matches Python's string indexing)
poller = await super().begin_analyze_binary(
analyzer_id=analyzer_id,
binary_input=binary_input,
string_encoding="codePoint",
content_range=content_range_str,
content_range=content_range,
content_type=content_type,
processing_location=processing_location,
**kwargs,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar
from azure.core import CaseInsensitiveEnumMeta
from azure.core.polling import LROPoller, PollingMethod
from ._content_range import ContentRange
from ._models import (
StringField,
IntegerField,
Expand Down Expand Up @@ -77,7 +76,6 @@ def value(self) -> Optional[Any]: ...
PollingReturnType_co = TypeVar("PollingReturnType_co", covariant=True)

__all__ = [
"ContentRange",
"RecordMergePatchUpdate",
"AnalyzeLROPoller",
"ProcessingLocation",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
from azure.ai.contentunderstanding.aio import ContentUnderstandingClient
from azure.ai.contentunderstanding.models import (
AnalysisResult,
ContentRange,
DocumentContent,
)
from azure.core.credentials import AzureKeyCredential
Expand Down Expand Up @@ -91,72 +90,46 @@ async def main() -> None:
# [END analyze_document_from_binary]

# [START analyze_binary_with_content_range]
# Use a multi-page document for ContentRange demonstrations.
# Use a multi-page document for content range demonstrations.
multi_page_path = "sample_files/mixed_financial_invoices.pdf"
with open(multi_page_path, "rb") as f:
multi_page_bytes = f.read()

# Analyze only pages 3 onward.
print("\nAnalyzing pages 3 onward with ContentRange...")
print("\nAnalyzing pages 3 onward with content range '3-'...")
range_poller = await client.begin_analyze_binary(
analyzer_id="prebuilt-documentSearch",
binary_input=multi_page_bytes,
content_range=ContentRange.pages_from(3),
content_range="3-",
)
range_result: AnalysisResult = await range_poller.result()

if isinstance(range_result.contents[0], DocumentContent):
range_doc = range_result.contents[0]
print(
f"ContentRange analysis returned pages"
f"Content range analysis returned pages"
f" {range_doc.start_page_number} - {range_doc.end_page_number}"
)
# [END analyze_binary_with_content_range]

# [START analyze_binary_with_combined_content_range]
# Analyze pages 1-3, page 5, and pages 9 onward.
print("\nAnalyzing combined pages (1-3, 5, 9-) with ContentRange...")
print("\nAnalyzing combined pages (1-3, 5, 9-) with content range '1-3,5,9-'...")
combine_range_poller = await client.begin_analyze_binary(
analyzer_id="prebuilt-documentSearch",
binary_input=multi_page_bytes,
content_range=ContentRange.combine(
ContentRange.pages(1, 3),
ContentRange.page(5),
ContentRange.pages_from(9),
),
content_range="1-3,5,9-",
)
combine_range_result: AnalysisResult = await combine_range_poller.result()

if isinstance(combine_range_result.contents[0], DocumentContent):
combine_doc = combine_range_result.contents[0]
print(
f"Combined ContentRange analysis returned pages"
f"Combined content range analysis returned pages"
f" {combine_doc.start_page_number} - {combine_doc.end_page_number}"
)
# [END analyze_binary_with_combined_content_range]

# [START analyze_binary_with_raw_content_range]
# You can also pass a range string directly to the ContentRange constructor.
# This is equivalent to using the factory methods and is useful for dynamically
# constructed or user-supplied ranges.
# Analyze pages 1-3, page 5, and pages 9 onward using a raw range string.
# This is equivalent to: ContentRange.combine(ContentRange.pages(1, 3), ContentRange.page(5), ContentRange.pages_from(9))
print("\nAnalyzing with raw ContentRange string '1-3,5,9-'...")
raw_range_poller = await client.begin_analyze_binary(
analyzer_id="prebuilt-documentSearch",
binary_input=multi_page_bytes,
content_range=ContentRange("1-3,5,9-"),
)
raw_range_result: AnalysisResult = await raw_range_poller.result()

if isinstance(raw_range_result.contents[0], DocumentContent):
raw_doc = raw_range_result.contents[0]
print(
f"Raw ContentRange analysis returned pages"
f" {raw_doc.start_page_number} - {raw_doc.end_page_number}"
)
# [END analyze_binary_with_raw_content_range]

# [START extract_markdown]
print("\nMarkdown Content:")
print("=" * 50)
Expand Down
Loading