Azure · Copilot · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
@@ -106,70 +106,47 @@ def _test_markdown_extraction(self, result):
         assert len(result.contents) == 1, "PDF file should have exactly one content element"
 
         content = result.contents[0]
-        assert content is not None, "Content should not be null"
-
-        # Assertion: Verify markdown content
-        markdown = getattr(content, "markdown", None)
-        if markdown:
-            assert isinstance(markdown, str), "Markdown should be a string"
-            assert len(markdown) > 0, "Markdown content should not be empty"
-            assert markdown.strip(), "Markdown content should not be just whitespace"
-            print(f"[PASS] Markdown content extracted successfully ({len(markdown)} characters)")
-        else:
-            print("[WARN]  No markdown content available")
+        assert isinstance(content, DocumentContent), f"Expected DocumentContent, got {type(content).__name__}"
+
+        # Assertion: Verify markdown content (prebuilt-documentSearch always returns markdown)
+        assert content.markdown, "DocumentContent should have non-empty markdown"
+        assert isinstance(content.markdown, str), "Markdown should be a string"
+        assert content.markdown.strip(), "Markdown content should not be just whitespace"
+        print(f"[PASS] Markdown content extracted successfully ({len(content.markdown)} characters)")
 
     def _test_document_properties(self, result):
         """Test document property access."""
+        assert len(result.contents) > 0, "Result should have at least one content"
         content = result.contents[0]
-        assert content is not None, "Content should not be null for document properties validation"
-
-        # Check if this is DocumentContent
-        content_type = type(content).__name__
-        print(f"[INFO] Content type: {content_type}")
-
-        # Validate this is document content (should have document-specific properties)
-        is_document_content = hasattr(content, "mime_type") and hasattr(content, "start_page_number")
-        if not is_document_content:
-            print(f"[WARN] Expected DocumentContent but got {content_type}, skipping document-specific validations")
-            return
-
-        # Validate MIME type
-        mime_type = getattr(content, "mime_type", None)
-        if mime_type:
-            assert isinstance(mime_type, str), "MIME type should be a string"
-            assert mime_type.strip(), "MIME type should not be empty"
-            assert mime_type == "application/pdf", f"MIME type should be application/pdf, but was {mime_type}"
-            print(f"[PASS] MIME type verified: {mime_type}")
-
-        # Validate page numbers
-        start_page = getattr(content, "start_page_number", None)
-        if start_page is not None:
-            assert start_page >= 1, f"Start page should be >= 1, but was {start_page}"
-
-            end_page = getattr(content, "end_page_number", None)
-            if end_page is not None:
-                assert end_page >= start_page, f"End page {end_page} should be >= start page {start_page}"
-                total_pages = end_page - start_page + 1
-                assert total_pages > 0, f"Total pages should be positive, but was {total_pages}"
-                print(f"[PASS] Page range verified: {start_page} to {end_page} ({total_pages} pages)")
-
-                # Validate pages collection
-                pages = getattr(content, "pages", None)
-                if pages and len(pages) > 0:
-                    assert len(pages) > 0, "Pages collection should not be empty when not null"
-                    assert (
-                        len(pages) == total_pages
-                    ), f"Pages collection count {len(pages)} should match calculated total pages {total_pages}"
-                    print(f"[PASS] Pages collection verified: {len(pages)} pages")
-
-                    # Validate individual pages
-                    self._validate_pages(pages, start_page, end_page, content)
-                else:
-                    print("[WARN] No pages collection available in document content")
-
-        # Validate tables collection
-        tables = getattr(content, "tables", None)
-        if tables and len(tables) > 0:
+        assert isinstance(content, DocumentContent), f"Expected DocumentContent, got {type(content).__name__}"
+
+        # Validate MIME type (Required field)
+        assert content.mime_type == "application/pdf", f"MIME type should be application/pdf, but was {content.mime_type}"
+        print(f"[PASS] MIME type verified: {content.mime_type}")
+
+        # Validate page numbers (Required fields)
+        assert content.start_page_number >= 1, f"Start page should be >= 1, but was {content.start_page_number}"
+        assert content.end_page_number >= content.start_page_number, (
+            f"End page {content.end_page_number} should be >= start page {content.start_page_number}"
+        )
+        total_pages = content.end_page_number - content.start_page_number + 1
+        assert total_pages > 0, f"Total pages should be positive, but was {total_pages}"
+        print(f"[PASS] Page range verified: {content.start_page_number} to {content.end_page_number} ({total_pages} pages)")
+
+        # Validate pages collection (Optional but expected for prebuilt-documentSearch)
+        pages = content.pages
+        if pages:
+            assert (
+                len(pages) == total_pages
+            ), f"Pages collection count {len(pages)} should match calculated total pages {total_pages}"
+            print(f"[PASS] Pages collection verified: {len(pages)} pages")
+            self._validate_pages(pages, content.start_page_number, content.end_page_number, content)
+        else:
+            print("[WARN] No pages collection available in document content")
+
+        # Validate tables collection (Optional)
+        tables = content.tables
+        if tables:
             self._validate_tables(tables)
         else:
             print("No tables found in document content")
@@ -281,7 +258,12 @@ def test_sample_analyze_binary_with_content_range(self, contentunderstanding_end
         assert full_result.contents is not None
         full_doc = full_result.contents[0]
         assert isinstance(full_doc, DocumentContent)
-        full_page_count = len(full_doc.pages) if full_doc.pages else 0
+        assert full_doc.start_page_number == 1, f"Full document should start at page 1, got {full_doc.start_page_number}"
+        assert full_doc.pages, "Full document should have a non-empty pages collection"
+        full_page_count = len(full_doc.pages)
+        assert full_page_count == full_doc.end_page_number, (
+            f"Full page count ({full_page_count}) should equal end_page_number ({full_doc.end_page_number})"
+        )
         print(f"[PASS] Full document: {full_page_count} pages, {len(full_doc.markdown or '')} chars")
 
         # "3-" — pages 3 onward

@@ -209,63 +209,47 @@ def _test_markdown_extraction(self, result):
         assert len(result.contents) == 1, "PDF file should have exactly one content element"
 
         content = result.contents[0]
-        assert content is not None, "Content should not be null"
+        assert isinstance(content, DocumentContent), f"Expected DocumentContent, got {type(content).__name__}"
 
-        markdown = getattr(content, "markdown", None)
-        if markdown:
-            assert isinstance(markdown, str), "Markdown should be a string"
-            assert len(markdown) > 0, "Markdown content should not be empty"
-            assert markdown.strip(), "Markdown content should not be just whitespace"
-            print(f"[PASS] Markdown content extracted successfully ({len(markdown)} characters)")
-        else:
-            print("[WARN] No markdown content available")
+        # Verify markdown (prebuilt-documentSearch always returns markdown)
+        assert content.markdown, "DocumentContent should have non-empty markdown"
+        assert isinstance(content.markdown, str), "Markdown should be a string"
+        assert content.markdown.strip(), "Markdown content should not be just whitespace"
+        print(f"[PASS] Markdown content extracted successfully ({len(content.markdown)} characters)")
 
     def _test_document_properties(self, result):
         """Test document property access."""
+        assert len(result.contents) > 0, "Result should have at least one content"
         content = result.contents[0]
-        assert content is not None, "Content should not be null for document properties validation"
-
-        content_type = type(content).__name__
-        print(f"[INFO] Content type: {content_type}")
-
-        is_document_content = hasattr(content, "mime_type") and hasattr(content, "start_page_number")
-        if not is_document_content:
-            print(f"[WARN] Expected DocumentContent but got {content_type}, skipping document-specific validations")
-            return
-
-        # Validate MIME type
-        mime_type = getattr(content, "mime_type", None)
-        if mime_type:
-            assert isinstance(mime_type, str), "MIME type should be a string"
-            assert mime_type.strip(), "MIME type should not be empty"
-            assert mime_type == "application/pdf", f"MIME type should be application/pdf, but was {mime_type}"
-            print(f"[PASS] MIME type verified: {mime_type}")
-
-        # Validate page numbers
-        start_page = getattr(content, "start_page_number", None)
-        if start_page is not None:
-            assert start_page >= 1, f"Start page should be >= 1, but was {start_page}"
-
-            end_page = getattr(content, "end_page_number", None)
-            if end_page is not None:
-                assert end_page >= start_page, f"End page {end_page} should be >= start page {start_page}"
-                total_pages = end_page - start_page + 1
-                assert total_pages > 0, f"Total pages should be positive, but was {total_pages}"
-                print(f"[PASS] Page range verified: {start_page} to {end_page} ({total_pages} pages)")
-
-                pages = getattr(content, "pages", None)
-                if pages and len(pages) > 0:
-                    assert len(pages) > 0, "Pages collection should not be empty when not null"
-                    assert (
-                        len(pages) == total_pages
-                    ), f"Pages collection count {len(pages)} should match calculated total pages {total_pages}"
-                    print(f"[PASS] Pages collection verified: {len(pages)} pages")
-                    self._validate_pages(pages, start_page, end_page, content)
-                else:
-                    print("[WARN] No pages collection available in document content")
-
-        tables = getattr(content, "tables", None)
-        if tables and len(tables) > 0:
+        assert isinstance(content, DocumentContent), f"Expected DocumentContent, got {type(content).__name__}"
+
+        # Validate MIME type (Required field)
+        assert content.mime_type == "application/pdf", f"MIME type should be application/pdf, but was {content.mime_type}"
+        print(f"[PASS] MIME type verified: {content.mime_type}")
+
+        # Validate page numbers (Required fields)
+        assert content.start_page_number >= 1, f"Start page should be >= 1, but was {content.start_page_number}"
+        assert content.end_page_number >= content.start_page_number, (
+            f"End page {content.end_page_number} should be >= start page {content.start_page_number}"
+        )
+        total_pages = content.end_page_number - content.start_page_number + 1
+        assert total_pages > 0, f"Total pages should be positive, but was {total_pages}"
+        print(f"[PASS] Page range verified: {content.start_page_number} to {content.end_page_number} ({total_pages} pages)")
+
+        # Validate pages collection (Optional but expected for prebuilt-documentSearch)
+        pages = content.pages
+        if pages:
+            assert (
+                len(pages) == total_pages
+            ), f"Pages collection count {len(pages)} should match calculated total pages {total_pages}"
+            print(f"[PASS] Pages collection verified: {len(pages)} pages")
+            self._validate_pages(pages, content.start_page_number, content.end_page_number, content)
+        else:
+            print("[WARN] No pages collection available in document content")
+
+        # Validate tables collection (Optional)
+        tables = content.tables
+        if tables:
             self._validate_tables(tables)
         else:
             print("No tables found in document content")
@@ -274,74 +258,63 @@ def _test_document_properties(self, result):
 
     def _test_audiovisual_properties(self, result):
         """Test audio/visual content properties for video."""
+        assert len(result.contents) > 0, "Result should have at least one content"
         content = result.contents[0]
-        assert content is not None, "Content should not be null"
+        assert isinstance(content, AudioVisualContent), f"Expected AudioVisualContent, got {type(content).__name__}"
 
-        # Verify markdown
-        markdown = getattr(content, "markdown", None)
-        if markdown:
-            assert isinstance(markdown, str), "Markdown should be a string"
-            assert len(markdown) > 0, "Markdown content should not be empty"
-            print(f"[PASS] Video markdown content extracted ({len(markdown)} characters)")
-
-        # Verify timing properties
-        start_time = getattr(content, "start_time_ms", None)
-        if start_time is not None:
-            assert start_time >= 0, f"Start time should be >= 0, but was {start_time}"
-            print(f"[PASS] Video start time verified: {start_time} ms")
-
-        end_time = getattr(content, "end_time_ms", None)
-        if end_time is not None:
-            assert end_time >= 0, f"End time should be >= 0, but was {end_time}"
-            print(f"[PASS] Video end time verified: {end_time} ms")
-
-        # Verify frame size
-        width = getattr(content, "width", None)
-        height = getattr(content, "height", None)
-        if width is not None and height is not None:
-            assert width > 0, f"Video width should be > 0, but was {width}"
-            assert height > 0, f"Video height should be > 0, but was {height}"
-            print(f"[PASS] Video frame size verified: {width} x {height}")
+        # Verify markdown (expected for prebuilt-videoSearch)
+        assert content.markdown, "Video content should have non-empty markdown"
+        assert isinstance(content.markdown, str), "Markdown should be a string"
+        print(f"[PASS] Video markdown content extracted ({len(content.markdown)} characters)")
 
-        # Verify summary field
-        fields = getattr(content, "fields", None)
-        if fields:
-            summary = fields.get("Summary")
+        # Verify timing properties (Required fields)
+        assert content.start_time_ms >= 0, f"Start time should be >= 0, but was {content.start_time_ms}"
+        print(f"[PASS] Video start time verified: {content.start_time_ms} ms")
+
+        assert content.end_time_ms > content.start_time_ms, (
+            f"End time ({content.end_time_ms}) should be > start time ({content.start_time_ms})"
+        )
+        print(f"[PASS] Video end time verified: {content.end_time_ms} ms")
+
+        # Verify frame size (Optional — applicable to video only)
+        if content.width is not None and content.height is not None:
+            assert content.width > 0, f"Video width should be > 0, but was {content.width}"
+            assert content.height > 0, f"Video height should be > 0, but was {content.height}"
+            print(f"[PASS] Video frame size verified: {content.width} x {content.height}")
+
+        # Verify summary field (Optional)
+        if content.fields:
+            summary = content.fields.get("Summary")
             if summary:
                 print("[PASS] Summary field available in video content")
 
         print("[PASS] All audio/visual properties validated successfully")
 
     def _test_audio_properties(self, result):
         """Test audio content properties including transcript phrases."""
+        assert len(result.contents) > 0, "Result should have at least one content"
         content = result.contents[0]
-        assert content is not None, "Content should not be null"
+        assert isinstance(content, AudioVisualContent), f"Expected AudioVisualContent, got {type(content).__name__}"
 
-        # Verify markdown
-        markdown = getattr(content, "markdown", None)
-        if markdown:
-            assert isinstance(markdown, str), "Markdown should be a string"
-            assert len(markdown) > 0, "Markdown content should not be empty"
-            print(f"[PASS] Audio markdown content extracted ({len(markdown)} characters)")
+        # Verify markdown (expected for prebuilt-audioSearch)
+        assert content.markdown, "Audio content should have non-empty markdown"
+        assert isinstance(content.markdown, str), "Markdown should be a string"
+        print(f"[PASS] Audio markdown content extracted ({len(content.markdown)} characters)")
 
-        # Verify timing properties
-        start_time = getattr(content, "start_time_ms", None)
-        if start_time is not None:
-            assert start_time >= 0, f"Start time should be >= 0, but was {start_time}"
-            print(f"[PASS] Audio start time verified: {start_time} ms")
+        # Verify start time (Required field)
+        assert content.start_time_ms >= 0, f"Start time should be >= 0, but was {content.start_time_ms}"
+        print(f"[PASS] Audio start time verified: {content.start_time_ms} ms")
 
-        # Verify summary field
-        fields = getattr(content, "fields", None)
-        if fields:
-            summary = fields.get("Summary")
+        # Verify summary field (Optional)
+        if content.fields:
+            summary = content.fields.get("Summary")
             if summary:
                 print("[PASS] Summary field available in audio content")
 
-        # Verify transcript phrases
-        transcript_phrases = getattr(content, "transcript_phrases", None)
-        if transcript_phrases and len(transcript_phrases) > 0:
-            print(f"[PASS] Transcript phrases found: {len(transcript_phrases)} phrases")
-            for phrase in transcript_phrases[:2]:
+        # Verify transcript phrases (Optional)
+        if content.transcript_phrases:
+            print(f"[PASS] Transcript phrases found: {len(content.transcript_phrases)} phrases")
+            for phrase in content.transcript_phrases[:2]:
                 speaker = getattr(phrase, "speaker", None)
                 text = getattr(phrase, "text", None)
                 start_ms = getattr(phrase, "start_time_ms", None)