unclecode · biplavbarua · Dec 23, 2025 · Dec 23, 2025 · Dec 25, 2025 · Jan 11, 2026
diff --git a/README.md b/README.md
@@ -1093,6 +1093,7 @@ Our enterprise sponsors and technology partners help scale Crawl4AI to power pro
 
 | Company | About | Sponsorship Tier |
 |------|------|----------------------------|
+| <a href="https://www.thordata.com/?ls=github&lk=crawl4ai" target="_blank"><img src="https://gist.github.com/aravindkarnam/dfc598a67be5036494475acece7e54cf/raw/thor_data.svg" alt="Thor Data" width="120"/></a>  | Leveraging Thordata ensures seamless compatibility with any AI/ML workflows and data infrastructure, massively accessing web data with 99.9% uptime, backed by one-on-one customer support. | 🥈 Silver |
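-| <a href="https://app.nstproxy.com/register?i=ecOqW9" target="_blank"><picture><source width="250" media="(prefers-color-scheme: dark)" srcset="https://gist.github.com/aravindkarnam/62f82bd4818d3079d9dd3c31df432cf8/raw/nst-light.svg"><source width="250" media="(prefers-color-scheme: light)" srcset="https://www.nstproxy.com/logo.svg"><img alt="nstproxy" src="ttps://www.nstproxy.com/logo.svg"></picture></a>  | NstProxy is a trusted proxy provider with over 110M+ real residential IPs, city-level targeting, 99.99% uptime, and low pricing at $0.1/GB, it delivers unmatched stability, scale, and cost-efficiency. | 🥈 Silver |
+| <a href="https://app.nstproxy.com/register?i=ecOqW9" target="_blank"><picture><source width="250" media="(prefers-color-scheme: dark)" srcset="https://gist.github.com/aravindkarnam/62f82bd4818d3079d9dd3c31df432cf8/raw/nst-light.svg"><source width="250" media="(prefers-color-scheme: light)" srcset="https://www.nstproxy.com/logo.svg"><img alt="nstproxy" src="https://www.nstproxy.com/logo.svg"></picture></a>  | NstProxy is a trusted proxy provider with over 110M+ real residential IPs, city-level targeting, 99.99% uptime, and low pricing at $0.1/GB, it delivers unmatched stability, scale, and cost-efficiency. | 🥈 Silver |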
 | <a href="https://app.scrapeless.com/passport/register?utm_source=official&utm_term=crawl4ai" target="_blank"><picture><source width="250" media="(prefers-color-scheme: dark)" srcset="https://gist.githubusercontent.com/aravindkarnam/0d275b942705604263e5c32d2db27bc1/raw/Scrapeless-light-logo.svg"><source width="250" media="(prefers-color-scheme: light)" srcset="https://gist.githubusercontent.com/aravindkarnam/22d0525cc0f3021bf19ebf6e11a69ccd/raw/Scrapeless-dark-logo.svg"><img alt="Scrapeless" src="https://gist.githubusercontent.com/aravindkarnam/22d0525cc0f3021bf19ebf6e11a69ccd/raw/Scrapeless-dark-logo.svg"></picture></a>  | Scrapeless provides production-grade infrastructure for Crawling, Automation, and AI Agents, offering Scraping Browser, 4 Proxy Types and Universal Scraping API. | 🥈 Silver |
 | <a href="https://dashboard.capsolver.com/passport/register?inviteCode=ESVSECTX5Q23" target="_blank"><picture><source width="120" media="(prefers-color-scheme: dark)" srcset="https://docs.crawl4ai.com/uploads/sponsors/20251013045338_72a71fa4ee4d2f40.png"><source width="120" media="(prefers-color-scheme: light)" srcset="https://www.capsolver.com/assets/images/logo-text.png"><img alt="Capsolver" src="https://www.capsolver.com/assets/images/logo-text.png"></picture></a> | AI-powered Captcha solving service. Supports all major Captcha types, including reCAPTCHA, Cloudflare, and more | 🥉 Bronze |

diff --git a/crawl4ai/html2text/__init__.py b/crawl4ai/html2text/__init__.py
@@ -312,6 +312,11 @@ def handle_tag(
     ) -> None:
         self.current_tag = tag
 
+        if tag == "base" and start:
+            href = attrs.get("href")
+            if href:
+                self.baseurl = urlparse.urljoin(self.baseurl, href)
+
         if self.tag_callback is not None:
             if self.tag_callback(self, tag, attrs, start) is True:
                 return

diff --git a/tests/test_base_tag_local.py b/tests/test_base_tag_local.py
@@ -0,0 +1,39 @@
+import unittest
+from crawl4ai.html2text import HTML2Text
+
+class TestBaseTag(unittest.TestCase):  # Checks that an HTML <base href> is used when resolving relative links.
+    def test_base_tag_handling(self):
+        html_content = """
+        <html>
+        <head>
+            <base href="https://example.com/subdir/">
+        </head>
+        <body>
+            <a href="page.html">Link</a>
+        </body>
+        </html>
+        """
+
+        # Construct the parser with a baseurl that differs from the <base> href, so the output shows which one was applied.
+        parser = HTML2Text(baseurl="https://override.com/") 
+
+        # Convert the document; the returned text is searched for the resolved URL below.
+        markdown = parser.handle(html_content)
+
+        print(f"Markdown Output: {markdown}")
+
+        # The relative href "page.html" joined onto the <base> href yields this absolute URL.
+        expected_url = "https://example.com/subdir/page.html"
+
+        # If <base> were ignored, the link would resolve against the constructor baseurl ("https://override.com/page.html"),
+        # or, with an empty baseurl, stay relative as "page.html"; in both cases expected_url would be absent.
+
+        if expected_url in markdown:
+            print("SUCCESS: Base tag respected.")
+        else:
+            print(f"FAILURE: Base tag ignored. Expected {expected_url} in output.")
+
+        self.assertIn(expected_url, markdown)
+
+if __name__ == "__main__":
+    unittest.main()