apify · honzajavorek · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026 · Mar 24, 2026
@@ -9,7 +9,7 @@ import CodeBlock from '@theme/CodeBlock';
 import LegacyJsCourseAdmonition from '@site/src/components/LegacyJsCourseAdmonition';
 import Exercises from '../scraping_basics/_exercises.mdx';
 import JsLlmProjectsExercise from '!!raw-loader!roa-loader!./exercises/js_llm_projects.mjs';
-import CnnSportsShortestArticleExercise from '!!raw-loader!roa-loader!./exercises/cnn_sports_shortest_article.mjs';
+import EurozonePopulationExercise from '!!raw-loader!roa-loader!./exercises/eurozone_population.mjs';
 
 <LegacyJsCourseAdmonition />
 
@@ -394,19 +394,13 @@ Your output should look something like this:
   <CodeBlock language="js">{JsLlmProjectsExercise.code}</CodeBlock>
 </details>
 
-### Find the shortest CNN article which made it to the Sports homepage
+### Count eurozone population from country pages
 
-Scrape the [CNN Sports](https://edition.cnn.com/sport) homepage. For each linked article, calculate its length in characters:
+Scrape the [countries using the euro](https://european-union.europa.eu/institutions-law-budget/euro/countries-using-euro_en) page.
 
-- Locate the element that holds the main content of the article.
-- Use `.text()` to extract all the content as plain text.
-- Use `.length` to calculate the character count.
-
-Skip pages without text (like those that only have a video). Sort the results and print the URL of the shortest article that made it to the homepage.
-
-At the time of writing, the shortest article on the CNN Sports homepage is [about a donation to the Augusta National Golf Club](https://edition.cnn.com/2024/10/03/sport/masters-donation-hurricane-helene-relief-spt-intl/), which is just 1,642 characters long.
+Locate links for countries in the **Euro area countries** section. Visit each linked country detail page, find the value labeled **Population**, and sum them all to get the total population of all countries using the euro as their currency. Print a single number: the sum.
 
 <details>
   <summary>Solution</summary>
-  <CodeBlock language="js">{CnnSportsShortestArticleExercise.code}</CodeBlock>
+  <CodeBlock language="js">{EurozonePopulationExercise.code}</CodeBlock>
 </details>
@@ -0,0 +1,42 @@
+import * as cheerio from 'cheerio';
+
+/**
+ * Downloads a page and parses its HTML with Cheerio.
+ *
+ * @param {string} url Address of the page to fetch.
+ * @returns The result of `cheerio.load()` called on the response body.
+ * @throws {Error} With the message `HTTP <status>` when `response.ok` is false.
+ */
+async function download(url) {
+  const response = await fetch(url);
+  if (!response.ok) {
+    throw new Error(`HTTP ${response.status}`);
+  }
+  const html = await response.text();
+  return cheerio.load(html);
+}
+
+/**
+ * Reads the population figure from a country detail page.
+ *
+ * Goes through the `<li>` elements and uses the first one whose text
+ * contains "Population".
+ *
+ * @param $ Cheerio instance loaded with the country page.
+ * @returns {number} The result of `Number.parseInt()`; this is `NaN` when
+ *   the cleaned-up text does not start with digits.
+ * @throws {Error} "Population not found" when no `<li>` mentions "Population".
+ */
+function parsePopulation($) {
+  for (const element of $('li').toArray()) {
+    const text = $(element).text();
+    if (text.includes('Population')) {
+      // Remove the label and every ordinary space so that parseInt sees one
+      // run of digits. NOTE(review): assumes the page groups thousands with
+      // plain spaces; confirm against the live page.
+      const digits = text
+        .replace('Population:', '')
+        .replaceAll(' ', '');
+      return Number.parseInt(digits, 10);
+    }
+  }
+  throw new Error('Population not found');
+}
+
+// Listing page that links to the individual country detail pages.
+const listingUrl = 'https://european-union.europa.eu/institutions-law-budget/euro/countries-using-euro_en';
+const $ = await download(listingUrl);
+
+// NOTE(review): assumes the first accordion item is the "Euro area countries"
+// section; confirm against the live page.
+const $euroCountriesAccordion = $('.ecl-accordion__item').first();
+const $countryLinks = $euroCountriesAccordion.find('li a');
+
+// Start the download of every country page right away; each promise resolves
+// to the population parsed from that page.
+const promises = $countryLinks.toArray().map(async (element) => {
+  // Resolve the link against the listing URL in case the href is relative.
+  const countryUrl = new URL($(element).attr('href'), listingUrl).href;
+  const $country = await download(countryUrl);
+  return parsePopulation($country);
+});
+
+// Promise.all rejects as soon as any download or parse throws.
+const populations = await Promise.all(promises);
+// Non-integer results (such as NaN from a failed parseInt) are left out of the sum.
+const totalPopulation = populations
+  .filter((population) => Number.isInteger(population))
+  .reduce((sum, population) => sum + population, 0);
+
+console.log(totalPopulation);
@@ -144,10 +144,10 @@ teardown_file() {
   [[ "$output" == *' updatedOn: '* ]]
 }
 
-@test "finds the shortest CNN sports article" {
-  run node cnn_sports_shortest_article.mjs
+@test "counts total eurozone population" {
+  run node eurozone_population.mjs
 
-  [[ "$output" == 'https://edition.cnn.com/'* ]]
+  [[ "$output" -gt 300000000 ]]
 }
 
 @test "scrapes F1 Academy driver details with Crawlee" {

@@ -8,7 +8,7 @@ slug: /scraping-basics-python/scraping-variants
 import CodeBlock from '@theme/CodeBlock';
 import Exercises from '../scraping_basics/_exercises.mdx';
 import PythonJobsDatabaseExercise from '!!raw-loader!roa-loader!./exercises/python_jobs_database.py';
-import CnnSportsShortestArticleExercise from '!!raw-loader!roa-loader!./exercises/cnn_sports_shortest_article.py';
+import EurozonePopulationExercise from '!!raw-loader!roa-loader!./exercises/eurozone_population.py';
 
 **In this lesson, we'll scrape the product detail pages to represent each product variant as a separate item in our dataset.**
 
@@ -348,19 +348,13 @@ You can find everything you need for working with dates and times in Python's [`
   <CodeBlock language="py">{PythonJobsDatabaseExercise.code}</CodeBlock>
 </details>
 
-### Find the shortest CNN article which made it to the Sports homepage
+### Count eurozone population from country pages
 
-Scrape the [CNN Sports](https://edition.cnn.com/sport) homepage. For each linked article, calculate its length in characters:
+Scrape the [countries using the euro](https://european-union.europa.eu/institutions-law-budget/euro/countries-using-euro_en) page.
 
-- Locate the element that holds the main content of the article.
-- Use [`get_text()`](https://beautiful-soup-4.readthedocs.io/en/latest/index.html#get-text) to extract all the content as plain text.
-- Use `len()` to calculate the character count.
-
-Skip pages without text (like those that only have a video). Sort the results and print the URL of the shortest article that made it to the homepage.
-
-At the time of writing, the shortest article on the CNN Sports homepage is [about a donation to the Augusta National Golf Club](https://edition.cnn.com/2024/10/03/sport/masters-donation-hurricane-helene-relief-spt-intl/), which is just 1,642 characters long.
+Locate links for countries in the **Euro area countries** section. Visit each linked country detail page, find the value labeled **Population**, and sum them all to get the total population of all countries using the euro as their currency. Print a single number: the sum.
 
 <details>
   <summary>Solution</summary>
-  <CodeBlock language="py">{CnnSportsShortestArticleExercise.code}</CodeBlock>
+  <CodeBlock language="py">{EurozonePopulationExercise.code}</CodeBlock>
 </details>
@@ -0,0 +1,29 @@
+import httpx
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+
+
+def download(url: str) -> BeautifulSoup:
+    """Fetch ``url`` and parse the response body with Beautiful Soup.
+
+    Propagates whatever ``response.raise_for_status()`` raises for an
+    unsuccessful response.
+    """
+    response = httpx.get(url)
+    response.raise_for_status()
+    return BeautifulSoup(response.text, "html.parser")
+
+
+def parse_population(country_soup: BeautifulSoup) -> int:
+    """Return the population listed on a country detail page.
+
+    Uses the first ``<li>`` returned by ``select("li")`` whose text contains
+    "Population", removes the "Population:" label and all ordinary spaces,
+    and converts what is left with ``int()``.
+
+    The function never returns ``None``: it either returns an integer or
+    raises, so callers can add the result to a running total directly.
+
+    Raises:
+        ValueError: If no ``<li>`` mentions "Population", or if ``int()``
+            cannot parse the remaining text.
+    """
+    for item in country_soup.select("li"):
+        if "Population" in item.text:
+            # NOTE(review): assumes thousands are grouped with plain spaces;
+            # confirm against the live page.
+            digits = item.text.replace("Population:", "").replace(" ", "")
+            return int(digits)
+    raise ValueError("Population not found")
+
+
+# Listing page that links to the individual country detail pages.
+listing_url = "https://european-union.europa.eu/institutions-law-budget/euro/countries-using-euro_en"
+listing_soup = download(listing_url)
+
+total_population = 0
+# NOTE(review): assumes the first accordion item is the "Euro area countries"
+# section; confirm against the live page.
+euro_countries_accordion = listing_soup.select(".ecl-accordion__item")[0]
+for country_link in euro_countries_accordion.select("li a"):
+    # Resolve the link against the listing URL in case the href is relative.
+    country_url = urljoin(listing_url, country_link["href"])
+    country_soup = download(country_url)
+    total_population += parse_population(country_soup)
+print(total_population)
@@ -134,10 +134,10 @@ teardown() {
   [[ "$output" == *"'posted_on': datetime.date("* ]]
 }
 
-@test "finds the shortest CNN sports article" {
-  run uv run -q --with=httpx --with=beautifulsoup4 python cnn_sports_shortest_article.py
+@test "counts total eurozone population" {
+  run uv run -q --with=httpx --with=beautifulsoup4 python eurozone_population.py
 
-  [[ "$output" == 'https://edition.cnn.com/'* ]]
+  [[ "$output" -gt 300000000 ]]
 }
 
 @test "scrapes F1 Academy driver details with Crawlee" {