datacommonsorg · niveditasing · Apr 8, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py
@@ -16,6 +16,7 @@
 '''
 import os
 import pandas as pd
+import requests
 
 
 def national1900(output_folder: str):
@@ -41,8 +42,23 @@ def national1900(output_folder: str):
         # 8=Female_NonWhiteAlone
         cols = ['Age', '0', '1', '2', '3', '4', '5', '6', '7', '8']
         # reading the csv format input file and converting it to a dataframe
-        df = pd.read_csv(url,names=cols,engine='python',skiprows=9,\
-            skipfooter=15,encoding='ISO-8859-1')
+        try:
+            # Check if the URL is accessible and returns a CSV
+            response = requests.head(url, allow_redirects=True)
+            if response.status_code != 200 or 'text/csv' not in response.headers.get(
+                    'Content-Type', ''):
+                print(f"Skipping {url} as it is not a CSV or not accessible.")
+                continue
+
+            df = pd.read_csv(url,
+                             names=cols,
+                             engine='python',
+                             skiprows=9,
+                             skipfooter=15,
+                             encoding='ISO-8859-1')
+        except Exception as e:
+            print(f"Error reading {url}: {e}")
+            continue
         #Writing raw data to csv
         df.to_csv(os.path.join(
             os.path.dirname(os.path.abspath(__file__)), "raw_data",

diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py
@@ -16,6 +16,7 @@
 '''
 import os
 import pandas as pd
+import requests
 
 
 def national1960(output_folder: str):
@@ -38,8 +39,22 @@ def national1960(output_folder: str):
         ]
         # Reading the csv format input file and converting it to a dataframe.
         # Skipping unwanted rows from top and bottom.
-        df = pd.read_csv(url,names=cols,engine='python',skiprows=8,\
-            skipfooter=15)
+        try:
+            # Check if the URL is accessible and returns a CSV
+            response = requests.head(url, allow_redirects=True)
+            if response.status_code != 200 or 'text/csv' not in response.headers.get(
+                    'Content-Type', ''):
+                print(f"Skipping {url} as it is not a CSV or not accessible.")
+                continue
+
+            df = pd.read_csv(url,
+                             names=cols,
+                             engine='python',
+                             skiprows=8,
+                             skipfooter=15)
+        except Exception as e:
+            print(f"Error reading {url}: {e}")
+            continue
         #Writing raw data to csv
         df.to_csv(os.path.join(
             os.path.dirname(os.path.abspath(__file__)), "raw_data",

diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py b/scripts/us_census/pep/population_estimates_by_asr/process.py
@@ -68,13 +68,18 @@ def add_future_year_urls():
         for YEAR in range(2030, 2020, -1):
             url_to_check = url.format(YEAR=YEAR)
             try:
-                check_url = requests.head(url_to_check)
-                if check_url.status_code == 200:
+                check_url = requests.head(url_to_check, allow_redirects=True)
+                # Check both the status code AND the content type
+                content_type = check_url.headers.get('Content-Type', '')
+
+                if check_url.status_code == 200 and 'text/csv' in content_type:
                     _FILES_TO_DOWNLOAD.append({"download_path": url_to_check})
                     break
-
-            except:
-                logging.error(f"URL is not accessable {url_to_check}")
+                else:
+                    logging.warning(
+                        f"URL exists but is not a CSV: {url_to_check}")
+            except Exception as e:
+                logging.error(f"URL is not accessible {url_to_check}: {e}")
 
 
 MCF_TEMPLATE = ("Node: dcid:{pv1}\n"