@@ -0,0 +1,6 @@
StatVar,NumPlaces,MinDate,MeasurementMethods,Units
InterestRate_TreasuryNote_3Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
InterestRate_TreasuryBond_20Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
InterestRate_TreasuryNote_5Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
InterestRate_TreasuryNote_10Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
InterestRate_TreasuryBill_1Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
@@ -0,0 +1,4 @@
date,1-Month,3-Month,6-Month,1-Year,2-Year,3-Year,5-Year,7-Year,10-Year,20-Year,30-Year
1962-01-02,,,,3.22,,3.70,3.88,,4.06,4.07,
1962-02-01,,,,3.30,,3.81,4.00,,4.09,4.13,
1962-04-19,,,,3.00,,3.37,3.60,,3.82,3.91,
@@ -0,0 +1,20 @@
{
"schema_version": "1.0",
"rules": [
{
"rule_id": "check_goldens_output_csv",
"validator": "GOLDENS_CHECK",
"params": {
"golden_files": "golden_data/golden_treasury_constant_maturity_rates.csv",
"input_files": "treasury_constant_maturity_rates.csv"
}
},
{
"rule_id": "check_goldens_summary_report",
"validator": "GOLDENS_CHECK",
"params": {
"golden_files": "golden_data/golden_summary_report.csv"
}
}
]
}
9 changes: 6 additions & 3 deletions tools/import_validation/README.md
@@ -85,9 +85,9 @@ Here is an example of a complete configuration file:
},
{
"rule_id": "check_deleted_points_threshold",
"validator": "DELETED_RECORDS_COUNT",
"validator": "DELETED_RECORDS_PERCENT",
"params": {
"threshold": 10
"threshold": 1
}
},
{
@@ -151,7 +151,10 @@ The following validations are currently supported:
| `NUM_OBSERVATIONS_CHECK` | Checks that the number of observations is within a defined range. | `stats` | `minimum`, `maximum`, or `value` (integer) |
| `UNIT_CONSISTENCY_CHECK` | Checks that the unit is the same for all StatVars. | `stats` | None |
| `MIN_VALUE_CHECK` | Checks that the minimum value is not below a defined minimum. | `stats` | `minimum` (integer or float) |
| `MAX_VALUE_CHECK` | Checks that the maximum value is not above a defined maximum. | `stats` | `maximum` (integer or float) |
| `MAX_VALUE_CHECK` | Checks that the maximum value is not above a defined maximum. | `stats` | `maximum` (integer or float) |
| `GOLDENS_CHECK` | Verifies that the data contains all records defined in a golden set. | `stats` | `golden_files` (list), `input_files` (list) |

For more details on the validations, please refer to [Validations.md](Validations.md).

## Output

131 changes: 131 additions & 0 deletions tools/import_validation/Validations.md
@@ -0,0 +1,131 @@
# Validation Config

The default validations in [validation_config.json](validation_config.json) are
applied to all imports during auto refresh.

To add import-specific validations, create a `validation_config.json` in the
import script folder and reference it via the
`config_overrides.validation_config_file` parameter in `manifest.json`.
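For example, the override might be wired up in `manifest.json` along these lines (a sketch inferred from the dotted parameter path above; the exact manifest schema may differ):

```json
{
  "config_overrides": {
    "validation_config_file": "validation_config.json"
  }
}
```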

To override or disable a default validation rule, copy the rule into the
import-specific config, keep the same rule ID, and set the `enabled` setting
to `false`.

Here is an example that overrides the deleted-records threshold and
disables the lint check for a specific import:
```json
{
  "schema_version": "1.0",
  "rules": [
    {
      "rule_id": "check_deleted_records_percent",
      "description": "Override default threshold to 10%",
      "validator": "DELETED_RECORDS_PERCENT",
      "params": {
        "threshold": 10
      }
    },
    {
      "rule_id": "check_lint_error_count",
      "enabled": false
    }
  ]
}
```

Here are some additional details for each validation rule.

## Golden Set Validation with `GOLDENS_CHECK`

The `GOLDENS_CHECK` validator ensures that the import contains a specific set of expected records. This is useful for verifying that critical StatVars, Places, or specific metadata combinations are always present in the output.

The validator compares the input data (usually from the `stats` data source) against one or more "golden" files (MCF or CSV).

If any combination of values in a row of the golden file is not present
in the input, the validation fails.
The missing golden rows are listed in the validation report JSON.
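Conceptually, the matching works like this minimal sketch (an illustration of the semantics only; the actual `validator_goldens` implementation may differ):

```python
def find_missing_goldens(golden_rows, input_rows, key_properties=None):
    """Return golden rows whose property values no input row matches."""
    missing = []
    for golden in golden_rows:
        # Match on the configured key properties, or on every property
        # present in the golden row when no keys are specified.
        keys = key_properties or golden.keys()
        if not any(
                all(row.get(k) == golden.get(k) for k in keys)
                for row in input_rows):
            missing.append(golden)
    return missing

golden = [{'StatVar': 'InterestRate_TreasuryNote_3Year', 'NumPlaces': '1'}]
inputs = [{'StatVar': 'InterestRate_TreasuryNote_3Year', 'NumPlaces': '1',
           'MinDate': '1962-01-02'}]
print(find_missing_goldens(golden, inputs))  # prints []
```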

### Configuration Parameters
- `golden_files`: A list or glob pattern of golden MCF or CSV files to compare against.
- `goldens_key_property`: A list of properties to match on. If not specified, all properties in the golden record must match.
- `input_files`: (Optional) A list or glob pattern of input files to compare against the goldens. If not provided, the data source defined in the rule's `scope` is used.
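For instance, a rule that matches only on selected key properties could look like this (the `rule_id` and file name here are hypothetical):

```json
{
  "rule_id": "check_goldens_by_statvar",
  "validator": "GOLDENS_CHECK",
  "params": {
    "golden_files": ["golden_data/golden_statvars.csv"],
    "goldens_key_property": ["StatVar", "NumPlaces"]
  }
}
```

With `goldens_key_property` set, other columns in the golden rows are ignored during matching.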

### GOLDENS_CHECK Validator Example

**Rule:** "Ensure that observations for `Count_Person` and `Median_Age_Person` are present in the import as defined in our critical golden set."

```json
{
  "rule_id": "verify_critical_obs",
  "validator": "GOLDENS_CHECK",
  "params": {
    "golden_files": ["golden_data/critical_stats.csv"],
    "input_files": "processed_obs.csv"
  }
}
```

The goldens can be generated from a CSV file using the `validator_goldens.py`
script.

To generate goldens from `summary_report.csv`, verifying that all the expected
StatVars are generated with the corresponding number of places and dates, run
the following:

```shell
python3 validator_goldens.py \
  --validate_goldens_input=summary_report.csv \
  --generate_goldens=golden_data/golden_summary_report.csv \
  --generate_goldens_property_sets="StatVar|NumPlaces|MinDate|MeasurementMethods|Units|ScalingFactors|observationPeriods"
```

To generate goldens for observations that include important
StatVars, places, and dates, run the following with the selected StatVar and
place DCIDs loaded from text files:

```shell
python3 validator_goldens.py \
  --validate_goldens_input=output/observations.csv \
  --generate_goldens=golden_data/golden_observations.csv \
  --goldens_must_include="variableMeasured:gs://unresolved_mcf/import_validation/nl_statvars.csv,observationAbout:gs://unresolved_mcf/import_validation/top_100k_places.csv" \
  --generate_goldens_property_sets="variableMeasured|unit|scalingFactor|observationPeriod|measurementMethod,observationAbout,observationDate"
```

To enable goldens validation with the files generated above
while relaxing the default deleted-records threshold, add the following
validation rules to the validation config:

```json
{
  "schema_version": "1.0",
  "rules": [
    {
      "rule_id": "check_deleted_records_percent",
      "description": "Relax default deleted records threshold to 10% with additional goldens check to catch statvar series deletions",
      "validator": "DELETED_RECORDS_PERCENT",
      "params": {
        "threshold": 10
      }
    },
    {
      "rule_id": "check_golden_summary_report",
      "validator": "GOLDENS_CHECK",
      "params": {
        "golden_files": "golden_data/golden_summary_report.csv"
      }
    },
    {
      "rule_id": "check_golden_observations_statvar_places_dates",
      "validator": "GOLDENS_CHECK",
      "params": {
        "golden_files": "golden_data/golden_observations.csv",
        "input_files": "output/observations.csv"
      }
    }
  ]
}
```

17 changes: 14 additions & 3 deletions tools/import_validation/runner.py
@@ -82,6 +82,7 @@ def __init__(self, validation_config_path: str, differ_output: str,
(self.validator.validate_min_value_check, 'stats'),
'MAX_VALUE_CHECK':
(self.validator.validate_max_value_check, 'stats'),
'GOLDENS_CHECK': (self.validator.validate_goldens, 'stats'),
}

self._initialize_data_sources(stats_summary, lint_report, differ_output)
@@ -199,14 +200,24 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
validation_func, data_source_key = self.validation_dispatch[
validator_name]

            rule_params = dict(rule.get('params', {}))
            if rule_params:
                # Add default parameters for output folder
                output_dir = self.validation_output
                if output_dir and not output_dir.endswith(
                        '/') and not os.path.isdir(output_dir):
                    output_dir = os.path.dirname(output_dir)
                if output_dir:
                    rule_params.setdefault('output_path', output_dir)

if validator_name == 'SQL_VALIDATOR':
result = validation_func(self.data_sources['stats'],
self.data_sources['differ'],
rule['params'])
rule_params)
elif validator_name == 'DELETED_RECORDS_PERCENT':
result = validation_func(
self.data_sources['differ'],
self.data_sources.get('differ_summary'), rule['params'])
self.data_sources.get('differ_summary'), rule_params)
else:
scope = rule.get('scope', {})
if isinstance(scope, str):
@@ -222,7 +233,7 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
regex_patterns=variables_config.get('regex'),
contains_all=variables_config.get('contains_all'))

result = validation_func(df, rule['params'])
result = validation_func(df, rule_params)

result.name = rule['rule_id']
result.validation_params = rule.get('params', {})
66 changes: 66 additions & 0 deletions tools/import_validation/validator.py
@@ -20,8 +20,12 @@

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPT_DIR)
_DATA_DIR = os.path.dirname(os.path.dirname(_SCRIPT_DIR))
sys.path.append(os.path.join(_DATA_DIR, 'util'))

from result import ValidationResult, ValidationStatus
from counters import Counters
import validator_goldens


class Validator:
@@ -926,3 +930,65 @@ def validate_max_value_check(self, stats_df: pd.DataFrame,
'rows_succeeded': rows_succeeded,
'rows_failed': rows_failed
})

    def validate_goldens(self, df: pd.DataFrame,
                         params: dict) -> ValidationResult:
        """Validates records against a golden set.

        Args:
            df: A DataFrame containing the data to validate (used if
                'input_files' is not provided in params).
            params: A dictionary containing:
                'golden_files': Path(s) to golden MCF/CSV files.
                'input_files': (Optional) Path(s) to input files. If not
                    provided, the 'df' will be used.
                'output_path': (Optional) Folder or output filename to save
                    missing goldens.
                And other optional validator_goldens config settings (e.g.,
                    goldens_key_property).

        Returns:
            A ValidationResult object.
        """
        golden_files = params.get('golden_files')
        if not golden_files:
            return ValidationResult(
                ValidationStatus.CONFIG_ERROR,
                'GOLDENS_CHECK',
                message=
                "Configuration error: 'golden_files' must be specified for GOLDENS_CHECK validator."
            )

        try:
            inputs = params.get('input_files')
            if not inputs:
                inputs = df.to_dict('index')
            output_path = params.get('output_path')
            # Compare input records against the golden set.
            counters = Counters()
            missing_goldens = validator_goldens.validate_goldens(
                inputs,
                golden_files,
                output_path,
                config=params,
                counters=counters)
            details = {
                name: value
                for name, value in counters.get_counters().items()
                if 'golden' in name
            }
            if not missing_goldens:
                return ValidationResult(ValidationStatus.PASSED,
                                        'GOLDENS_CHECK',
                                        details=details)
            details['missing_goldens'] = missing_goldens

            return ValidationResult(
                ValidationStatus.FAILED,
                'GOLDENS_CHECK',
                message=f"Found {len(missing_goldens)} missing golden records.",
                details=details)

        except IOError as e:
            return ValidationResult(
                ValidationStatus.DATA_ERROR,
                'GOLDENS_CHECK',
                message=f"Error during golden validation: {e}")