CCExtractor · pulk17 · Mar 21, 2026
@@ -0,0 +1,27 @@
+"""Add never_worked column to regression_test table
+
+Revision ID: d9f4e5a6b7c8
+Revises: c8f3a2b1d4e5
+Create Date: 2026-03-20 23:25:21.411651
+
+"""
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+# `revision` is this migration's ID; `down_revision` is the migration it revises.
+revision = 'd9f4e5a6b7c8'
+down_revision = 'c8f3a2b1d4e5'
+# No branch labels or dependencies on other revisions are declared here.
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """
+    Add the ``never_worked`` flag column to the ``regression_test`` table.
+
+    The column is declared NOT NULL, so a server-side default of false is
+    attached to it.
+    """
+    # A plain string server_default is emitted as a quoted literal ('false'),
+    # which not every backend accepts for a boolean column. sa.false() is
+    # rendered by the dialect itself (false or 0), keeping the DDL portable.
+    op.add_column(
+        'regression_test',
+        sa.Column('never_worked', sa.Boolean(), nullable=False, server_default=sa.false())
+    )
+
+
+def downgrade():
+    """Revert this migration by dropping ``never_worked`` from ``regression_test``."""
+    op.drop_column(table_name='regression_test', column_name='never_worked')
@@ -46,7 +46,8 @@
 from mod_sample.models import Issue
 from mod_test.controllers import get_test_results
 from mod_test.models import (Fork, Test, TestPlatform, TestProgress,
-                             TestResult, TestResultFile, TestStatus, TestType)
+                             TestResult, TestResultFile, TestResultStatus,
+                             TestStatus, TestType)
 from utility import is_valid_signature, request_from_github
 
 # Timeout constants (in seconds)
@@ -2756,6 +2757,7 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo:
     extra_failed_tests = []
     common_failed_tests = []
     fixed_tests = []
+    never_worked_tests = []
     category_stats = []
 
     test_results = get_test_results(test)
@@ -2764,20 +2766,23 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo:
         category_name = category_results['category'].name
 
         category_test_pass_count = 0
-        for test in category_results['tests']:
-            if not test['error']:
+        for t in category_results['tests']:
+            if not t['error']:
                 category_test_pass_count += 1
-                if last_test_master and getattr(test['test'], platform_column) != last_test_master.id:
-                    fixed_tests.append(test['test'])
+                if last_test_master and getattr(t['test'], platform_column) != last_test_master.id:
+                    fixed_tests.append(t['test'])
             else:
-                if last_test_master and getattr(test['test'], platform_column) != last_test_master.id:
-                    common_failed_tests.append(test['test'])
+                # Separate out tests that have NEVER passed on any CCExtractor version
+                if t['status'] == TestResultStatus.never_worked:
+                    never_worked_tests.append(t['test'])
+                elif last_test_master and getattr(t['test'], platform_column) != last_test_master.id:
+                    common_failed_tests.append(t['test'])
                 else:
-                    extra_failed_tests.append(test['test'])
+                    extra_failed_tests.append(t['test'])
 
         category_stats.append(CategoryTestInfo(category_name, len(category_results['tests']), category_test_pass_count))
 
-    return PrCommentInfo(category_stats, extra_failed_tests, fixed_tests, common_failed_tests, last_test_master)
+    return PrCommentInfo(category_stats, extra_failed_tests, fixed_tests, common_failed_tests, last_test_master, never_worked_tests)
 
 
 def comment_pr(test: Test) -> str:
@@ -2813,6 +2818,9 @@ def comment_pr(test: Test) -> str:
         log.debug(f"GitHub PR Comment ID {comment.id} Uploaded for Test_id: {test_id}")
     except Exception as e:
         log.error(f"GitHub PR Comment Failed for Test_id: {test_id} with Exception {e}")
+
+    # Determine PR status:
+    # SUCCESS if no regressions caused by PR (never_worked tests don't count)
     return Status.SUCCESS if len(comment_info.extra_failed_tests) == 0 else Status.FAILURE
 
 

@@ -179,3 +179,4 @@ class PrCommentInfo:
     fixed_tests: List[RegressionTest]
     common_failed_tests: List[RegressionTest]
     last_test_master: Test
+    never_worked_tests: List[RegressionTest]
@@ -160,6 +160,7 @@ def test_edit(regression_id):
         test.input_type = InputType.from_string(form.input_type.data)
         test.output_type = OutputType.from_string(form.output_type.data)
         test.description = form.description.data
+        test.never_worked = form.never_worked.data
 
         g.db.commit()
         g.log.info(f'regression test with id: {regression_id} updated!')
@@ -174,6 +175,7 @@ def test_edit(regression_id):
         form.input_type.data = test.input_type.value
         form.output_type.data = test.output_type.value
         form.description.data = test.description
+        form.never_worked.data = test.never_worked
 
     return {'form': form, 'regression_id': regression_id}
 
@@ -247,6 +249,7 @@ def test_add():
             input_type=InputType.from_string(form.input_type.data),
             output_type=OutputType.from_string(form.output_type.data),
             description=form.description.data,
+            never_worked=form.never_worked.data
         )
         g.db.add(new_test)
         category = Category.query.filter(Category.id == form.category_id.data).first()

@@ -1,8 +1,8 @@
 """Maintain forms related to CRUD operations on regression tests."""
 
 from flask_wtf import FlaskForm
-from wtforms import (HiddenField, IntegerField, SelectField, StringField,
-                     SubmitField, TextAreaField)
+from wtforms import (BooleanField, HiddenField, IntegerField, SelectField,
+                     StringField, SubmitField, TextAreaField)
 from wtforms.validators import DataRequired, InputRequired, Length
 
 from mod_regression.models import InputType, OutputType
@@ -36,6 +36,7 @@ class CommonTestForm(FlaskForm):
     )
     category_id = SelectField("Category", coerce=int)
     expected_rc = IntegerField("Expected Runtime Code", [InputRequired(message="Expected Runtime Code can't be empty")])
+    never_worked = BooleanField("Never Worked", default=False)
 
 
 class AddTestForm(CommonTestForm):

@@ -97,9 +97,10 @@ class RegressionTest(Base):
     last_passed_on_windows = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL"))
     last_passed_on_linux = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL"))
     description = Column(String(length=1024))
+    never_worked = Column(Boolean(), default=False, nullable=False, server_default='false')
 
     def __init__(self, sample_id, command, input_type, output_type, category_id, expected_rc,
-                 active=True, description="") -> None:
+                 active=True, description="", never_worked=False) -> None:
         """
         Parametrized constructor for the RegressionTest model.
 
@@ -117,7 +118,10 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp
         :type expected_rc: int
         :param active: The value of the 'active' field of RegressionTest model
         :type active: bool
-
+        :param description: The value of the 'description' field of RegressionTest model
+        :type description: str
+        :param never_worked: Boolean flag whether the test has never worked for this sample
+        :type never_worked: bool
         """
         self.sample_id = sample_id
         self.command = command
@@ -127,6 +131,7 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp
         self.expected_rc = expected_rc
         self.active = active
         self.description = description
+        self.never_worked = never_worked
 
     def __repr__(self) -> str:
         """

@@ -1,7 +1,7 @@
 """Logic to find all tests, their progress and details of individual test."""
 
 import os
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional, TypedDict
 
 from flask import (Blueprint, Response, abort, g, jsonify, redirect, request,
                    url_for)
@@ -16,9 +16,25 @@
 from mod_regression.models import (Category, RegressionTestOutput,
                                    regressionTestLinkTable)
 from mod_test.models import (Fork, Test, TestPlatform, TestProgress,
-                             TestResult, TestResultFile, TestStatus, TestType)
+                             TestResult, TestResultFile, TestResultStatus,
+                             TestStatus, TestType)
 from utility import serve_file_download
 
+
+class CategoryTestItem(TypedDict):
+    """Shape of one per-regression-test entry in a category's ``tests`` list."""
+
+    # The regression test this entry describes.
+    test: Any  # RegressionTest
+    # Result recorded for that regression test in the run, or None when there is none.
+    result: Optional[TestResult]
+    # Result files belonging to that regression test in the run.
+    files: List[TestResultFile]
+    # True when the test is considered failed.
+    error: bool
+    # Classification of the outcome: passed, failed or never_worked.
+    status: TestResultStatus
+
+
+class CategoryResult(TypedDict):
+    """Shape of one per-category entry in the list returned by get_test_results."""
+
+    # The category the entries in `tests` belong to.
+    category: Category
+    # One item per regression test of the category that is part of the test run.
+    tests: List[CategoryTestItem]
+    # True when one or more tests in the category failed.
+    error: bool
+
+
 mod_test = Blueprint('test', __name__)
 
 
@@ -53,24 +69,33 @@
     }
 
 
-def get_test_results(test) -> List[Dict[str, Any]]:
+def get_test_results(test) -> List[CategoryResult]:
     """
-    Get test results for each category.
+    Get test results for each category, with three-way pass/fail/never_worked classification.
+
+    The never_worked status is determined by the explicit `never_worked` boolean flag on each
+    RegressionTest, which is admin-editable from the regression test edit page.
 
     :param test: The test to retrieve the data for.
     :type test: Test
     """
     populated_categories = g.db.query(regressionTestLinkTable.c.category_id).subquery()
     categories = Category.query.filter(Category.id.in_(populated_categories)).order_by(Category.name.asc()).all()
+
+    # Collect all regression test IDs that are part of this test run
+    all_rt_ids = set(test.get_customized_regressiontests())
+
     results = [{
         'category': category,
         'tests': [{
             'test': rt,
             'result': next((r for r in test.results if r.regression_test_id == rt.id), None),
             'files': TestResultFile.query.filter(
                 and_(TestResultFile.test_id == test.id, TestResultFile.regression_test_id == rt.id)
-            ).all()
-        } for rt in category.regression_tests if rt.id in test.get_customized_regressiontests()]
+            ).all(),
+            'error': False,
+            'status': TestResultStatus.passed
+        } for rt in category.regression_tests if rt.id in all_rt_ids]
     } for category in categories]
     # Run through the categories to see if they should be marked as failed or passed. A category failed if one or more
     # tests in said category failed.
@@ -109,6 +134,15 @@
                 category_test['files'] = [TestResultFile(-1, -1, -1, '', got)]
             # Store test status in error field
             category_test['error'] = test_error
+
+            # --- Three-way classification: passed / failed / never_worked ---
+            if not test_error:
+                category_test['status'] = TestResultStatus.passed
+            elif category_test['test'].never_worked:
+                category_test['status'] = TestResultStatus.never_worked
+            else:
+                category_test['status'] = TestResultStatus.failed
+
             # Update category error
             error = error or test_error
         category['error'] = error

@@ -13,6 +13,7 @@
 import datetime
 import os
 import string
+from enum import Enum
 from typing import Any, Dict, List, Tuple, Type, Union
 
 import pytz
@@ -75,6 +76,21 @@ def stages() -> List[Tuple[str, str]]:
         return [TestStatus.preparation, TestStatus.testing, TestStatus.completed]
 
 
+class TestResultStatus(Enum):
+    """
+    Outcome of one regression test inside a single test run.
+
+    Values are never persisted in the database; they are computed when results
+    are read, based on the output hash/status comparison (passed vs failed) and
+    on the `never_worked` boolean flag of the RegressionTest model.
+    """
+
+    # Output matched the expected output exactly.
+    passed = "passed"
+    # Output differed from what was expected.
+    failed = "failed"
+    # Test failed and is flagged as never having worked before.
+    never_worked = "never_worked"
+
+
 class Fork(Base):
     """Model to store and manage fork."""
 

@@ -41,6 +41,14 @@ NOTE: The following tests have been failing on the master branch as well as the
 {% endfor %}
 </ul>
 {% endif %}
+{% if comment_info.never_worked_tests | length %}
+⚠️ <b>Note:</b> The following tests have NEVER passed on any version of CCExtractor for this platform. These are pre-existing issues and are NOT caused by this PR:
+<ul>
+{% for test in comment_info.never_worked_tests %}
+<li> ccextractor {{ test.command }} <a href="{{ url_for('sample.sample_by_id', sample_id=test.sample.id, _external=True) }}">{{ test.sample.sha[:10] }}...</a></li>
+{% endfor %}
+</ul>
+{% endif %}
 {% if comment_info.fixed_tests | length %}
 Congratulations: Merging this PR would fix the following tests:
 <ul>

@@ -74,7 +74,7 @@ <h1>Regression tests</h1>
                         {% endif %}
                         <tr data-category="[{{ test.categories|join(', ', attribute='id') }}]">
                             <td>{{ test.id }}</td>
-                            <td>{{ test.command }}</td>
+                            <td>{{ test.command }}{% if test.never_worked %} <span class="label secondary" title="This test has been marked as never having produced correct output">Never worked</span>{% endif %}</td>
                             <td id="status-toggle-{{test.id}}">{{ test.active }}</td>
                             <td>
                                 <a href="{{ url_for('.test_view', regression_id=test.id) }}" title="View details"><i class="fa fa-info-circle"></i></a>&nbsp;

@@ -56,6 +56,12 @@ <h5>Regression Test Add</h5>
             <div class="medium-12 columns">
                 {{ macros.render_field(form.expected_rc) }}
             </div>
+            <div class="medium-12 columns">
+                <label for="never_worked">
+                    {{ form.never_worked() }}
+                    {{ form.never_worked.label.text }}
+                </label>
+            </div>
             <div class="medium-12 columns">
                 {{ macros.render_field(form.submit) }}
             </div>

@@ -56,6 +56,12 @@ <h1>Regression Test Edit</h1>
                 <div class="medium-12 columns">
                     {{ macros.render_field(form.expected_rc) }}
                 </div>
+                <div class="medium-12 columns">
+                    <label for="never_worked">
+                        {{ form.never_worked() }} {{ form.never_worked.label.text }}
+                        <span style="color: #999; font-size: 0.85em;">(Check if this test has never produced correct output on any CCExtractor version)</span>
+                    </label>
+                </div>
                 <div class="medium-12 columns">
                     {{ macros.render_field(form.submit) }}
                 </div>

@@ -12,6 +12,15 @@ <h1>Regression test  {{ test.id }}</h1>
             <p>Input type: {{ test.input_type.description }}</p>
             <p>Output type: {{ test.output_type.description }}</p>
             <p>Description: {{ test.description or "No description" }}</p>
+            <p>Status:
+                {% if test.never_worked %}
+                    <span class="label secondary">Never worked</span>
+                    <span style="color: #999; font-size: 0.85em;">(This test has never produced correct output on any CCExtractor version)</span>
+                {% else %}
+                    <span class="label success">Normal</span>
+                    <span style="color: #999; font-size: 0.85em;">(This test has not been flagged as never having worked)</span>
+                {% endif %}
+            </p>
             <p id="tags" href="#tags">
                 {% set sample = test.sample %}
                 Tags of sample:

@@ -14,6 +14,9 @@
             display: block;
             text-align: center;
         }
+        .category-header.never-worked {
+            color: #777;
+        }
     </style>
 {% endblock %}
 
@@ -112,7 +115,19 @@ <h2>An error occurred</h2>
                         <h2>Test results</h2>
                         <p>Click on the category names to expand the results, and on "Fail" to see the differences.</p>
                         {% for result in results %}
-                            <h4 class="category-header {{ 'fail' if result.error else 'pass' }}" data-category="{{ result.category.id }}">{{ result.category.name }} - {{ 'Fail' if result.error else 'Pass' }}</h4>
+                            {% set category_class = 'fail' if result.error else 'pass' %}
+                            {% if result.error %}
+                                {% set ns = namespace(all_nw=true) %}
+                                {% for t in result.tests %}
+                                    {% if t.error and t.status.value != 'never_worked' %}
+                                        {% set ns.all_nw = false %}
+                                    {% endif %}
+                                {% endfor %}
+                                {% if ns.all_nw %}
+                                    {% set category_class = 'never-worked' %}
+                                {% endif %}
+                            {% endif %}
+                            <h4 class="category-header {{ category_class }}" data-category="{{ result.category.id }}">{{ result.category.name }} - {% if category_class == 'pass' %}Pass{% elif category_class == 'never-worked' %}Never Worked{% else %}Fail{% endif %}</h4>
                             {% if result.tests | length != 0 %}
                             <div id="no-more-tables">
                                 <table class="col-md-12 table-bordered table-striped table-condensed sortable hide cf" data-category="{{ result.category.id }}">
@@ -135,6 +150,9 @@ <h4 class="category-header {{ 'fail' if result.error else 'pass' }}" data-catego
                                                 <td data-title="Runtime (ms)">{{ test.result.runtime }}</td>
                                                 <td data-title="Exit Code">{{ test.result.exit_code }}{{ '' if test.result.exit_code == test.result.expected_rc else ' (Expected '~test.result.expected_rc~')' }}</td>
                                                 <td data-title="Result">
+                                                    {% if test.status is defined and test.status.value == 'never_worked' %}
+                                                        <span class="label secondary" title="This test has never passed on any CCExtractor version for this platform">Never worked</span>
+                                                    {% endif %}
                                                     {% for file in test.files -%}
                                                         {% if file.got is not none %}
                                                           {% set no_error = namespace(found=False) %}