Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions migrations/versions/d9f4e5a6b7c8_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Add never_worked column to regression_test table

Revision ID: d9f4e5a6b7c8
Revises: c8f3a2b1d4e5
Create Date: 2026-03-20 23:25:21.411651

"""
import sqlalchemy as sa
from alembic import op

# Revision identifiers, used by Alembic: `revision` names this migration
# and `down_revision` names the migration it revises.
revision = 'd9f4e5a6b7c8'
down_revision = 'c8f3a2b1d4e5'
# This migration declares no branch labels and no dependencies on other revisions.
branch_labels = None
depends_on = None


def upgrade():
    """Add the ``never_worked`` boolean column to the ``regression_test`` table.

    The column is created NOT NULL, so a server-side default is supplied to
    give the rows that already exist a value when the column is added.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # sa.false() is compiled by the active dialect into its own boolean
    # literal. A plain string default is emitted as the quoted literal
    # 'false', which backends that store booleans as integers may reject
    # as an invalid default value.
    op.add_column(
        'regression_test',
        sa.Column('never_worked', sa.Boolean(), nullable=False, server_default=sa.false()),
    )
    # ### end Alembic commands ###


def downgrade():
    """Undo the upgrade by dropping ``never_worked`` from ``regression_test``."""
    table_name, column_name = 'regression_test', 'never_worked'
    op.drop_column(table_name, column_name)
26 changes: 17 additions & 9 deletions mod_ci/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
from mod_sample.models import Issue
from mod_test.controllers import get_test_results
from mod_test.models import (Fork, Test, TestPlatform, TestProgress,
TestResult, TestResultFile, TestStatus, TestType)
TestResult, TestResultFile, TestResultStatus,
TestStatus, TestType)
from utility import is_valid_signature, request_from_github

# Timeout constants (in seconds)
Expand Down Expand Up @@ -2756,6 +2757,7 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo:
extra_failed_tests = []
common_failed_tests = []
fixed_tests = []
never_worked_tests = []
category_stats = []

test_results = get_test_results(test)
Expand All @@ -2764,20 +2766,23 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo:
category_name = category_results['category'].name

category_test_pass_count = 0
for test in category_results['tests']:
if not test['error']:
for t in category_results['tests']:
if not t['error']:
category_test_pass_count += 1
if last_test_master and getattr(test['test'], platform_column) != last_test_master.id:
fixed_tests.append(test['test'])
if last_test_master and getattr(t['test'], platform_column) != last_test_master.id:
fixed_tests.append(t['test'])
else:
if last_test_master and getattr(test['test'], platform_column) != last_test_master.id:
common_failed_tests.append(test['test'])
# Separate out tests that have NEVER passed on any CCExtractor version
if t['status'] == TestResultStatus.never_worked:
never_worked_tests.append(t['test'])
elif last_test_master and getattr(t['test'], platform_column) != last_test_master.id:
common_failed_tests.append(t['test'])
else:
extra_failed_tests.append(test['test'])
extra_failed_tests.append(t['test'])

category_stats.append(CategoryTestInfo(category_name, len(category_results['tests']), category_test_pass_count))

return PrCommentInfo(category_stats, extra_failed_tests, fixed_tests, common_failed_tests, last_test_master)
return PrCommentInfo(category_stats, extra_failed_tests, fixed_tests, common_failed_tests, last_test_master, never_worked_tests)


def comment_pr(test: Test) -> str:
Expand Down Expand Up @@ -2813,6 +2818,9 @@ def comment_pr(test: Test) -> str:
log.debug(f"GitHub PR Comment ID {comment.id} Uploaded for Test_id: {test_id}")
except Exception as e:
log.error(f"GitHub PR Comment Failed for Test_id: {test_id} with Exception {e}")

# Determine PR status:
# SUCCESS if no regressions caused by PR (never_worked tests don't count)
return Status.SUCCESS if len(comment_info.extra_failed_tests) == 0 else Status.FAILURE


Expand Down
1 change: 1 addition & 0 deletions mod_ci/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,4 @@ class PrCommentInfo:
fixed_tests: List[RegressionTest]
common_failed_tests: List[RegressionTest]
last_test_master: Test
never_worked_tests: List[RegressionTest]
3 changes: 3 additions & 0 deletions mod_regression/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def test_edit(regression_id):
test.input_type = InputType.from_string(form.input_type.data)
test.output_type = OutputType.from_string(form.output_type.data)
test.description = form.description.data
test.never_worked = form.never_worked.data

g.db.commit()
g.log.info(f'regression test with id: {regression_id} updated!')
Expand All @@ -174,6 +175,7 @@ def test_edit(regression_id):
form.input_type.data = test.input_type.value
form.output_type.data = test.output_type.value
form.description.data = test.description
form.never_worked.data = test.never_worked

return {'form': form, 'regression_id': regression_id}

Expand Down Expand Up @@ -247,6 +249,7 @@ def test_add():
input_type=InputType.from_string(form.input_type.data),
output_type=OutputType.from_string(form.output_type.data),
description=form.description.data,
never_worked=form.never_worked.data
)
g.db.add(new_test)
category = Category.query.filter(Category.id == form.category_id.data).first()
Expand Down
5 changes: 3 additions & 2 deletions mod_regression/forms.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Maintain forms related to CRUD operations on regression tests."""

from flask_wtf import FlaskForm
from wtforms import (HiddenField, IntegerField, SelectField, StringField,
SubmitField, TextAreaField)
from wtforms import (BooleanField, HiddenField, IntegerField, SelectField,
StringField, SubmitField, TextAreaField)
from wtforms.validators import DataRequired, InputRequired, Length

from mod_regression.models import InputType, OutputType
Expand Down Expand Up @@ -36,6 +36,7 @@ class CommonTestForm(FlaskForm):
)
category_id = SelectField("Category", coerce=int)
expected_rc = IntegerField("Expected Runtime Code", [InputRequired(message="Expected Runtime Code can't be empty")])
never_worked = BooleanField("Never Worked", default=False)


class AddTestForm(CommonTestForm):
Expand Down
9 changes: 7 additions & 2 deletions mod_regression/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,10 @@ class RegressionTest(Base):
last_passed_on_windows = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL"))
last_passed_on_linux = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL"))
description = Column(String(length=1024))
never_worked = Column(Boolean(), default=False, nullable=False, server_default='false')

def __init__(self, sample_id, command, input_type, output_type, category_id, expected_rc,
active=True, description="") -> None:
active=True, description="", never_worked=False) -> None:
"""
Parametrized constructor for the RegressionTest model.

Expand All @@ -117,7 +118,10 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp
:type expected_rc: int
:param active: The value of the 'active' field of RegressionTest model
:type active: bool

:param description: The value of the 'description' field of RegressionTest model
:type description: str
:param never_worked: Boolean flag whether the test has never worked for this sample
:type never_worked: bool
"""
self.sample_id = sample_id
self.command = command
Expand All @@ -127,6 +131,7 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp
self.expected_rc = expected_rc
self.active = active
self.description = description
self.never_worked = never_worked

def __repr__(self) -> str:
"""
Expand Down
46 changes: 40 additions & 6 deletions mod_test/controllers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Logic to find all tests, their progress and details of individual test."""

import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional, TypedDict

from flask import (Blueprint, Response, abort, g, jsonify, redirect, request,
url_for)
Expand All @@ -16,9 +16,25 @@
from mod_regression.models import (Category, RegressionTestOutput,
regressionTestLinkTable)
from mod_test.models import (Fork, Test, TestPlatform, TestProgress,
TestResult, TestResultFile, TestStatus, TestType)
TestResult, TestResultFile, TestResultStatus,
TestStatus, TestType)
from utility import serve_file_download


class CategoryTestItem(TypedDict):
    """Shape of one entry in a category's ``tests`` list."""

    # The regression test this entry belongs to (a RegressionTest, typed loosely).
    test: Any
    # Result recorded for that regression test in the run, or None if there is none.
    result: Optional[TestResult]
    # Result files associated with this regression test in the run.
    files: List[TestResultFile]
    # True when the regression test is considered failed for this run.
    error: bool
    # Classification of the outcome as a TestResultStatus member.
    status: TestResultStatus


class CategoryResult(TypedDict):
    """Shape of one per-category entry in the list of test results."""

    # The category the contained regression tests belong to.
    category: Category
    # One item per regression test of the category that is part of the run.
    tests: List[CategoryTestItem]
    # True when the category as a whole is marked as failed.
    error: bool


mod_test = Blueprint('test', __name__)


Expand Down Expand Up @@ -53,24 +69,33 @@
}


def get_test_results(test) -> List[Dict[str, Any]]:
def get_test_results(test) -> List[CategoryResult]:

Check failure on line 72 in mod_test/controllers.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 45 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=CCExtractor_sample-platform&issues=AZ0SEpX5EnrKnygjpFwF&open=AZ0SEpX5EnrKnygjpFwF&pullRequest=1071
"""
Get test results for each category.
Get test results for each category, with three-way pass/fail/never_worked classification.

The never_worked status is determined by the explicit `never_worked` boolean flag on each
RegressionTest, which is admin-editable from the regression test edit page.

:param test: The test to retrieve the data for.
:type test: Test
"""
populated_categories = g.db.query(regressionTestLinkTable.c.category_id).subquery()
categories = Category.query.filter(Category.id.in_(populated_categories)).order_by(Category.name.asc()).all()

# Collect all regression test IDs that are part of this test run
all_rt_ids = set(test.get_customized_regressiontests())

results = [{
'category': category,
'tests': [{
'test': rt,
'result': next((r for r in test.results if r.regression_test_id == rt.id), None),
'files': TestResultFile.query.filter(
and_(TestResultFile.test_id == test.id, TestResultFile.regression_test_id == rt.id)
).all()
} for rt in category.regression_tests if rt.id in test.get_customized_regressiontests()]
).all(),
'error': False,
'status': TestResultStatus.passed
} for rt in category.regression_tests if rt.id in all_rt_ids]
} for category in categories]
# Run through the categories to see if they should be marked as failed or passed. A category failed if one or more
# tests in said category failed.
Expand Down Expand Up @@ -109,6 +134,15 @@
category_test['files'] = [TestResultFile(-1, -1, -1, '', got)]
# Store test status in error field
category_test['error'] = test_error

# --- Three-way classification: passed / failed / never_worked ---
if not test_error:
category_test['status'] = TestResultStatus.passed
elif category_test['test'].never_worked:
category_test['status'] = TestResultStatus.never_worked
else:
category_test['status'] = TestResultStatus.failed

# Update category error
error = error or test_error
category['error'] = error
Expand Down
16 changes: 16 additions & 0 deletions mod_test/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import datetime
import os
import string
from enum import Enum
from typing import Any, Dict, List, Tuple, Type, Union

import pytz
Expand Down Expand Up @@ -75,6 +76,21 @@ def stages() -> List[Tuple[str, str]]:
return [TestStatus.preparation, TestStatus.testing, TestStatus.completed]


class TestResultStatus(Enum):
    """
    Classify the outcome of one regression test within a specific test run.

    The value is NOT stored in the database. It is derived at query time from
    the output hash/status comparison (passed vs failed) and from the
    ``never_worked`` boolean flag on the RegressionTest model.
    """

    # The test produced the exact expected output.
    passed = "passed"
    # The test produced output that differed from the expected output.
    failed = "failed"
    # The test failed and is flagged as never having worked before.
    never_worked = "never_worked"


class Fork(Base):
"""Model to store and manage fork."""

Expand Down
8 changes: 8 additions & 0 deletions templates/ci/pr_comment.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ NOTE: The following tests have been failing on the master branch as well as the
{% endfor %}
</ul>
{% endif %}
{% if comment_info.never_worked_tests | length %}
⚠️ <b>Note:</b> The following tests have NEVER passed on any version of CCExtractor for this platform. These are pre-existing issues and are NOT caused by this PR:
<ul>
{% for test in comment_info.never_worked_tests %}
<li> ccextractor {{ test.command }} <a href="{{ url_for('sample.sample_by_id', sample_id=test.sample.id, _external=True) }}">{{ test.sample.sha[:10] }}...</a></li>
{% endfor %}
</ul>
{% endif %}
{% if comment_info.fixed_tests | length %}
Congratulations: Merging this PR would fix the following tests:
<ul>
Expand Down
2 changes: 1 addition & 1 deletion templates/regression/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ <h1>Regression tests</h1>
{% endif %}
<tr data-category="[{{ test.categories|join(', ', attribute='id') }}]">
<td>{{ test.id }}</td>
<td>{{ test.command }}</td>
<td>{{ test.command }}{% if test.never_worked %} <span class="label secondary" title="This test has been marked as never having produced correct output">Never worked</span>{% endif %}</td>
<td id="status-toggle-{{test.id}}">{{ test.active }}</td>
<td>
<a href="{{ url_for('.test_view', regression_id=test.id) }}" title="View details"><i class="fa fa-info-circle"></i></a>&nbsp;
Expand Down
6 changes: 6 additions & 0 deletions templates/regression/test_add.html
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ <h5>Regression Test Add</h5>
<div class="medium-12 columns">
{{ macros.render_field(form.expected_rc) }}
</div>
<div class="medium-12 columns">
<label for="never_worked">
{{ form.never_worked() }}
{{ form.never_worked.label.text }}
</label>
</div>
<div class="medium-12 columns">
{{ macros.render_field(form.submit) }}
</div>
Expand Down
6 changes: 6 additions & 0 deletions templates/regression/test_edit.html
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ <h1>Regression Test Edit</h1>
<div class="medium-12 columns">
{{ macros.render_field(form.expected_rc) }}
</div>
<div class="medium-12 columns">
<label for="never_worked">
{{ form.never_worked() }} {{ form.never_worked.label.text }}
<span style="color: #999; font-size: 0.85em;">(Check if this test has never produced correct output on any CCExtractor version)</span>
</label>
</div>
<div class="medium-12 columns">
{{ macros.render_field(form.submit) }}
</div>
Expand Down
9 changes: 9 additions & 0 deletions templates/regression/test_view.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@ <h1>Regression test {{ test.id }}</h1>
<p>Input type: {{ test.input_type.description }}</p>
<p>Output type: {{ test.output_type.description }}</p>
<p>Description: {{ test.description or "No description" }}</p>
<p>Status:
{% if test.never_worked %}
<span class="label secondary">Never worked</span>
<span style="color: #999; font-size: 0.85em;">(This test has never produced correct output on any CCExtractor version)</span>
{% else %}
<span class="label success">Normal</span>
<span style="color: #999; font-size: 0.85em;">(This test has not been flagged as never having worked)</span>
{% endif %}
</p>
<p id="tags" href="#tags">
{% set sample = test.sample %}
Tags of sample:
Expand Down
20 changes: 19 additions & 1 deletion templates/test/by_id.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
display: block;
text-align: center;
}
.category-header.never-worked {
color: #777;
}
</style>
{% endblock %}

Expand Down Expand Up @@ -112,7 +115,19 @@ <h2>An error occurred</h2>
<h2>Test results</h2>
<p>Click on the category names to expand the results, and on "Fail" to see the differences.</p>
{% for result in results %}
<h4 class="category-header {{ 'fail' if result.error else 'pass' }}" data-category="{{ result.category.id }}">{{ result.category.name }} - {{ 'Fail' if result.error else 'Pass' }}</h4>
{% set category_class = 'fail' if result.error else 'pass' %}
{% if result.error %}
{% set ns = namespace(all_nw=true) %}
{% for t in result.tests %}
{% if t.error and t.status.value != 'never_worked' %}
{% set ns.all_nw = false %}
{% endif %}
{% endfor %}
{% if ns.all_nw %}
{% set category_class = 'never-worked' %}
{% endif %}
{% endif %}
<h4 class="category-header {{ category_class }}" data-category="{{ result.category.id }}">{{ result.category.name }} - {% if category_class == 'pass' %}Pass{% elif category_class == 'never-worked' %}Never Worked{% else %}Fail{% endif %}</h4>
{% if result.tests | length != 0 %}
<div id="no-more-tables">
<table class="col-md-12 table-bordered table-striped table-condensed sortable hide cf" data-category="{{ result.category.id }}">
Expand All @@ -135,6 +150,9 @@ <h4 class="category-header {{ 'fail' if result.error else 'pass' }}" data-catego
<td data-title="Runtime (ms)">{{ test.result.runtime }}</td>
<td data-title="Exit Code">{{ test.result.exit_code }}{{ '' if test.result.exit_code == test.result.expected_rc else ' (Expected '~test.result.expected_rc~')' }}</td>
<td data-title="Result">
{% if test.status is defined and test.status.value == 'never_worked' %}
<span class="label secondary" title="This test has never passed on any CCExtractor version for this platform">Never worked</span>
{% endif %}
{% for file in test.files -%}
{% if file.got is not none %}
{% set no_error = namespace(found=False) %}
Expand Down
Loading