Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions uncertainty-calibration-assistant/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Uncertainty Calibration Assistant

This module adds a focused uncertainty-calibration assistant for the AI-Powered Research Assistant Suite.

It reviews manuscript claims before submission and checks whether the wording, declared confidence, statistical evidence, replication status, and limitation disclosures agree. The assistant produces:

- calibrated claim wording
- peer-review findings for overclaiming and missing uncertainty evidence
- reproducibility confidence rating (`low`, `moderate`, or `high`)
- research-gap opportunities when claims are low-confidence or replication has failed
- deterministic reviewer packets and audit digests

## Run

```sh
node uncertainty-calibration-assistant/test.js
node uncertainty-calibration-assistant/demo.js
```

The demo writes JSON and Markdown reviewer artifacts to `uncertainty-calibration-assistant/reports/`.

## Review Surface

The implementation is dependency-free, uses synthetic data only, and does not call external APIs or read credentials.
26 changes: 26 additions & 0 deletions uncertainty-calibration-assistant/acceptance-notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Acceptance Notes

## What Changed

Added `uncertainty-calibration-assistant/`, a self-contained pre-submission assistant that calibrates scientific claims against evidence strength, statistical uncertainty, replication status, and limitation disclosures.

## How To Validate

Run:

```sh
node uncertainty-calibration-assistant/test.js
node uncertainty-calibration-assistant/demo.js
```

Optional syntax check:

```sh
node --check uncertainty-calibration-assistant/index.js
node --check uncertainty-calibration-assistant/test.js
node --check uncertainty-calibration-assistant/demo.js
```

## Why This Is Issue-Specific

Issue #16 explicitly calls for auto peer-review reports, claims-vs-evidence alignment, reproducibility checking, discrepancy flags, and research-gap generation. This implementation links those into a narrow uncertainty-calibration workflow: overconfident claims are rewritten, replication failures produce blockers, and low-confidence claims become research opportunities.
76 changes: 76 additions & 0 deletions uncertainty-calibration-assistant/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
const fs = require("fs");
const path = require("path");
const { evaluateUncertaintyCalibration } = require("./index");

// Demo driver: evaluates a synthetic two-claim manuscript packet and writes
// the resulting report as JSON and Markdown under ./reports.
const reportsDir = path.join(__dirname, "reports");
fs.mkdirSync(reportsDir, { recursive: true });

// Over-claimed statement: absolute wording, small sample, unstable rerun.
const overconfidentClaim = {
  id: "claim-a",
  text: "The intervention definitively eliminates cognitive fatigue in post-viral patients.",
  strength: "high",
  limitations: [],
  evidence: {
    primaryData: true,
    statisticalTest: "mixed-effects model",
    effectSize: 0.26,
    pValue: 0.04,
    sampleSize: 22,
    citations: ["doi:10.5555/pilot-fatigue"],
    replication: { status: "non-deterministic", runId: "rerun-17" },
  },
};

// Hedged statement with an interval estimate and a passing rerun.
const hedgedClaim = {
  id: "claim-b",
  text: "The wearable sleep signal suggests a reusable early-warning marker.",
  strength: "moderate",
  limitations: ["external cohort not yet enrolled"],
  evidence: {
    primaryData: true,
    statisticalTest: "bootstrap stability",
    effectSize: 0.39,
    confidenceInterval: "95% CI 0.14-0.58",
    sampleSize: 72,
    citations: ["doi:10.5555/sleep-marker", "doi:10.5555/wearable-review"],
    replication: { status: "passed", runId: "rerun-18" },
  },
};

const samplePacket = {
  manuscriptId: "ms-cognitive-fatigue-042",
  domain: "clinical neuroscience",
  claims: [overconfidentClaim, hedgedClaim],
};

const result = evaluateUncertaintyCalibration(samplePacket);
const jsonFile = path.join(reportsDir, "uncertainty-calibration-report.json");
const markdownFile = path.join(reportsDir, "uncertainty-calibration-report.md");

// A Markdown section is a heading, a blank line, its bullets, and a trailing blank line.
const toSection = (heading, bullets) => [heading, "", ...bullets, ""];

const claimBullets = result.calibratedClaims.map(
  (claim) => `- ${claim.id}: ${claim.calibrated} (${claim.confidence})`,
);
const findingBullets = result.findings.map(
  (finding) => `- ${finding.severity}: ${finding.code} - ${finding.message}`,
);
const opportunityBullets = result.researchOpportunities.map(
  (item) => `- ${item.priority}: ${item.claimId} - ${item.opportunity}`,
);

const markdownLines = [
  "# Uncertainty Calibration Assistant Demo",
  "",
  `Decision: ${result.decision}`,
  `Reproducibility confidence: ${result.reproducibilityConfidence}`,
  `Audit digest: ${result.auditDigest}`,
  "",
  ...toSection("## Calibrated Claims", claimBullets),
  ...toSection("## Findings", findingBullets),
  ...toSection("## Research Opportunities", opportunityBullets),
];

fs.writeFileSync(jsonFile, JSON.stringify(result, null, 2));
fs.writeFileSync(markdownFile, markdownLines.join("\n"));

for (const writtenFile of [jsonFile, markdownFile]) {
  console.log(`Wrote ${writtenFile}`);
}
console.log(`${result.decision}: ${result.findings.length} finding(s), ${result.auditDigest}`);
Binary file added uncertainty-calibration-assistant/demo.mp4
Binary file not shown.
25 changes: 25 additions & 0 deletions uncertainty-calibration-assistant/demo.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
227 changes: 227 additions & 0 deletions uncertainty-calibration-assistant/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
const crypto = require("crypto");

/**
 * Coerces a value into an array.
 *
 * @param {*} value - Anything; arrays are returned as-is (same reference).
 * @returns {Array} The input array, `[]` for any falsy value, or a
 *   one-element array wrapping a truthy non-array value.
 */
function asArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return value ? [value] : [];
}

/**
 * Serializes a value to JSON with object keys sorted, so structurally equal
 * values always produce the same string (used as hashing input).
 *
 * Follows `JSON.stringify` semantics for values JSON cannot represent:
 * object members whose value is `undefined` or a function are omitted, the
 * same values inside arrays become `null`, and objects exposing `toJSON`
 * (for example `Date`) are serialized through it. Without this, distinct
 * inputs such as `[1, undefined]` and `[1, ""]`-like shapes or two different
 * dates could collapse to the same or to a non-JSON string.
 *
 * @param {*} value - Value to serialize.
 * @returns {string|undefined} Canonical JSON text, or `undefined` when the
 *   top-level value itself is not representable (same as `JSON.stringify`).
 */
function stableStringify(value) {
  const resolved = value && typeof value.toJSON === "function" ? value.toJSON() : value;

  if (Array.isArray(resolved)) {
    // Array.from visits holes too, so sparse slots serialize as null like JSON.stringify.
    const items = Array.from(resolved, (item) => {
      const encoded = stableStringify(item);
      return encoded === undefined ? "null" : encoded;
    });
    return `[${items.join(",")}]`;
  }

  if (resolved && typeof resolved === "object") {
    const members = [];
    for (const key of Object.keys(resolved).sort()) {
      const encoded = stableStringify(resolved[key]);
      // Skip unrepresentable members instead of emitting the text "undefined".
      if (encoded !== undefined) {
        members.push(`${JSON.stringify(key)}:${encoded}`);
      }
    }
    return `{${members.join(",")}}`;
  }

  return JSON.stringify(resolved);
}

/**
 * Computes the SHA-256 hex digest of a value's canonical serialization.
 *
 * @param {*} value - Value to fingerprint; serialized with `stableStringify`.
 * @returns {string} Hex-encoded SHA-256 hash.
 */
function digest(value) {
  const canonicalText = stableStringify(value);
  const hasher = crypto.createHash("sha256");
  hasher.update(canonicalText);
  return hasher.digest("hex");
}

/**
 * Produces a comparison-friendly form of a label: stringified, trimmed, and
 * lower-cased. Any falsy input yields the empty string.
 *
 * @param {*} value - Raw label (status, confidence level, ...).
 * @returns {string} Normalized text.
 */
function normalize(value) {
  const text = String(value ? value : "");
  const trimmed = text.trim();
  return trimmed.toLowerCase();
}

// Whole-word, case-insensitive match for high-certainty wording. The adverb
// forms ("definitively", "conclusively") are listed explicitly because a
// trailing \b after "definitive" does not match inside the longer word.
const ABSOLUTE_LANGUAGE_PATTERN =
  /\b(proves?|definitive(?:ly)?|conclusive(?:ly)?|always|never|guarantees?|eliminates?|fully explains?)\b/i;

/**
 * Reports whether a claim statement contains absolute, high-certainty wording.
 *
 * @param {string} [text] - Claim statement; a missing value is treated as empty.
 * @returns {boolean} True when at least one absolute term is present.
 */
function hasAbsoluteLanguage(text) {
  return ABSOLUTE_LANGUAGE_PATTERN.test(text || "");
}

/**
 * Maps a finding severity to a numeric rank for comparison.
 *
 * A switch is used rather than an object-literal lookup so that names
 * inherited from Object.prototype (e.g. "constructor", "toString") rank as 0
 * instead of returning a function.
 *
 * @param {string} severity - "blocker", "warning", or "info".
 * @returns {number} 3, 2, or 1 respectively; 0 for anything else.
 */
function severityRank(severity) {
  switch (severity) {
    case "blocker":
      return 3;
    case "warning":
      return 2;
    case "info":
      return 1;
    default:
      return 0;
  }
}

/**
 * Appends one finding record to a findings list (mutates the list).
 *
 * @param {Array<object>} findings - List to append to.
 * @param {string} severity - Finding severity label.
 * @param {string} code - Machine-readable finding code.
 * @param {string} claimId - Identifier of the claim the finding refers to.
 * @param {string} message - Human-readable description.
 * @param {string} remediation - Suggested corrective action.
 * @returns {void}
 */
function addFinding(findings, severity, code, claimId, message, remediation) {
  const record = {
    severity: severity,
    code: code,
    claimId: claimId,
    message: message,
    remediation: remediation,
  };
  findings.push(record);
}

/**
 * Scores an evidence record by summing fixed weights for each signal present.
 *
 * Positive signals: primary data (+2), a statistical test (+1), a confidence
 * interval (+1), a non-null effect size (+1), two or more citations (+1), a
 * passed replication (+2), and a sample size of at least 30 (+1). A failed
 * replication subtracts 3 and a non-deterministic one subtracts 2.
 *
 * @param {object} [evidence] - Evidence record; a missing record scores 0.
 * @returns {number} The total score (may be negative).
 */
function evidenceScore(evidence) {
  const record = evidence || {};
  const replicationStatus = record.replication ? normalize(record.replication.status) : null;

  // Each entry pairs "does this signal apply?" with the weight it contributes.
  const weightedSignals = [
    [Boolean(record.primaryData), 2],
    [Boolean(record.statisticalTest), 1],
    [Boolean(record.confidenceInterval), 1],
    [record.effectSize !== undefined && record.effectSize !== null, 1],
    [asArray(record.citations).length >= 2, 1],
    [replicationStatus === "passed", 2],
    [replicationStatus === "failed", -3],
    [replicationStatus === "non-deterministic", -2],
    [Boolean(record.sampleSize) && record.sampleSize >= 30, 1],
  ];

  return weightedSignals.reduce(
    (total, [applies, weight]) => (applies ? total + weight : total),
    0,
  );
}

// Certainty verbs/modifiers and their hedged counterparts. Singular and
// plural verb forms, and adjective vs. adverb forms, get separate rules so the
// rewritten sentence stays grammatical.
const CERTAINTY_SOFTENING_RULES = [
  [/\bproves\b/gi, "suggests"],
  [/\bprove\b/gi, "suggest"],
  [/\bdefinitively\b/gi, "preliminarily"],
  [/\bdefinitive\b/gi, "preliminary"],
  [/\bconclusively\b/gi, "provisionally"],
  [/\bconclusive\b/gi, "provisional"],
  [/\beliminates\b/gi, "reduces"],
  [/\beliminate\b/gi, "reduce"],
  [/\bfully explains\b/gi, "helps explain"],
  [/\bfully explain\b/gi, "help explain"],
];

// Universal quantifiers and guarantees. Replacements keep the polarity of the
// original word ("never" must not turn into a positive "may").
const QUANTIFIER_SOFTENING_RULES = [
  [/\balways\b/gi, "often"],
  [/\bnever\b/gi, "rarely"],
  [/\bguarantees\b/gi, "supports"],
  [/\bguarantee\b/gi, "support"],
];

// Applies each [pattern, replacement] rule in order, capitalizing the
// replacement when the matched word started with an upper-case letter.
function softenTerms(text, rules) {
  return rules.reduce(
    (current, [pattern, replacement]) =>
      current.replace(pattern, (match) =>
        /^[A-Z]/.test(match) ? `${replacement[0].toUpperCase()}${replacement.slice(1)}` : replacement,
      ),
    text,
  );
}

/**
 * Rewrites a claim statement so its wording matches the severity of the
 * findings raised against it.
 *
 * - Any "blocker" finding: every absolute term is softened (certainty terms
 *   and quantifiers).
 * - Otherwise, any "warning" finding: only quantifiers/guarantees are softened.
 * - Otherwise the text is returned unchanged.
 *
 * @param {{text?: string}} claim - Claim whose `text` is rewritten; a missing
 *   text is treated as an empty string.
 * @param {Array<{severity: string}>} findings - Findings for this claim.
 * @returns {string} The calibrated statement.
 */
function calibrateWording(claim, findings) {
  // Coerce so a non-string text cannot crash on .replace.
  const text = String((claim && claim.text) || "");
  const severities = (findings || []).map((finding) => finding.severity);

  if (severities.includes("blocker")) {
    return softenTerms(text, [...CERTAINTY_SOFTENING_RULES, ...QUANTIFIER_SOFTENING_RULES]);
  }

  if (severities.includes("warning")) {
    return softenTerms(text, QUANTIFIER_SOFTENING_RULES);
  }

  return text;
}

/**
 * Collects the calibration findings for a single claim.
 *
 * @param {object} claim - Claim record (id, text, strength/confidence,
 *   limitations, evidence).
 * @param {number} score - Evidence score already computed for the claim.
 * @returns {Array<object>} Findings in a fixed check order.
 */
function collectClaimFindings(claim, score) {
  const claimFindings = [];
  const evidence = claim.evidence || {};
  // Every finding for an id-less claim is attributed to "unknown" so no
  // finding is emitted without a claimId.
  const claimId = claim.id || "unknown";
  const requestedConfidence = normalize(claim.confidence || claim.strength);
  const replicationStatus = normalize(evidence.replication && evidence.replication.status);
  // Claims scoring below 5 are treated as under-supported by the checks below.
  const isUnderSupported = score < 5;
  // null counts as "not reported", matching how evidenceScore treats effectSize.
  const isReported = (value) => value !== undefined && value !== null;

  if (!claim.id || !claim.text) {
    addFinding(
      claimFindings,
      "blocker",
      "CLAIM_IDENTITY_MISSING",
      claimId,
      "Claim is missing an id or statement text.",
      "Store a stable claim id and the exact manuscript statement before generating review guidance.",
    );
  }

  if (hasAbsoluteLanguage(claim.text) && isUnderSupported) {
    addFinding(
      claimFindings,
      "blocker",
      "ABSOLUTE_LANGUAGE_UNDER_SUPPORTED",
      claimId,
      "The claim uses high-certainty language without enough evidence support.",
      "Replace absolute wording with calibrated language or attach stronger statistical, replication, and citation evidence.",
    );
  }

  if ((requestedConfidence === "high" || requestedConfidence === "definitive") && isUnderSupported) {
    addFinding(
      claimFindings,
      "warning",
      "CONFIDENCE_EXCEEDS_EVIDENCE",
      claimId,
      "Declared confidence is higher than the evidence packet supports.",
      "Lower the confidence rating or add primary data, confidence intervals, and replication evidence.",
    );
  }

  if (!evidence.confidenceInterval && (isReported(evidence.pValue) || isReported(evidence.effectSize))) {
    addFinding(
      claimFindings,
      "warning",
      "UNCERTAINTY_INTERVAL_MISSING",
      claimId,
      "Statistical evidence lacks a confidence or credible interval.",
      "Add interval estimates so reviewers can judge magnitude and uncertainty.",
    );
  }

  if (replicationStatus === "failed") {
    addFinding(
      claimFindings,
      "blocker",
      "FAILED_REPLICATION_OVERCLAIM",
      claimId,
      "A failed replication is attached but the claim is still presented as stable.",
      "Route the claim through rebuttal, limitation, or research-gap framing before submission.",
    );
  }

  if (asArray(claim.limitations).length === 0 && isUnderSupported) {
    addFinding(
      claimFindings,
      "warning",
      "LIMITATION_DISCLOSURE_MISSING",
      claimId,
      "Under-supported claim has no limitation disclosure.",
      "Add a limitations sentence that explains sample, method, or reproducibility uncertainty.",
    );
  }

  return claimFindings;
}

/**
 * Reviews a manuscript packet and produces the calibration report.
 *
 * For each claim it computes an evidence score, raises findings, rewrites the
 * wording according to those findings, and assigns a calibrated confidence
 * (score >= 6 "high", >= 3 "moderate", otherwise "low"). It then derives the
 * overall decision, a reproducibility confidence rating, research
 * opportunities, and a SHA-256 audit digest over the report body.
 *
 * @param {object} [packet] - Manuscript packet with `manuscriptId`, `domain`,
 *   and `claims` (a single claim object is accepted in place of an array).
 *   Missing context or claims produce blocker findings rather than an error.
 * @returns {object} Report with `decision`, `manuscriptId`, `domain`,
 *   `counts`, `reproducibilityConfidence`, `calibratedClaims`, `findings`,
 *   `researchOpportunities`, and `auditDigest`.
 */
function evaluateUncertaintyCalibration(packet) {
  const manuscript = packet || {};
  // A null/undefined entry is reviewed as an empty claim (which yields a
  // CLAIM_IDENTITY_MISSING blocker) instead of throwing on property access.
  const claims = asArray(manuscript.claims).map((claim) =>
    claim !== null && claim !== undefined ? claim : {},
  );
  const findings = [];
  const calibratedClaims = [];

  if (!manuscript.manuscriptId || !manuscript.domain) {
    addFinding(
      findings,
      "blocker",
      "MANUSCRIPT_CONTEXT_MISSING",
      "manuscript",
      "Manuscript id and domain are required before adaptive review calibration can run.",
      "Attach manuscript identity and domain metadata so the assistant can select the correct review standard.",
    );
  }

  if (claims.length === 0) {
    addFinding(
      findings,
      "blocker",
      "NO_CLAIMS_TO_CALIBRATE",
      "manuscript",
      "No manuscript claims were provided for calibration.",
      "Extract claim statements from the draft before running the pre-submission assistant.",
    );
  }

  for (const claim of claims) {
    const score = evidenceScore(claim.evidence);
    const claimFindings = collectClaimFindings(claim, score);

    findings.push(...claimFindings);
    calibratedClaims.push({
      id: claim.id,
      original: claim.text,
      calibrated: calibrateWording(claim, claimFindings),
      evidenceScore: score,
      confidence: score >= 6 ? "high" : score >= 3 ? "moderate" : "low",
      findingCodes: claimFindings.map((finding) => finding.code),
    });
  }

  const countBySeverity = (severity) => findings.filter((finding) => finding.severity === severity).length;
  const blockerCount = countBySeverity("blocker");
  const warningCount = countBySeverity("warning");

  const replicationStatuses = claims.map((claim) =>
    normalize(claim.evidence && claim.evidence.replication && claim.evidence.replication.status),
  );
  const countByStatus = (wanted) => replicationStatuses.filter((status) => status === wanted).length;
  const failedReplicationCount = countByStatus("failed");
  const passedReplicationCount = countByStatus("passed");
  const unstableReplicationCount = countByStatus("non-deterministic");

  // "high" tolerates at most one claim without a passed rerun, but never a
  // rerun that was explicitly non-deterministic.
  let reproducibilityConfidence = "moderate";
  if (failedReplicationCount > 0) {
    reproducibilityConfidence = "low";
  } else if (unstableReplicationCount === 0 && passedReplicationCount >= Math.max(1, claims.length - 1)) {
    reproducibilityConfidence = "high";
  }

  const researchOpportunities = calibratedClaims
    .filter((claim) => claim.confidence === "low" || claim.findingCodes.includes("FAILED_REPLICATION_OVERCLAIM"))
    .map((claim) => ({
      claimId: claim.id,
      opportunity: "Run targeted replication or collect stronger primary evidence before making a high-certainty statement.",
      priority: claim.findingCodes.includes("FAILED_REPLICATION_OVERCLAIM") ? "high" : "medium",
    }));

  let decision = "ready-for-review";
  if (blockerCount > 0) {
    decision = "revise-before-submission";
  } else if (warningCount > 0) {
    decision = "calibrate-language";
  }

  const reviewPacket = {
    decision,
    manuscriptId: manuscript.manuscriptId,
    domain: manuscript.domain,
    counts: {
      blocker: blockerCount,
      warning: warningCount,
      info: countBySeverity("info"),
    },
    reproducibilityConfidence,
    calibratedClaims,
    findings,
    researchOpportunities,
  };

  // The digest covers everything except itself.
  return {
    ...reviewPacket,
    auditDigest: digest(reviewPacket),
  };
}

// Public API: the report builder, plus the evidence-scoring and
// canonical-serialization helpers.
module.exports = {
evaluateUncertaintyCalibration,
evidenceScore,
stableStringify,
};
Loading