Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion apps/api/src/trigger/vendor/vendor-risk-assessment-task.ts
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,9 @@ export const vendorRiskAssessmentTask: Task<
minTimeoutInMs: 1000,
maxTimeoutInMs: 10000,
},
maxDuration: 1000 * 60 * 10,
// 30 minutes total: Firecrawl Agent can take up to 25 min on slow SPA
// trust centers (Ubiquiti), and deep-scrape + DB writes need room too.
maxDuration: 1000 * 60 * 30,
run: async (payload) => {
await tags.add([`org:${payload.organizationId}`]);

Expand Down Expand Up @@ -1061,7 +1063,19 @@ export const vendorRiskAssessmentTask: Task<
badgeCount: Array.isArray(complianceBadges)
? complianceBadges.length
: 0,
complianceBadgesJson: JSON.stringify(complianceBadges ?? null),
hasLogo: Boolean(logoUrl),
certificationsInAssessmentJson: JSON.stringify(
Array.isArray(
(coreData as { certifications?: unknown })?.certifications,
)
? (
coreData as {
certifications?: Array<{ type: string; status: string }>;
}
).certifications
: [],
),
});

// Update vendor with core data (keep status in_progress — news may still be loading)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import type { VendorRiskAssessmentCertification } from './agent-types';
import { pickDeepScrapeSourceUrl } from './deep-scrape-source-url';

/**
 * Test fixture factory: builds a certification record from a fixed set of
 * defaults (a verified "SOC 2 Type II" with no dates and no URL), letting each
 * test override only the fields it cares about.
 */
function cert(
  overrides: Partial<VendorRiskAssessmentCertification> = {},
): VendorRiskAssessmentCertification {
  const defaults: VendorRiskAssessmentCertification = {
    type: 'SOC 2 Type II',
    status: 'verified',
    issuedAt: null,
    expiresAt: null,
    url: null,
  };
  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}

// Table-driven spec for `pickDeepScrapeSourceUrl`: every scenario shares the
// same vendor domain and differs only in the links/certifications fed in and
// the URL (or null) expected back.
describe('pickDeepScrapeSourceUrl', () => {
  const vendorDomain = 'acme.com';

  type Scenario = {
    title: string;
    links: Array<{ label: string; url: string }>;
    certifications: VendorRiskAssessmentCertification[];
    expected: string | null;
  };

  const scenarios: Scenario[] = [
    // --- Labelled-link tiers -------------------------------------------------
    {
      title: "prefers 'Trust & Security' link over 'Security Overview'",
      links: [
        { label: 'Security Overview', url: 'https://acme.com/security' },
        { label: 'Trust & Security', url: 'https://acme.com/trust' },
      ],
      certifications: [],
      expected: 'https://acme.com/trust',
    },
    {
      title: "falls back to 'Security Overview' when no 'Trust & Security' link",
      links: [{ label: 'Security Overview', url: 'https://acme.com/security' }],
      certifications: [],
      expected: 'https://acme.com/security',
    },
    // --- Certification tier --------------------------------------------------
    {
      title:
        'falls back to a verified cert URL on the vendor domain when no labelled links match',
      links: [],
      certifications: [
        cert({ url: 'https://acme.com/reports/soc2.pdf', status: 'verified' }),
      ],
      expected: 'https://acme.com/reports/soc2.pdf',
    },
    {
      title: 'skips subdomain-matching cert URL when status is not verified',
      links: [],
      certifications: [
        cert({ url: 'https://trust.acme.com/iso', status: 'unknown' }),
      ],
      expected: null,
    },
    {
      title: 'accepts subdomain-matching cert URL (same registrable domain)',
      links: [],
      certifications: [
        cert({ url: 'https://trust.acme.com/iso', status: 'verified' }),
      ],
      expected: 'https://trust.acme.com/iso',
    },
    // --- Rejections ----------------------------------------------------------
    {
      title: 'rejects off-domain labelled links',
      links: [{ label: 'Trust & Security', url: 'https://acme.trust.page' }],
      certifications: [],
      expected: null,
    },
    {
      title: 'rejects off-domain verified cert URL',
      links: [],
      certifications: [
        cert({ url: 'https://acme.safebase.io/soc2', status: 'verified' }),
      ],
      expected: null,
    },
    {
      title: 'rejects unparseable URLs',
      links: [{ label: 'Trust & Security', url: 'not a url' }],
      certifications: [cert({ url: 'also not a url', status: 'verified' })],
      expected: null,
    },
    {
      title: 'returns null when everything is empty',
      links: [],
      certifications: [],
      expected: null,
    },
    // --- Ordering among certifications ---------------------------------------
    {
      title: 'returns first verified cert URL and ignores later verified certs',
      links: [],
      certifications: [
        cert({
          type: 'SOC 2',
          status: 'verified',
          url: 'https://acme.com/first.pdf',
        }),
        cert({
          type: 'ISO 27001',
          status: 'verified',
          url: 'https://acme.com/second.pdf',
        }),
      ],
      expected: 'https://acme.com/first.pdf',
    },
    {
      title:
        'skips verified certs whose URL is null and continues to next cert',
      links: [],
      certifications: [
        cert({ type: 'SOC 2', status: 'verified', url: null }),
        cert({
          type: 'ISO 27001',
          status: 'verified',
          url: 'https://acme.com/iso.pdf',
        }),
      ],
      expected: 'https://acme.com/iso.pdf',
    },
  ];

  // Register one test per scenario, in declaration order.
  for (const { title, links, certifications, expected } of scenarios) {
    it(title, () => {
      const picked = pickDeepScrapeSourceUrl({
        vendorDomain,
        links,
        certifications,
      });
      if (expected === null) {
        expect(picked).toBeNull();
      } else {
        expect(picked).toBe(expected);
      }
    });
  }
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import type { VendorRiskAssessmentCertification } from './agent-types';

/**
 * Resolve the best "source URL" to feed into `deepScrapeTrustPortal`.
 *
 * Fallback order:
 *   1. The first link labelled exactly "Trust & Security" that is on the vendor's domain.
 *   2. The first link labelled exactly "Security Overview" that is on the vendor's domain.
 *   3. The URL of the first certification (in array order) whose status is
 *      'verified' and whose URL is on the vendor's domain.
 *
 * "On the vendor's domain" means the URL's hostname equals `vendorDomain` or is
 * a subdomain of it. Both sides are compared case-insensitively, ignoring
 * surrounding whitespace and a trailing root dot, so `Acme.com`, `acme.com.`
 * and `acme.com` are treated as the same domain.
 *
 * Off-domain and unparseable URLs are rejected at every tier. This helper is
 * meant as the first line of defense against scraping an unrelated host; any
 * further gating is the caller's responsibility.
 *
 * @param args.vendorDomain   Bare host name of the vendor (e.g. `acme.com`).
 * @param args.links          Labelled links to consider for tiers 1 and 2.
 * @param args.certifications Certifications to consider for tier 3.
 * @returns The selected URL exactly as supplied by the caller, or `null` if
 *          nothing qualifies (including when `vendorDomain` is blank).
 */
export function pickDeepScrapeSourceUrl(args: {
  vendorDomain: string;
  links: Array<{ label: string; url: string }>;
  certifications: VendorRiskAssessmentCertification[];
}): string | null {
  const { links, certifications } = args;

  // Canonical form for host comparison: trimmed, lower-case, no trailing dot.
  const normalizeHost = (host: string): string =>
    host.trim().toLowerCase().replace(/\.$/, '');

  // Normalised once up front rather than on every URL check.
  const vendorDomain = normalizeHost(args.vendorDomain);

  // A blank vendor domain would equal the empty hostname of host-less URLs
  // (e.g. `file:///...`), silently letting them through. Nothing can qualify.
  if (!vendorDomain) return null;

  const isOnVendorDomain = (url: string): boolean => {
    try {
      const host = normalizeHost(new URL(url).hostname);
      // The leading dot in the suffix check stops `notacme.com` from matching.
      return host === vendorDomain || host.endsWith(`.${vendorDomain}`);
    } catch {
      // `new URL` throws on unparseable input; treat that as off-domain.
      return false;
    }
  };

  const byLabel = (label: string): string | null =>
    links.find((l) => l.label === label && isOnVendorDomain(l.url))?.url ??
    null;

  const trustUrl = byLabel('Trust & Security');
  if (trustUrl) return trustUrl;

  const securityUrl = byLabel('Security Overview');
  if (securityUrl) return securityUrl;

  for (const cert of certifications) {
    if (cert.status !== 'verified') continue;
    if (cert.url && isOnVendorDomain(cert.url)) return cert.url;
  }

  return null;
}
Loading
Loading