11import { z } from 'zod'
22import { defineRouteContract } from '@/lib/api/contracts/types'
3+ import { validateExternalUrl } from '@/lib/core/security/input-validation'
34import { CADENCE_TYPES , DESTINATION_TYPES , SOURCE_TYPES } from '@/lib/data-drains/types'
45
/**
 * Basic shape of an AWS S3 bucket name: 3-63 characters made of lowercase
 * letters, digits, dots and hyphens, beginning and ending with a letter or
 * digit. The full rule set lives in s3.ts.
 */
const S3_BUCKET_NAME_RE = /^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$/

/** Matches text formatted like a dotted-quad IPv4 address (four groups of 1-3 digits). */
const S3_IPV4_LIKE_RE = /^(\d{1,3}\.){3}\d{1,3}$/

/**
 * Shape of an AWS region code such as `us-east-1`: a lowercase prefix of two or
 * more letters, one or more hyphenated lowercase words, then a hyphen and a number.
 */
const AWS_REGION_RE = /^[a-z]{2,}(-[a-z]+)+-\d+$/
/**
 * One dot-separated component of a GCS bucket name: lowercase letters, digits,
 * underscores and hyphens, starting and ending with a letter or digit.
 * Mirrors gcs.ts.
 */
const GCS_BUCKET_COMPONENT_RE = /^[a-z0-9][a-z0-9_-]*[a-z0-9]$/

/** Case-insensitive match for names that begin with "goog", "google" or "g00gle". */
const GOOGLE_RESERVED_PREFIX_RE = /^(goog|google|g00gle)/i

/** Case-insensitive match for names containing "google" or "g00gle" anywhere. */
const GOOGLE_CONTAINS_RE = /(google|g00gle)/i

/**
 * Checks the overall length of a GCS bucket name and then every dot-separated
 * component of it.
 *
 * NOTE(review): the 3-character minimum is applied to each component, not only
 * to the whole name — confirm this matches gcs.ts and the intended GCS rules.
 *
 * @param v - Candidate bucket name.
 * @returns A human-readable problem description, or `null` when every check passes.
 */
function validateGcsBucketComponents(v: string): string | null {
  const totalLengthOk = v.length >= 3 && v.length <= 222
  if (!totalLengthOk) return 'bucket must be 3-222 characters'

  for (const part of v.split('.')) {
    // Length is checked before shape so the more specific message wins.
    const partLengthOk = part.length >= 3 && part.length <= 63
    if (!partLengthOk) {
      return 'each dot-separated component must be 3-63 characters'
    }
    if (!GCS_BUCKET_COMPONENT_RE.test(part)) {
      return 'each component must be lowercase, start/end alphanumeric, letters/digits/_/- only'
    }
  }

  return null
}
/** Azure storage account name: 3-24 lowercase letters or digits. */
const AZURE_ACCOUNT_NAME_RE = /^[a-z0-9]{3,24}$/

/**
 * Azure container name shape: lowercase letters, digits and hyphens, starting
 * and ending with a letter or digit, never two hyphens in a row. The pattern
 * needs at least three characters; it does not cap the length itself.
 */
const AZURE_CONTAINER_NAME_RE = /^[a-z0-9]([a-z0-9]|-(?!-))+[a-z0-9]$/

/** Accepted Azure Blob Storage endpoint suffixes (Public, US Gov, China, Germany). */
const AZURE_ENDPOINT_SUFFIXES = [
  'blob.core.windows.net',
  'blob.core.usgovcloudapi.net',
  'blob.core.chinacloudapi.cn',
  'blob.core.cloudapi.de',
] as const
/**
 * BigQuery project ID: an optional `domain:` prefix followed by a 6-30 character
 * ID that starts with a lowercase letter, ends with a letter or digit, and
 * otherwise uses lowercase letters, digits and hyphens.
 */
const BQ_PROJECT_ID_RE = /^([a-z][a-z0-9.-]{0,61}[a-z0-9]:)?[a-z][a-z0-9-]{4,28}[a-z0-9]$/

/** BigQuery dataset ID: 1-1024 ASCII letters, digits or underscores. */
const BQ_DATASET_RE = /^[A-Za-z0-9_]{1,1024}$/

/**
 * BigQuery table ID: 1-1024 code points from the Unicode categories L (letter),
 * M (mark), N (number), Pc (connector, includes underscore), Pd (dash) and
 * Zs (space separator). Zs is included so the pattern agrees with the
 * "... dashes, and spaces" wording of the tableId validation message.
 */
const BQ_TABLE_RE = /^[\p{L}\p{M}\p{N}\p{Pc}\p{Pd}\p{Zs}]{1,1024}$/u
/**
 * Snowflake org-account identifier (`orgname-accountname`): an alphanumeric
 * first character, then letters/digits/underscores, with at least one
 * hyphen-separated segment. Mirrored from snowflake.ts.
 */
const SNOWFLAKE_ACCOUNT_ORG_RE = /^[A-Za-z0-9][A-Za-z0-9_]*(?:-[A-Za-z0-9_]+)+$/

/**
 * Legacy Snowflake account locator (`locator[.region[.cloud]]`): a locator
 * followed by up to two dot-separated qualifiers. Mirrored from snowflake.ts.
 */
const SNOWFLAKE_ACCOUNT_LOCATOR_RE =
  /^[A-Za-z0-9][A-Za-z0-9_]*(?:\.[A-Za-z0-9][A-Za-z0-9_-]*){0,2}$/

/**
 * Snowflake identifier shape used by this file: a letter or underscore followed
 * by up to 254 letters, digits, underscores or dollar signs.
 */
const SNOWFLAKE_IDENTIFIER_RE = /^[A-Za-z_][A-Za-z0-9_$]{0,254}$/
/**
 * Header names that may not be chosen as the webhook signature header.
 * Entries are stored in lowercase, so callers must lowercase a candidate
 * before looking it up.
 */
const RESERVED_WEBHOOK_SIGNATURE_HEADER_NAMES = new Set([
  // Standard request headers
  'authorization',
  'content-type',
  'user-agent',
  'idempotency-key',
  // Sim-namespaced headers
  'x-sim-timestamp',
  'x-sim-signature-version',
  'x-sim-drain-id',
  'x-sim-run-id',
  'x-sim-source',
  'x-sim-sequence',
  'x-sim-row-count',
  'x-sim-probe',
  'x-sim-signature',
])
63+
/** Enum schema over the values listed in `SOURCE_TYPES`. */
export const dataDrainSourceSchema = z.enum(SOURCE_TYPES)

/** Enum schema over the values listed in `DESTINATION_TYPES`. */
export const dataDrainDestinationTypeSchema = z.enum(DESTINATION_TYPES)

/** Enum schema over the values listed in `CADENCE_TYPES`. */
export const dataDrainCadenceSchema = z.enum(CADENCE_TYPES)
@@ -20,10 +79,53 @@ export const dataDrainParamsSchema = z.object({
/** Drain display name: whitespace-trimmed, then required to be 1-120 characters. */
const drainNameSchema = z
  .string()
  .trim()
  .min(1, 'name is required')
  .max(120)
2180
/**
 * Non-secret configuration accepted for an S3 destination.
 *
 * - `bucket`: length-checked, matched against `S3_BUCKET_NAME_RE`, then screened
 *   for consecutive dots, IP-address lookalikes and reserved prefixes/suffixes.
 * - `region`: 1-32 characters matching `AWS_REGION_RE`.
 * - `prefix`: optional, capped at 512 characters and at 512 UTF-8 bytes.
 * - `endpoint`: optional URL that must also pass `validateExternalUrl`.
 * - `forcePathStyle`: optional boolean.
 */
const s3ConfigBodySchema = z.object({
  bucket: z
    .string()
    .min(3, 'bucket must be 3-63 characters')
    .max(63, 'bucket must be 3-63 characters')
    .refine((name) => S3_BUCKET_NAME_RE.test(name), {
      message: 'bucket must be lowercase, 3-63 chars, start/end alphanumeric',
    })
    .refine((name) => name.indexOf('..') === -1, {
      message: 'bucket must not contain consecutive dots',
    })
    .refine((name) => !S3_IPV4_LIKE_RE.test(name), {
      message: 'bucket must not look like an IP address',
    })
    .refine((name) => !name.startsWith('xn--'), {
      message: 'bucket must not start with "xn--"',
    })
    .refine((name) => !name.startsWith('sthree-'), {
      message: 'bucket must not start with "sthree-"',
    })
    .refine((name) => !name.startsWith('amzn-s3-demo-'), {
      message: 'bucket must not start with "amzn-s3-demo-" (reserved by AWS)',
    })
    .refine(
      (name) =>
        // Passes only when the name ends with none of the reserved suffixes.
        !['-s3alias', '--ol-s3', '.mrap', '--x-s3', '--table-s3'].some((suffix) =>
          name.endsWith(suffix)
        ),
      {
        message:
          'bucket must not end with reserved suffix (-s3alias, --ol-s3, .mrap, --x-s3, --table-s3)',
      }
    ),
  region: z
    .string()
    .min(1, 'region is required')
    .max(32, 'region is too long')
    .refine((code) => AWS_REGION_RE.test(code), {
      message: 'region must look like an AWS region code, e.g. us-east-1',
    }),
  prefix: z
    .string()
    .max(512)
    // The character cap alone is not enough: multi-byte characters count more in UTF-8.
    .refine((keyPrefix) => Buffer.byteLength(keyPrefix, 'utf8') <= 512, {
      message: 'prefix must be at most 512 bytes (UTF-8)',
    })
    .optional(),
  endpoint: z
    .string()
    .url()
    .refine((candidate) => validateExternalUrl(candidate, 'endpoint').isValid, {
      message: 'endpoint must be HTTPS and not point at a private, loopback, or metadata address',
    })
    .optional(),
  forcePathStyle: z.boolean().optional(),
})
29131
@@ -33,26 +135,65 @@ const s3CredentialsBodySchema = z.object({
33135} )
34136
/**
 * Non-secret configuration accepted for a Google Cloud Storage destination.
 *
 * - `bucket`: 3-222 characters; per-component rules come from
 *   `validateGcsBucketComponents`, followed by screens for IP-address
 *   lookalikes, dot/hyphen adjacency and Google-reserved wording.
 * - `prefix`: optional, capped at 512 characters and at 512 UTF-8 bytes, and
 *   not allowed to start with `.well-known/acme-challenge/`.
 */
const gcsConfigBodySchema = z.object({
  bucket: z
    .string()
    .min(3, 'bucket must be 3-222 characters')
    .max(222, 'bucket must be 3-222 characters')
    .superRefine((name, ctx) => {
      // The helper returns a message describing the first problem it finds, or null.
      const problem = validateGcsBucketComponents(name)
      if (problem) {
        ctx.addIssue({ code: z.ZodIssueCode.custom, message: problem })
      }
    })
    .refine((name) => !S3_IPV4_LIKE_RE.test(name), {
      message: 'bucket must not look like an IP address',
    })
    .refine((name) => !name.includes('..'), {
      message: 'bucket must not contain consecutive dots',
    })
    .refine((name) => !(name.includes('-.') || name.includes('.-')), {
      message: 'bucket must not contain "-." or ".-"',
    })
    .refine(
      (name) => !(GOOGLE_RESERVED_PREFIX_RE.test(name) || GOOGLE_CONTAINS_RE.test(name)),
      {
        message: 'bucket name cannot begin with "goog" or contain "google" / close misspellings',
      }
    ),
  prefix: z
    .string()
    .max(512)
    .refine((objectPrefix) => Buffer.byteLength(objectPrefix, 'utf8') <= 512, {
      message: 'prefix must be at most 512 bytes (UTF-8)',
    })
    .refine((objectPrefix) => !objectPrefix.startsWith('.well-known/acme-challenge/'), {
      message: 'prefix must not start with ".well-known/acme-challenge/" (reserved by GCS)',
    })
    .optional(),
})
39165
/** Secret material for a GCS destination: a non-empty `serviceAccountJson` string. */
const gcsCredentialsBodySchema = z.object({
  serviceAccountJson: z
    .string()
    .min(1, 'serviceAccountJson is required'),
})
43169
/**
 * Non-secret configuration accepted for an Azure Blob Storage destination.
 *
 * - `accountName`: must match `AZURE_ACCOUNT_NAME_RE`.
 * - `containerName`: 3-63 characters matching `AZURE_CONTAINER_NAME_RE`.
 * - `prefix`: optional, at most 512 characters.
 * - `endpointSuffix`: optional; when present it must be one of
 *   `AZURE_ENDPOINT_SUFFIXES`.
 */
const azureBlobConfigBodySchema = z.object({
  accountName: z
    .string()
    .min(1, 'accountName is required')
    .refine((account) => AZURE_ACCOUNT_NAME_RE.test(account), {
      message: 'accountName must be 3-24 lowercase letters or digits',
    }),
  containerName: z
    .string()
    .min(3, 'containerName must be 3-63 characters')
    .max(63)
    .refine((container) => AZURE_CONTAINER_NAME_RE.test(container), {
      message: 'containerName must use lowercase letters, digits, or single hyphens',
    }),
  prefix: z.string().max(512).optional(),
  endpointSuffix: z
    .string()
    // Exact, case-sensitive membership test against the allow-list.
    .refine((suffix) => AZURE_ENDPOINT_SUFFIXES.some((allowed) => allowed === suffix), {
      message: `endpointSuffix must be one of: ${AZURE_ENDPOINT_SUFFIXES.join(', ')}`,
    })
    .optional(),
})
50192
51193const azureBlobCredentialsBodySchema = z . object ( {
52194 accountKey : z
53195 . string ( )
54- . min ( 64 , 'accountKey is too short to be a valid Azure storage key' )
55- . max ( 120 , 'accountKey is too long to be a valid Azure storage key' )
196+ . length ( 88 , 'accountKey must be 88 base64 characters (64-byte Azure storage key)' )
56197 . regex ( / ^ [ A - Z a - z 0 - 9 + / ] + = { 0 , 2 } $ / , {
57198 message : 'accountKey must be a base64-encoded Azure storage account key' ,
58199 } ) ,
@@ -69,37 +210,95 @@ const datadogCredentialsBodySchema = z.object({
69210} )
70211
/**
 * Non-secret configuration accepted for a BigQuery destination.
 *
 * - `projectId`: 6-94 characters matching `BQ_PROJECT_ID_RE`.
 * - `datasetId`: non-empty, matching `BQ_DATASET_RE`.
 * - `tableId`: non-empty, matching `BQ_TABLE_RE`, and at most 1024 UTF-8 bytes.
 */
const bigqueryConfigBodySchema = z.object({
  projectId: z
    .string()
    .min(6, 'projectId is required')
    .max(94)
    .refine((project) => BQ_PROJECT_ID_RE.test(project), {
      message: 'projectId must match Google Cloud project ID rules',
    }),
  datasetId: z
    .string()
    .min(1, 'datasetId is required')
    .refine((dataset) => BQ_DATASET_RE.test(dataset), {
      message: 'datasetId may only contain letters, digits, and underscores (max 1024)',
    }),
  tableId: z
    .string()
    .min(1, 'tableId is required')
    .refine((table) => BQ_TABLE_RE.test(table), {
      message:
        'tableId may contain Unicode letters, marks, numbers, connectors, dashes, and spaces (max 1024)',
    })
    // The pattern caps code points; this caps the encoded size as well.
    .refine((table) => Buffer.byteLength(table, 'utf8') <= 1024, {
      message: 'tableId must be at most 1024 bytes (UTF-8)',
    }),
})
76237
/** Secret material for a BigQuery destination: a non-empty `serviceAccountJson` string. */
const bigqueryCredentialsBodySchema = z.object({
  serviceAccountJson: z
    .string()
    .min(1, 'serviceAccountJson is required'),
})
80241
/**
 * Non-secret configuration accepted for a Snowflake destination.
 *
 * - `account`: 3-256 characters matching either the org-account pattern or the
 *   legacy locator pattern.
 * - `user`, `warehouse`, `database`, `schema`, `table`: required, each matching
 *   `SNOWFLAKE_IDENTIFIER_RE`.
 * - `column`, `role`: optional, same identifier rule when present.
 */
const snowflakeConfigBodySchema = z.object({
  account: z
    .string()
    .min(3, 'account is required')
    .max(256)
    .refine(
      (account) =>
        [SNOWFLAKE_ACCOUNT_ORG_RE, SNOWFLAKE_ACCOUNT_LOCATOR_RE].some((shape) =>
          shape.test(account)
        ),
      {
        message:
          'account must be a Snowflake org-account identifier (orgname-accountname) or legacy locator (locator[.region[.cloud]])',
      }
    ),
  user: z
    .string()
    .min(1, 'user is required')
    .regex(SNOWFLAKE_IDENTIFIER_RE, { message: 'user must be a valid Snowflake identifier' }),
  warehouse: z
    .string()
    .min(1)
    .regex(SNOWFLAKE_IDENTIFIER_RE, { message: 'warehouse must be a valid Snowflake identifier' }),
  database: z
    .string()
    .min(1)
    .regex(SNOWFLAKE_IDENTIFIER_RE, { message: 'database must be a valid Snowflake identifier' }),
  schema: z
    .string()
    .min(1)
    .regex(SNOWFLAKE_IDENTIFIER_RE, { message: 'schema must be a valid Snowflake identifier' }),
  table: z
    .string()
    .min(1)
    .regex(SNOWFLAKE_IDENTIFIER_RE, { message: 'table must be a valid Snowflake identifier' }),
  column: z
    .string()
    .min(1)
    .regex(SNOWFLAKE_IDENTIFIER_RE, { message: 'column must be a valid Snowflake identifier' })
    .optional(),
  role: z
    .string()
    .min(1)
    .regex(SNOWFLAKE_IDENTIFIER_RE, { message: 'role must be a valid Snowflake identifier' })
    .optional(),
})
91277
/** Secret material for a Snowflake destination: a non-empty `privateKey` string. */
const snowflakeCredentialsBodySchema = z.object({
  privateKey: z
    .string()
    .min(1, 'privateKey is required'),
})
95281
/**
 * HTTP field-name grammar (RFC 9110 `token`): one or more `tchar` characters.
 * Anything else — whitespace, control characters, separators — is not a legal
 * header name.
 */
const WEBHOOK_SIGNATURE_HEADER_TOKEN_RE = /^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$/

/**
 * Non-secret configuration accepted for a webhook destination.
 *
 * - `url`: a valid URL of at most 2048 characters that also passes
 *   `validateExternalUrl`.
 * - `signatureHeader`: optional header name of 1-128 characters. It must be a
 *   syntactically valid HTTP field name and must not (case-insensitively) be
 *   one of `RESERVED_WEBHOOK_SIGNATURE_HEADER_NAMES`.
 */
const webhookConfigBodySchema = z.object({
  url: z
    .string()
    .url('url must be a valid URL')
    .max(2048, 'url must be at most 2048 characters')
    .refine((value) => validateExternalUrl(value, 'url').isValid, {
      message: 'url must be HTTPS and not point at a private, loopback, or metadata address',
    }),
  signatureHeader: z
    .string()
    .min(1)
    .max(128)
    // Reject malformed names up front: without this, a padded value such as
    // " x-sim-signature" would slip past the reserved-name lookup below.
    .refine((value) => WEBHOOK_SIGNATURE_HEADER_TOKEN_RE.test(value), {
      message: 'signatureHeader must be a valid HTTP header name',
    })
    .refine((value) => !RESERVED_WEBHOOK_SIGNATURE_HEADER_NAMES.has(value.toLowerCase()), {
      message: 'signatureHeader cannot reuse a reserved Sim header name',
    })
    .optional(),
})
100299
/**
 * Secret material for a webhook destination: a `signingSecret` of at least 32
 * characters and an optional, non-empty `bearerToken`.
 */
const webhookCredentialsBodySchema = z.object({
  signingSecret: z
    .string()
    .min(32, 'signingSecret must be at least 32 characters'),
  bearerToken: z
    .string()
    .min(1)
    .optional(),
})
105304
0 commit comments