Skip to content

Commit 8efc2f8

Browse files
authored
Merge pull request #248 from constructive-io/fix/type-name-quoting
feat(deparser): add type-name quoting policy for minimal quoting in type positions
2 parents 48ea421 + 9104bf3 commit 8efc2f8

File tree

7 files changed

+264
-2
lines changed

7 files changed

+264
-2
lines changed

__fixtures__/generated/generated.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,15 @@
6060
"pretty/quoting-5.sql": "CREATE FUNCTION faker.boolean() RETURNS boolean AS $EOFCODE$\nBEGIN\n RETURN random() < 0.5;\nEND;\n$EOFCODE$ LANGUAGE plpgsql",
6161
"pretty/quoting-6.sql": "CREATE FUNCTION faker.\"boolean\"() RETURNS boolean AS $EOFCODE$\nBEGIN\n RETURN random() < 0.5;\nEND;\n$EOFCODE$ LANGUAGE plpgsql",
6262
"pretty/quoting-7.sql": "CREATE DOMAIN origin AS text CHECK (value = pg_catalog.\"substring\"(value, '^(https?://[^/]*)'))",
63+
"pretty/quoting-8.sql": "SELECT '{\"a\":1}'::json",
64+
"pretty/quoting-9.sql": "SELECT '{\"b\":2}'::jsonb",
65+
"pretty/quoting-10.sql": "SELECT true::boolean",
66+
"pretty/quoting-11.sql": "SELECT '1 day'::interval",
67+
"pretty/quoting-12.sql": "SELECT 42::int",
68+
"pretty/quoting-13.sql": "INSERT INTO test_table (data) VALUES ('{\"c\":3}'::json)",
69+
"pretty/quoting-14.sql": "SELECT '{\"d\":4}'::myschema.json",
70+
"pretty/quoting-15.sql": "SELECT 100::custom.int",
71+
"pretty/quoting-16.sql": "SELECT true::myapp.boolean",
6372
"pretty/procedures-1.sql": "SELECT handle_insert('TYPE_A')",
6473
"pretty/procedures-2.sql": "SELECT \"HandleInsert\"('TYPE_A', 'Region-1')",
6574
"pretty/procedures-3.sql": "SELECT compute_score(42, TRUE)",

__fixtures__/kitchen-sink/pretty/quoting.sql

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,33 @@ $EOFCODE$ LANGUAGE plpgsql;
4545
-- Note: SUBSTRING(value FROM 'pattern') SQL syntax gets deparsed to pg_catalog."substring"(value, 'pattern')
4646
-- The SQL syntax form cannot be tested here due to AST round-trip differences (COERCE_SQL_SYNTAX vs COERCE_EXPLICIT_CALL)
4747
CREATE DOMAIN origin AS text CHECK (value = pg_catalog."substring"(value, '^(https?://[^/]*)'));
48+
49+
-- 8. Type name quoting: json type should NOT be quoted (COL_NAME_KEYWORD in type position)
50+
-- Type names follow a less strict quoting policy than standalone identifiers
51+
SELECT '{"a":1}'::json;
52+
53+
-- 9. Type name quoting: jsonb type should NOT be quoted
54+
SELECT '{"b":2}'::jsonb;
55+
56+
-- 10. Type name quoting: boolean type should NOT be quoted (TYPE_FUNC_NAME_KEYWORD in type position)
57+
SELECT true::boolean;
58+
59+
-- 11. Type name quoting: interval type should NOT be quoted (TYPE_FUNC_NAME_KEYWORD in type position)
60+
SELECT '1 day'::interval;
61+
62+
-- 12. Type name quoting: int type should NOT be quoted (COL_NAME_KEYWORD in type position)
63+
SELECT 42::int;
64+
65+
-- 13. Type cast in INSERT VALUES - json type should NOT be quoted
66+
INSERT INTO test_table (data) VALUES ('{"c":3}'::json);
67+
68+
-- 14. User-defined schema-qualified type with keyword name - should NOT quote the type name
69+
-- This tests the bug where non-pg_catalog types use quoteIdentifier() for ALL parts
70+
-- The type name 'json' is a COL_NAME_KEYWORD and should NOT be quoted in type position
71+
SELECT '{"d":4}'::myschema.json;
72+
73+
-- 15. User-defined schema-qualified type with keyword name 'int' - should NOT quote
74+
SELECT 100::custom.int;
75+
76+
-- 16. User-defined schema-qualified type with keyword name 'boolean' - should NOT quote
77+
SELECT true::myapp.boolean;

packages/deparser/QUOTING-RULES.md

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,99 @@ The only difference is that `quoteIdentifierAfterDot()` does not check for keywo
203203
| `interval` | `"interval"` | `interval` |
204204
| `my-col` | `"my-col"` | `"my-col"` |
205205

206+
## Type-Name Quoting Policy
207+
208+
Type names in PostgreSQL have their own quoting policy that is less strict than standalone identifiers but different from after-dot identifiers.
209+
210+
### The Problem
211+
212+
When you have a user-defined schema-qualified type like `myschema.json`, the type name `json` is a `COL_NAME_KEYWORD`. Using strict quoting (`quoteIdentifier()`) would produce `myschema."json"`, which is unnecessarily verbose.
213+
214+
However, we cannot use the fully relaxed after-dot policy (`quoteIdentifierAfterDot()`) because type names are not in the same permissive grammar slot as identifiers after a dot in qualified names.
215+
216+
### The Type-Name Quoting Algorithm
217+
218+
The `QuoteUtils.quoteIdentifierTypeName()` function implements a middle-ground policy:
219+
220+
- **Quote for lexical reasons**: uppercase, special characters, leading digits, embedded quotes
221+
- **Quote only RESERVED_KEYWORD**: keywords like `select`, `from`, `where` must be quoted
222+
- **Allow COL_NAME_KEYWORD unquoted**: keywords like `json`, `int`, `boolean` are allowed
223+
- **Allow TYPE_FUNC_NAME_KEYWORD unquoted**: keywords like `interval`, `left`, `right` are allowed
224+
225+
```
226+
function quoteIdentifierTypeName(ident):
227+
if ident is empty:
228+
return ident
229+
230+
safe = true
231+
232+
// Rule 1: First character must be lowercase letter or underscore
233+
if first_char not in [a-z_]:
234+
safe = false
235+
236+
// Rule 2: All characters must be in safe set
237+
for each char in ident:
238+
if char not in [a-z0-9_]:
239+
safe = false
240+
241+
// Rule 3: Only quote RESERVED_KEYWORD (not COL_NAME_KEYWORD or TYPE_FUNC_NAME_KEYWORD)
242+
if safe:
243+
kwKind = keywordKindOf(ident)
244+
if kwKind == RESERVED_KEYWORD:
245+
safe = false
246+
247+
if safe:
248+
return ident // No quoting needed
249+
250+
// Build quoted identifier with escaped embedded quotes
251+
result = '"'
252+
for each char in ident:
253+
if char == '"':
254+
result += '"'
255+
result += char
256+
result += '"'
257+
258+
return result
259+
```
260+
261+
### Comparison of Quoting Policies
262+
263+
| Input | quoteIdentifier() | quoteIdentifierAfterDot() | quoteIdentifierTypeName() |
264+
|-------|-------------------|---------------------------|---------------------------|
265+
| `mytable` | `mytable` | `mytable` | `mytable` |
266+
| `MyTable` | `"MyTable"` | `"MyTable"` | `"MyTable"` |
267+
| `json` | `"json"` | `json` | `json` |
268+
| `int` | `"int"` | `int` | `int` |
269+
| `boolean` | `"boolean"` | `boolean` | `boolean` |
270+
| `interval` | `"interval"` | `interval` | `interval` |
271+
| `select` | `"select"` | `select` | `"select"` |
272+
| `from` | `"from"` | `from` | `"from"` |
273+
274+
### quoteTypeDottedName(parts: string[])
275+
276+
For schema-qualified type names, use `quoteTypeDottedName()` which applies type-name quoting to all parts:
277+
278+
```typescript
279+
static quoteTypeDottedName(parts: string[]): string {
280+
if (!parts || parts.length === 0) return '';
281+
return parts.map(part => QuoteUtils.quoteIdentifierTypeName(part)).join('.');
282+
}
283+
```
284+
285+
### Examples
286+
287+
| Input Parts | Output |
288+
|-------------|--------|
289+
| `['json']` | `json` |
290+
| `['myschema', 'json']` | `myschema.json` |
291+
| `['custom', 'int']` | `custom.int` |
292+
| `['myapp', 'boolean']` | `myapp.boolean` |
293+
| `['myschema', 'select']` | `myschema."select"` |
294+
295+
### When to Use Type-Name Quoting
296+
297+
Use `quoteTypeDottedName()` in the `TypeName` handler for non-pg_catalog types. This ensures that user-defined types with keyword names are emitted with minimal quoting.
298+
206299
## Composition Helpers
207300

208301
### quoteDottedName(parts: string[])
@@ -359,13 +452,17 @@ When updating to support new PostgreSQL versions, ensure `kwlist.ts` is synchron
359452
| Dotted name (multi-part) | `quoteDottedName()` | `schema.table`, `schema.function` |
360453
| Two-part qualified name | `quoteQualifiedIdentifier()` | `schema.table` |
361454
| After-dot component only | `quoteIdentifierAfterDot()` | Indirection field access |
455+
| Type name (single or multi-part) | `quoteTypeDottedName()` | `myschema.json`, `custom.int` |
456+
| Type name component only | `quoteIdentifierTypeName()` | Type name part |
362457
| String literal | `escape()` or `formatEString()` | String values in SQL |
363458

364459
## Test Fixtures
365460

366461
The quoting behavior is verified by test fixtures in `__fixtures__/kitchen-sink/pretty/`:
367462

368463
- `quoting-1.sql` through `quoting-7.sql`: Test cases for `faker.float`, `faker.interval`, `faker.boolean`, and `pg_catalog.substring`
464+
- `quoting-8.sql` through `quoting-13.sql`: Test cases for type casts with `json`, `jsonb`, `boolean`, `interval`, `int`
465+
- `quoting-14.sql` through `quoting-16.sql`: Test cases for user-defined schema-qualified types with keyword names (`myschema.json`, `custom.int`, `myapp.boolean`)
369466

370467
The corresponding snapshots in `__tests__/pretty/__snapshots__/quoting-pretty.test.ts.snap` demonstrate the expected output with minimal quoting.
371468

packages/deparser/__tests__/pretty/__snapshots__/quoting-pretty.test.ts.snap

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,24 @@ $$ LANGUAGE plpgsql"
5050

5151
exports[`non-pretty: pretty/quoting-7.sql 1`] = `"CREATE DOMAIN origin AS text CHECK (value = pg_catalog.substring(value, '^(https?://[^/]*)'))"`;
5252

53+
exports[`non-pretty: pretty/quoting-8.sql 1`] = `"SELECT '{"a":1}'::json"`;
54+
55+
exports[`non-pretty: pretty/quoting-9.sql 1`] = `"SELECT '{"b":2}'::jsonb"`;
56+
57+
exports[`non-pretty: pretty/quoting-10.sql 1`] = `"SELECT CAST(true AS boolean)"`;
58+
59+
exports[`non-pretty: pretty/quoting-11.sql 1`] = `"SELECT '1 day'::interval"`;
60+
61+
exports[`non-pretty: pretty/quoting-12.sql 1`] = `"SELECT 42::int"`;
62+
63+
exports[`non-pretty: pretty/quoting-13.sql 1`] = `"INSERT INTO test_table (data) VALUES ('{"c":3}'::json)"`;
64+
65+
exports[`non-pretty: pretty/quoting-14.sql 1`] = `"SELECT CAST('{"d":4}' AS myschema.json)"`;
66+
67+
exports[`non-pretty: pretty/quoting-15.sql 1`] = `"SELECT CAST(100 AS custom.int)"`;
68+
69+
exports[`non-pretty: pretty/quoting-16.sql 1`] = `"SELECT CAST(true AS myapp.boolean)"`;
70+
5371
exports[`pretty: pretty/quoting-1.sql 1`] = `
5472
"CREATE FUNCTION faker.float(min double precision DEFAULT 0, max double precision DEFAULT 100) RETURNS double precision AS $$
5573
BEGIN
@@ -102,3 +120,26 @@ exports[`pretty: pretty/quoting-7.sql 1`] = `
102120
"CREATE DOMAIN origin AS text
103121
CHECK (value = pg_catalog.substring(value, '^(https?://[^/]*)'))"
104122
`;
123+
124+
exports[`pretty: pretty/quoting-8.sql 1`] = `"SELECT '{"a":1}'::json"`;
125+
126+
exports[`pretty: pretty/quoting-9.sql 1`] = `"SELECT '{"b":2}'::jsonb"`;
127+
128+
exports[`pretty: pretty/quoting-10.sql 1`] = `"SELECT CAST(true AS boolean)"`;
129+
130+
exports[`pretty: pretty/quoting-11.sql 1`] = `"SELECT '1 day'::interval"`;
131+
132+
exports[`pretty: pretty/quoting-12.sql 1`] = `"SELECT 42::int"`;
133+
134+
exports[`pretty: pretty/quoting-13.sql 1`] = `
135+
"INSERT INTO test_table (
136+
data
137+
) VALUES
138+
('{"c":3}'::json)"
139+
`;
140+
141+
exports[`pretty: pretty/quoting-14.sql 1`] = `"SELECT CAST('{"d":4}' AS myschema.json)"`;
142+
143+
exports[`pretty: pretty/quoting-15.sql 1`] = `"SELECT CAST(100 AS custom.int)"`;
144+
145+
exports[`pretty: pretty/quoting-16.sql 1`] = `"SELECT CAST(true AS myapp.boolean)"`;

packages/deparser/__tests__/pretty/quoting-pretty.test.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,15 @@ const prettyTest = new PrettyTest([
77
'pretty/quoting-5.sql',
88
'pretty/quoting-6.sql',
99
'pretty/quoting-7.sql',
10+
'pretty/quoting-8.sql',
11+
'pretty/quoting-9.sql',
12+
'pretty/quoting-10.sql',
13+
'pretty/quoting-11.sql',
14+
'pretty/quoting-12.sql',
15+
'pretty/quoting-13.sql',
16+
'pretty/quoting-14.sql',
17+
'pretty/quoting-15.sql',
18+
'pretty/quoting-16.sql',
1019
]);
1120

1221
prettyTest.generateTests();

packages/deparser/src/deparser.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,8 +1972,9 @@ export class Deparser implements DeparserVisitor {
19721972
}
19731973
}
19741974

1975-
const quotedNames = names.map((name: string) => QuoteUtils.quoteIdentifier(name));
1976-
let result = mods(quotedNames.join('.'), args);
1975+
// Use type-name quoting for non-pg_catalog types
1976+
// This allows keywords like 'json', 'int', 'boolean' to remain unquoted in type positions
1977+
let result = mods(QuoteUtils.quoteTypeDottedName(names), args);
19771978

19781979
if (node.arrayBounds && node.arrayBounds.length > 0) {
19791980
result += formatArrayBounds(node.arrayBounds);

packages/deparser/src/utils/quote-utils.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,5 +185,80 @@ export class QuoteUtils {
185185
}
186186
return QuoteUtils.quoteIdentifier(ident);
187187
}
188+
189+
/**
190+
* Quote an identifier that appears as a type name.
191+
*
192+
* Type names in PostgreSQL have a less strict quoting policy than standalone identifiers.
193+
* In type positions, COL_NAME_KEYWORD and TYPE_FUNC_NAME_KEYWORD are allowed unquoted
194+
* (e.g., 'json', 'int', 'boolean', 'interval'). Only RESERVED_KEYWORD must be quoted.
195+
*
196+
* This is different from:
197+
* - quoteIdentifier(): quotes all keywords except UNRESERVED_KEYWORD
198+
* - quoteIdentifierAfterDot(): only quotes for lexical reasons (no keyword checking)
199+
*
200+
* Type names still need quoting for lexical reasons (uppercase, special chars, etc.).
201+
*/
202+
static quoteIdentifierTypeName(ident: string): string {
203+
if (!ident) return ident;
204+
205+
let safe = true;
206+
207+
// Check first character: must be lowercase letter or underscore
208+
const firstChar = ident[0];
209+
if (!((firstChar >= 'a' && firstChar <= 'z') || firstChar === '_')) {
210+
safe = false;
211+
}
212+
213+
// Check all characters
214+
for (let i = 0; i < ident.length; i++) {
215+
const ch = ident[i];
216+
if ((ch >= 'a' && ch <= 'z') ||
217+
(ch >= '0' && ch <= '9') ||
218+
(ch === '_')) {
219+
// okay
220+
} else {
221+
safe = false;
222+
}
223+
}
224+
225+
if (safe) {
226+
// For type names, only quote RESERVED_KEYWORD
227+
// COL_NAME_KEYWORD and TYPE_FUNC_NAME_KEYWORD are allowed unquoted in type positions
228+
const kwKind = keywordKindOf(ident);
229+
if (kwKind === 'RESERVED_KEYWORD') {
230+
safe = false;
231+
}
232+
}
233+
234+
if (safe) {
235+
return ident; // no change needed
236+
}
237+
238+
// Build quoted identifier with escaped embedded quotes
239+
let result = '"';
240+
for (let i = 0; i < ident.length; i++) {
241+
const ch = ident[i];
242+
if (ch === '"') {
243+
result += '"'; // escape " as ""
244+
}
245+
result += ch;
246+
}
247+
result += '"';
248+
249+
return result;
250+
}
251+
252+
/**
253+
* Quote a dotted type name (e.g., schema.typename).
254+
*
255+
* For type names, we use type-name quoting for all parts since the entire
256+
* qualified name is in a type context. This allows keywords like 'json',
257+
* 'int', 'boolean' to remain unquoted in user-defined schema-qualified types.
258+
*/
259+
static quoteTypeDottedName(parts: string[]): string {
260+
if (!parts || parts.length === 0) return '';
261+
return parts.map(part => QuoteUtils.quoteIdentifierTypeName(part)).join('.');
262+
}
188263

189264
}

0 commit comments

Comments
 (0)