Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sqlglot-integration-tests
68 changes: 61 additions & 7 deletions sqlglot/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2454,6 +2454,42 @@ def tablesample_sql(

return f" {tablesample_keyword or self.TABLESAMPLE_KEYWORDS} {method}{expr}{seed}"

def _pivot_in_value_aliases(self, expression: exp.Pivot) -> list[exp.Expression] | None:
Copy link
Copy Markdown
Collaborator

@georgesittas georgesittas May 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there may be a better way to achieve this, instead of actually re-generating the output columns and comparing them with what's stored in columns.

Today, there are three "knobs" that determine the PIVOT output column naming scheme:

  • _pivot_column_names
  • IDENTIFY_PIVOT_STRINGS
  • PREFIXED_PIVOT_COLUMNS

If we find a compact way to represent the information of what the value/logic for each of these knobs is for a dialect and store that in the AST, then it should be trivial to transpile by injecting PivotAliases: if source value vs target value differs, you directly add the aliases based on columns.

The bit that needs some thought is how you cleanly represent this info, given that the first one is currently a method that can be overridden and not just a flag like the other two.

# Returns the rewritten field.expressions list with PivotAlias wrappers injected where the
# stored column name (from pivot.args["columns"]) differs from the target dialect's natural
# name. Returns None if no rewrite is needed or applicable.
stored = expression.args.get("columns", [])
if not stored or len(expression.fields) != 1:
return None

# Only inject when SELECT * has already been expanded by qualify()
parent_select = expression.find_ancestor(exp.Select)
if not parent_select or any(isinstance(e, exp.Star) for e in parent_select.expressions):
return None
Comment on lines +2465 to +2468
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we want to do this. The expanded columns will still differ in the source and target engines if you don't rename them during transpilation, so you're just pushing the problem further downstream.

Also, for future reference, you can generally do is_star on queries.


agg_aliases = [agg.alias for agg in expression.expressions if agg.alias]
step = len(agg_aliases) or 1
suffix = ("_" + agg_aliases[0]) if agg_aliases else ""
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this picking only the first alias? You can have multiple aggregations, each with a different alias.


new_exprs: list[exp.Expression] = []
modified = False
i = 0
for e in expression.fields[0].expressions:
if not isinstance(e, (exp.Literal, exp.PivotAlias)):
return None
Comment on lines +2478 to +2479
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should return here, what if some values have aliases but others don't?

if i >= len(stored) or (suffix and not stored[i].name.endswith(suffix)):
return None
stored_name = stored[i].name[: -len(suffix)] if suffix else stored[i].name
if not isinstance(e, exp.PivotAlias) and stored_name.lower() != e.alias_or_name.lower():
Comment on lines +2480 to +2483
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that some dialects don't use the aggregation alias as a suffix, but rather as a prefix. See here.

new_exprs.append(
exp.PivotAlias(this=e, alias=exp.to_identifier(stored_name, quoted=True))
)
modified = True
Comment on lines +2484 to +2487
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is quoted set to True preemptively?

else:
new_exprs.append(e)
i += step
return new_exprs if modified else None

def pivot_sql(self, expression: exp.Pivot) -> str:
expressions = self.expressions(expression, flat=True)
direction = "UNPIVOT" if expression.unpivot else "PIVOT"
Expand All @@ -2473,6 +2509,14 @@ def pivot_sql(self, expression: exp.Pivot) -> str:
sql = f"{direction} {this}{on}{into}{using}{group}"
return self.prepend_ctes(expression, sql)

if not expression.unpivot:
# Wrap IN-list values with explicit aliases where the target dialect's natural column
# name would differ from the stored name recorded by the source dialect's parser.
new_field_exprs = self._pivot_in_value_aliases(expression)
if new_field_exprs is not None:
expression = expression.copy()
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't copy here, right? The generator preemptively copies all ASTs it converts into SQL.

expression.fields[0].set("expressions", new_field_exprs)

alias = self.sql(expression, "alias")
alias = f" AS {alias}" if alias else ""

Expand Down Expand Up @@ -3845,14 +3889,24 @@ def pivotalias_sql(self, expression: exp.PivotAlias) -> str:
parent = expression.parent
pivot = parent and parent.parent

if isinstance(pivot, exp.Pivot) and pivot.unpivot:
identifier_alias = isinstance(alias, exp.Identifier)
literal_alias = isinstance(alias, exp.Literal)
if isinstance(pivot, exp.Pivot):
if pivot.unpivot:
identifier_alias = isinstance(alias, exp.Identifier)
literal_alias = isinstance(alias, exp.Literal)

if identifier_alias and not self.UNPIVOT_ALIASES_ARE_IDENTIFIERS:
alias.replace(exp.Literal.string(alias.output_name))
elif not identifier_alias and literal_alias and self.UNPIVOT_ALIASES_ARE_IDENTIFIERS:
alias.replace(exp.to_identifier(alias.output_name))
if identifier_alias and not self.UNPIVOT_ALIASES_ARE_IDENTIFIERS:
alias.replace(exp.Literal.string(alias.output_name))
elif (
not identifier_alias and literal_alias and self.UNPIVOT_ALIASES_ARE_IDENTIFIERS
):
alias.replace(exp.to_identifier(alias.output_name))
elif getattr(
getattr(self.dialect, "parser_class", None), "IDENTIFY_PIVOT_STRINGS", False
):
Comment on lines +3903 to +3905
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we using getattr here?

# For IDENTIFY_PIVOT_STRINGS targets (e.g. Snowflake), string literals in the
# IN-list already produce the correct column names natively, so strip any alias
# that _pivot_in_value_aliases may have injected for other dialects.
return self.sql(expression, "this")
Comment on lines +3906 to +3909
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand this comment, can you give an example for why this is needed? I'd expect a simple "do source/target names differ?" check to suffice for transpilation purposes.


return self.alias_sql(expression)

Expand Down
Loading