Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,7 @@ func (p *Projection) End() token.Position { return p.Position }
// ProjectionSelectQuery represents the SELECT part of a projection.
type ProjectionSelectQuery struct {
Position token.Position `json:"-"`
With []Expression `json:"with,omitempty"` // WITH clause expressions
Columns []Expression `json:"columns"`
GroupBy []Expression `json:"group_by,omitempty"`
OrderBy []Expression `json:"order_by,omitempty"` // ORDER BY columns
Expand Down Expand Up @@ -700,6 +701,7 @@ const (
AlterModifyOrderBy AlterCommandType = "MODIFY_ORDER_BY"
AlterModifySampleBy AlterCommandType = "MODIFY_SAMPLE_BY"
AlterRemoveSampleBy AlterCommandType = "REMOVE_SAMPLE_BY"
AlterApplyDeletedMask AlterCommandType = "APPLY_DELETED_MASK"
)

// TruncateQuery represents a TRUNCATE statement.
Expand Down Expand Up @@ -983,6 +985,7 @@ type RenameQuery struct {
To string `json:"to,omitempty"` // Deprecated: for backward compat
OnCluster string `json:"on_cluster,omitempty"`
Settings []*SettingExpr `json:"settings,omitempty"`
IfExists bool `json:"if_exists,omitempty"` // IF EXISTS modifier
}

func (r *RenameQuery) Pos() token.Position { return r.Position }
Expand Down
92 changes: 85 additions & 7 deletions internal/explain/expressions.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,31 @@ func isSimpleLiteralOrNegation(e ast.Expression) bool {
return false
}

// isSimpleLiteralOrNestedLiteral reports whether e is made up purely of literals.
//
// It returns true for:
//   - any *ast.Literal that is not an array or tuple;
//   - an array/tuple literal whose Value is a []ast.Expression in which every
//     element is itself accepted by this function (checked recursively);
//   - an array/tuple literal whose Value is not a []ast.Expression (nothing to inspect);
//   - a unary "-" applied directly to an integer or float literal.
//
// Every other expression kind yields false.
func isSimpleLiteralOrNestedLiteral(e ast.Expression) bool {
	switch node := e.(type) {
	case *ast.Literal:
		// Scalar literals are simple by definition.
		if node.Type != ast.LiteralArray && node.Type != ast.LiteralTuple {
			return true
		}
		children, isList := node.Value.([]ast.Expression)
		if !isList {
			return true
		}
		// A container is simple only when all of its elements are.
		for _, child := range children {
			if !isSimpleLiteralOrNestedLiteral(child) {
				return false
			}
		}
		return true
	case *ast.UnaryExpr:
		// Only a minus sign in front of a numeric literal counts (negative number).
		if node.Op != "-" {
			return false
		}
		operand, isLiteral := node.Operand.(*ast.Literal)
		if !isLiteral {
			return false
		}
		return operand.Type == ast.LiteralInteger || operand.Type == ast.LiteralFloat
	default:
		return false
	}
}

// containsOnlyArraysOrTuples checks if a slice of expressions contains
// only array or tuple literals (including empty arrays).
// Returns true if the slice is empty or contains only arrays/tuples.
Expand Down Expand Up @@ -952,16 +977,39 @@ func explainWithElement(sb *strings.Builder, n *ast.WithElement, indent string,
// When name is empty, don't show the alias part
switch e := n.Query.(type) {
case *ast.Literal:
// Empty tuples should be rendered as Function tuple, not Literal
// Tuples containing complex expressions (subqueries, function calls, etc) should be rendered as Function tuple
// But tuples of simple literals (including nested tuples of literals) stay as Literal
if e.Type == ast.LiteralTuple {
if exprs, ok := e.Value.([]ast.Expression); ok && len(exprs) == 0 {
if n.Name != "" {
fmt.Fprintf(sb, "%sFunction tuple (alias %s) (children %d)\n", indent, n.Name, 1)
if exprs, ok := e.Value.([]ast.Expression); ok {
needsFunctionFormat := false
// Empty tuples always use Function tuple format
if len(exprs) == 0 {
needsFunctionFormat = true
} else {
fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1)
for _, expr := range exprs {
// Check if any element is a truly complex expression (not just a literal)
if !isSimpleLiteralOrNestedLiteral(expr) {
needsFunctionFormat = true
break
}
}
}
if needsFunctionFormat {
if n.Name != "" {
fmt.Fprintf(sb, "%sFunction tuple (alias %s) (children %d)\n", indent, n.Name, 1)
} else {
fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1)
}
if len(exprs) > 0 {
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs))
} else {
fmt.Fprintf(sb, "%s ExpressionList\n", indent)
}
for _, expr := range exprs {
Node(sb, expr, depth+2)
}
return
}
fmt.Fprintf(sb, "%s ExpressionList\n", indent)
return
}
}
// Arrays containing non-literal expressions should be rendered as Function array
Expand Down Expand Up @@ -1064,6 +1112,36 @@ func explainWithElement(sb *strings.Builder, n *ast.WithElement, indent string,
explainArrayAccessWithAlias(sb, e, n.Name, indent, depth)
case *ast.BetweenExpr:
explainBetweenExprWithAlias(sb, e, n.Name, indent, depth)
case *ast.UnaryExpr:
// For unary minus with numeric literal, output as negative literal with alias
if e.Op == "-" {
if lit, ok := e.Operand.(*ast.Literal); ok && (lit.Type == ast.LiteralInteger || lit.Type == ast.LiteralFloat) {
// Format as negative literal
negLit := &ast.Literal{
Position: lit.Position,
Type: lit.Type,
Value: lit.Value,
}
if n.Name != "" {
fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, formatNegativeLiteral(negLit), n.Name)
} else {
fmt.Fprintf(sb, "%sLiteral %s\n", indent, formatNegativeLiteral(negLit))
}
return
}
}
// For other unary expressions, output as function
fnName := "negate"
if e.Op == "NOT" {
fnName = "not"
}
if n.Name != "" {
fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Name, 1)
} else {
fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1)
}
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1)
Node(sb, e.Operand, depth+2)
default:
// For other types, just output the expression (alias may be lost)
Node(sb, n.Query, depth)
Expand Down
20 changes: 20 additions & 0 deletions internal/explain/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,26 @@ func FormatLiteral(lit *ast.Literal) string {
}
}

// formatNegativeLiteral renders a numeric literal with a minus sign inserted
// in front of its value.
//
// Integer literals are rendered as "Int64_-<value>" and float literals as
// "Float64_-<value>"; any other literal type is rendered as "-<value>".
// The sign is inserted unconditionally: the function does not inspect whether
// the stored value is already negative.
func formatNegativeLiteral(lit *ast.Literal) string {
	switch lit.Type {
	case ast.LiteralInteger:
		// %v prints int64/uint64 exactly like %d and also covers any other
		// dynamic type the value may carry.
		return fmt.Sprintf("Int64_-%v", lit.Value)
	case ast.LiteralFloat:
		// Use the comma-ok form so a float literal whose Value is not a
		// float64 falls back to generic formatting instead of panicking.
		if val, ok := lit.Value.(float64); ok {
			return fmt.Sprintf("Float64_-%s", FormatFloat(val))
		}
		return fmt.Sprintf("Float64_-%v", lit.Value)
	default:
		return fmt.Sprintf("-%v", lit.Value)
	}
}

// formatArrayLiteral formats an array literal for EXPLAIN AST output
func formatArrayLiteral(val interface{}) string {
exprs, ok := val.([]ast.Expression)
Expand Down
7 changes: 6 additions & 1 deletion internal/explain/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -1133,7 +1133,12 @@ func explainInExpr(sb *strings.Builder, n *ast.InExpr, indent string, depth int)
fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1)
if allParenthesizedPrimitives {
// Expand the elements
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(elems))
// For empty tuples, don't include children count
if len(elems) == 0 {
fmt.Fprintf(sb, "%s ExpressionList\n", indent)
} else {
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(elems))
}
for _, elem := range elems {
Node(sb, elem, depth+4)
}
Expand Down
65 changes: 63 additions & 2 deletions internal/explain/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,16 +298,21 @@ func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuer
// ClickHouse optimizes UNION ALL when selects have identical expressions but different aliases.
// In that case, only the first SELECT is shown since column names come from the first SELECT anyway.
selects := simplifyUnionSelects(n.Selects)

// Check if we need to group selects due to mode changes
// e.g., A UNION DISTINCT B UNION ALL C -> (A UNION DISTINCT B) UNION ALL C
groupedSelects := groupSelectsByUnionMode(selects, n.UnionModes)

// Wrap selects in ExpressionList
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(selects))
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(groupedSelects))

// Check if first operand has a WITH clause to be inherited by subsequent operands
var inheritedWith []ast.Expression
if len(selects) > 0 {
inheritedWith = extractWithClause(selects[0])
}

for i, sel := range selects {
for i, sel := range groupedSelects {
if i > 0 && len(inheritedWith) > 0 {
// Subsequent operands inherit the WITH clause from the first operand
explainSelectQueryWithInheritedWith(sb, sel, inheritedWith, depth+2)
Expand Down Expand Up @@ -620,6 +625,62 @@ func simplifyUnionSelects(selects []ast.Statement) []ast.Statement {
return selects
}

// groupSelectsByUnionMode groups selects when union modes change from DISTINCT to ALL.
// For example, A UNION DISTINCT B UNION ALL C becomes (A UNION DISTINCT B) UNION ALL C.
// This is intended to match ClickHouse's EXPLAIN AST output, which nests DISTINCT
// groups before ALL. The reverse (ALL followed by DISTINCT) does NOT trigger nesting.
//
// unionModes[i] is the operator between selects[i] and selects[i+1]; each mode may
// be given with or without a leading "UNION " prefix.
//
// The input slice is returned unchanged when:
//   - there are fewer than three selects or fewer than two modes;
//   - the first mode is not DISTINCT;
//   - no ALL mode follows the first one;
//   - the two slices disagree in length such that the ALL operator has no
//     corresponding left-hand selects to group.
//
// Otherwise the result is a new slice whose first element is a nested
// *ast.SelectWithUnionQuery holding the selects up to the first ALL operator,
// followed by the remaining selects.
func groupSelectsByUnionMode(selects []ast.Statement, unionModes []string) []ast.Statement {
	if len(selects) < 3 || len(unionModes) < 2 {
		return selects
	}

	// Normalize union modes (strip "UNION " prefix if present).
	normalizeMode := func(mode string) string {
		return strings.TrimPrefix(mode, "UNION ")
	}

	// Only group when the chain starts with DISTINCT.
	if normalizeMode(unionModes[0]) != "DISTINCT" {
		return selects
	}

	// Find the first ALL operator after the leading DISTINCT.
	modeChangeIdx := -1
	for i := 1; i < len(unionModes); i++ {
		if normalizeMode(unionModes[i]) == "ALL" {
			modeChangeIdx = i
			break
		}
	}

	// If no DISTINCT->ALL transition was found, return as-is.
	if modeChangeIdx == -1 {
		return selects
	}

	// The operator at modeChangeIdx sits between selects[modeChangeIdx] and
	// selects[modeChangeIdx+1], so the nested group is selects[0..modeChangeIdx].
	split := modeChangeIdx + 1

	// The caller may pass a selects slice that is shorter than unionModes implies;
	// refuse to group rather than slice out of range.
	if split > len(selects) {
		return selects
	}

	// Full slice expressions cap the capacity so that a later append on the nested
	// node cannot overwrite elements still owned by the caller's slices.
	nested := &ast.SelectWithUnionQuery{
		Selects:    selects[:split:split],
		UnionModes: unionModes[:modeChangeIdx:modeChangeIdx],
	}

	// Result is [nested, selects[split], ...].
	result := make([]ast.Statement, 0, len(selects)-modeChangeIdx)
	result = append(result, nested)
	result = append(result, selects[split:]...)

	return result
}

func countSelectQueryChildren(n *ast.SelectQuery) int {
count := 1 // columns ExpressionList
// WITH clause
Expand Down
26 changes: 23 additions & 3 deletions internal/explain/statements.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,13 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string,
if len(n.OrderBy) > 0 {
if len(n.OrderBy) == 1 {
if ident, ok := n.OrderBy[0].(*ast.Identifier); ok {
fmt.Fprintf(sb, "%s Identifier %s\n", storageIndent, ident.Name())
// When ORDER BY has modifiers (ASC/DESC), wrap in StorageOrderByElement
if n.OrderByHasModifiers {
fmt.Fprintf(sb, "%s StorageOrderByElement (children %d)\n", storageIndent, 1)
fmt.Fprintf(sb, "%s Identifier %s\n", storageIndent, ident.Name())
} else {
fmt.Fprintf(sb, "%s Identifier %s\n", storageIndent, ident.Name())
}
} else if lit, ok := n.OrderBy[0].(*ast.Literal); ok && lit.Type == ast.LiteralTuple {
// Handle tuple literal - for ORDER BY with modifiers (DESC/ASC),
// ClickHouse outputs just "Function tuple" without children
Expand Down Expand Up @@ -1620,6 +1626,10 @@ func explainAlterCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent stri
if cmdType == ast.AlterClearStatistics {
cmdType = ast.AlterDropStatistics
}
// ATTACH PARTITION ... FROM table is shown as REPLACE_PARTITION in EXPLAIN AST
if cmdType == ast.AlterAttachPartition && cmd.FromTable != "" {
cmdType = ast.AlterReplacePartition
}
// DETACH_PARTITION is shown as DROP_PARTITION in EXPLAIN AST
if cmdType == ast.AlterDetachPartition {
cmdType = ast.AlterDropPartition
Expand Down Expand Up @@ -1802,7 +1812,7 @@ func explainAlterCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent stri
case ast.AlterModifySetting:
fmt.Fprintf(sb, "%s Set\n", indent)
case ast.AlterDropPartition, ast.AlterDetachPartition, ast.AlterAttachPartition,
ast.AlterReplacePartition, ast.AlterFetchPartition, ast.AlterMovePartition, ast.AlterFreezePartition, ast.AlterApplyPatches:
ast.AlterReplacePartition, ast.AlterFetchPartition, ast.AlterMovePartition, ast.AlterFreezePartition, ast.AlterApplyPatches, ast.AlterApplyDeletedMask:
if cmd.Partition != nil {
// PARTITION ALL is shown as Partition_ID (empty) in EXPLAIN AST
if ident, ok := cmd.Partition.(*ast.Identifier); ok && strings.ToUpper(ident.Name()) == "ALL" {
Expand Down Expand Up @@ -1910,6 +1920,9 @@ func explainProjection(sb *strings.Builder, p *ast.Projection, indent string, de

func explainProjectionSelectQuery(sb *strings.Builder, q *ast.ProjectionSelectQuery, indent string, depth int) {
children := 0
if len(q.With) > 0 {
children++
}
if len(q.Columns) > 0 {
children++
}
Expand All @@ -1920,6 +1933,13 @@ func explainProjectionSelectQuery(sb *strings.Builder, q *ast.ProjectionSelectQu
children++
}
fmt.Fprintf(sb, "%sProjectionSelectQuery (children %d)\n", indent, children)
// Output WITH clause first
if len(q.With) > 0 {
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(q.With))
for _, w := range q.With {
Node(sb, w, depth+2)
}
}
if len(q.Columns) > 0 {
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(q.Columns))
for _, col := range q.Columns {
Expand Down Expand Up @@ -2085,7 +2105,7 @@ func countAlterCommandChildren(cmd *ast.AlterCommand) int {
case ast.AlterModifySetting:
children = 1
case ast.AlterDropPartition, ast.AlterDetachPartition, ast.AlterAttachPartition,
ast.AlterReplacePartition, ast.AlterFetchPartition, ast.AlterMovePartition, ast.AlterFreezePartition, ast.AlterApplyPatches:
ast.AlterReplacePartition, ast.AlterFetchPartition, ast.AlterMovePartition, ast.AlterFreezePartition, ast.AlterApplyPatches, ast.AlterApplyDeletedMask:
if cmd.Partition != nil {
children++
}
Expand Down
Loading