Skip to content

Commit a32530d

Browse files
authored
fix: support Unicode method names in parser and compiler (#12)
* test: add failing tests for Unicode method names Add test cases for non-ASCII method name parsing: - Korean characters (안녕하세요) - Mixed ASCII and Unicode (비_영어_함수명___테스트1!) - Japanese characters (こんにちは) - Class methods with Unicode names All tests currently fail due to \w regex pattern limitation. Related to #11 * fix: support Unicode method names in parser and compiler Replace \w regex pattern with [\p{L}\p{N}_] to support non-ASCII characters (Korean, Japanese, etc.) in method names. Changes: - Add IDENTIFIER_CHAR and METHOD_NAME_PATTERN constants - Update parser.rb to detect Unicode method definitions - Update compiler.rb to strip type annotations from Unicode methods Fixes #11 * fix: parse conditional expressions for union type inference Add parse_conditional method to BodyParser that properly parses if/unless/elsif/else blocks into IR::Conditional nodes. This enables the type inference system to collect all possible return values from conditional branches and unify them into union types. The fix handles: - Simple if/else blocks - elsif chains (parsed as nested if) - unless statements - Nested conditionals at correct depth Fixes #13 * fix: ignore unreachable code after return in type inference Modified collect_returns_recursive to return a termination flag. When a return statement is encountered, subsequent code in the same block is now correctly identified as unreachable and excluded from type inference. This ensures that methods like `def test; return false; if condition; "string"; end; end` are inferred as returning `bool` instead of `bool | String`.
1 parent 067a7f8 commit a32530d

7 files changed

Lines changed: 340 additions & 18 deletions

File tree

lib/t_ruby/ast_type_inferrer.rb

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -205,11 +205,13 @@ def infer_method_return_type(method_node, class_env = nil)
205205
end
206206

207207
# 본문에서 반환 타입 수집
208-
return_types = collect_return_types(method_node.body, env)
208+
return_types, terminated = collect_return_types(method_node.body, env)
209209

210-
# 암묵적 반환값 추론 (마지막 표현식)
211-
implicit_return = infer_implicit_return(method_node.body, env)
212-
return_types << implicit_return if implicit_return
210+
# 암묵적 반환값 추론 (마지막 표현식) - 종료되지 않은 경우만
211+
unless terminated
212+
implicit_return = infer_implicit_return(method_node.body, env)
213+
return_types << implicit_return if implicit_return
214+
end
213215

214216
# 타입 통합
215217
unify_types(return_types)
@@ -407,24 +409,35 @@ def infer_return(node, env)
407409
end
408410

409411
# Collects the type of every reachable `return` statement found in a method body.
#
# @param body [Object] root IR node of the method body
# @param env [Object] environment forwarded to collect_returns_recursive
# @return [Array(Array<String>, Boolean)] the collected types, followed by the
#   termination flag reported by collect_returns_recursive for the body
def collect_return_types(body, env)
  collected = []
  # The traversal appends to `collected` in place and reports termination.
  [collected, collect_returns_recursive(body, env, collected)]
end
417420

421+
# Walks an IR node and appends the inferred type of each reachable `return`
# statement to +types+.
#
# Only IR::Return, IR::Block and IR::Conditional nodes are inspected; every
# other node kind is treated as non-terminating and contributes no types.
#
# @param node [Object] IR node to inspect
# @param env [Object] environment handed to infer_expression
# @param types [Array<String>] accumulator, mutated in place
# @return [Boolean] true when +node+ always ends in a return, so that code
#   following it in the same block is unreachable
def collect_returns_recursive(node, env, types)
  case node
  when IR::Return
    # A bare `return` carries no value and is recorded as "nil".
    types << (node.value ? infer_expression(node.value, env) : "nil")
    true
  when IR::Block
    # Stops at the first terminating statement; later statements are unreachable.
    node.statements.any? { |statement| collect_returns_recursive(statement, env, types) }
  when IR::Conditional
    # Both branches are always visited so that every return is collected.
    # A missing branch counts as non-terminating.
    outcomes = [node.then_branch, node.else_branch].map do |branch|
      branch ? collect_returns_recursive(branch, env, types) : false
    end
    # The conditional terminates only when every branch terminates.
    outcomes.all?
  else
    false
  end
end
430443

lib/t_ruby/body_parser.rb

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,96 @@ def parse(lines, start_line, end_line)
1919
stripped = line.strip
2020

2121
# 빈 줄이나 주석은 건너뛰기
22-
unless stripped.empty? || stripped.start_with?("#")
23-
node = parse_statement(stripped, i)
24-
statements << node if node
22+
if stripped.empty? || stripped.start_with?("#")
23+
i += 1
24+
next
2525
end
2626

27+
# if/unless 조건문 처리
28+
if stripped.match?(/^(if|unless)\s+/)
29+
node, next_i = parse_conditional(lines, i, end_line)
30+
if node
31+
statements << node
32+
i = next_i
33+
next
34+
end
35+
end
36+
37+
node = parse_statement(stripped, i)
38+
statements << node if node
2739
i += 1
2840
end
2941

3042
IR::Block.new(statements: statements)
3143
end
3244

45+
# Parses an if/unless/elsif construct whose header is at +start_line+.
#
# Statements are sorted into a "then" and an "else" branch. An `elsif` is
# parsed recursively and stored as a nested :if conditional inside the else
# branch. Openers of nested constructs (if/unless/case/while/until/for/begin)
# are kept as IR::RawCode and only tracked for `end` matching.
#
# NOTE(review): lines that open a `do ... end` block are not counted as
# openers, so their `end` is taken as the end of this conditional - confirm
# whether such bodies need support.
#
# @param lines [Array<String>] source lines being parsed
# @param start_line [Integer] index of the if/unless/elsif header line
# @param block_end [Integer] exclusive upper bound for scanning
# @return [Array(IR::Conditional, Integer)] the conditional node and the index
#   of the first line after its closing `end`; `[nil, start_line]` when the
#   line at +start_line+ is not a conditional header
def parse_conditional(lines, start_line, block_end)
  header = lines[start_line].strip.match(/^(if|unless|elsif)\s+(.+)$/)
  return [nil, start_line] unless header

  # An elsif is handled internally like an if.
  kind = header[1] == "elsif" ? :if : header[1].to_sym
  condition = parse_expression(header[2])

  branches = { then: [], else: [] }
  current_branch = :then
  depth = 1
  resume_at = nil
  i = start_line + 1

  while i < block_end && depth.positive?
    current_line = lines[i].strip

    if current_line.match?(/^(if|unless|case|while|until|for|begin)\b/)
      depth += 1
      branches[current_branch] << IR::RawCode.new(code: current_line)
    elsif current_line == "end"
      depth -= 1
      break if depth.zero?
    elsif depth == 1 && current_line.match?(/^elsif\s+/)
      # The recursive call consumes the rest of the chain, including the
      # shared closing `end`, and already reports the line after it.
      nested, after_chain = parse_conditional(lines, i, block_end)
      if nested
        branches[:else] << nested
        resume_at = after_chain
      end
      break
    elsif depth == 1 && current_line == "else"
      current_branch = :else
    elsif !current_line.empty? && !current_line.start_with?("#")
      node = parse_statement(current_line, i)
      # Unparseable statements are dropped; the cursor must still advance,
      # so this must not skip the increment below.
      branches[current_branch] << node if node
    end

    i += 1
  end

  else_block = branches[:else].empty? ? nil : IR::Block.new(statements: branches[:else])

  conditional = IR::Conditional.new(
    condition: condition,
    then_branch: IR::Block.new(statements: branches[:then]),
    else_branch: else_block,
    kind: kind,
    location: start_line
  )

  # Without an elsif, `i` sits on the closing `end` (or on block_end when no
  # `end` was found), so the next line to process is i + 1.
  [conditional, resume_at || (i + 1)]
end
111+
33112
private
34113

35114
# 단일 문장 파싱

lib/t_ruby/compiler.rb

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
require "fileutils"
44

55
module TRuby
6+
# Pattern for method names that supports Unicode characters
7+
# \p{L} matches any Unicode letter, \p{N} matches any Unicode number
8+
IDENTIFIER_CHAR = '[\p{L}\p{N}_]'
9+
METHOD_NAME_PATTERN = "#{IDENTIFIER_CHAR}+[?!]?".freeze
10+
611
class Compiler
712
attr_reader :declaration_loader, :use_ir, :optimizer
813

@@ -362,7 +367,7 @@ def erase_parameter_types(source)
362367
result = source.dup
363368

364369
# Match function definitions and remove type annotations from parameters
365-
result.gsub!(/^(\s*def\s+\w+\s*\()([^)]+)(\)\s*)(?::\s*[^\n]+)?(\s*$)/) do |_match|
370+
result.gsub!(/^(\s*def\s+#{TRuby::METHOD_NAME_PATTERN}\s*\()([^)]+)(\)\s*)(?::\s*[^\n]+)?(\s*$)/) do |_match|
366371
indent = ::Regexp.last_match(1)
367372
params = ::Regexp.last_match(2)
368373
close_paren = ::Regexp.last_match(3)
@@ -411,8 +416,8 @@ def remove_param_types(params_str)
411416

412417
# Clean a single parameter (remove type annotation)
413418
def clean_param(param)
414-
# Match: name: Type or name
415-
if (match = param.match(/^(\w+)\s*:/))
419+
# Match: name: Type or name (supports Unicode identifiers)
420+
if (match = param.match(/^(#{TRuby::IDENTIFIER_CHAR}+)\s*:/))
416421
match[1]
417422
else
418423
param

lib/t_ruby/parser.rb

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ class Parser
77
# Type names that are recognized as valid
88
VALID_TYPES = %w[String Integer Boolean Array Hash Symbol void nil].freeze
99

10+
# Pattern for method/variable names that supports Unicode characters
11+
# \p{L} matches any Unicode letter, \p{N} matches any Unicode number
12+
IDENTIFIER_CHAR = '[\p{L}\p{N}_]'
13+
# Method names can end with ? or !
14+
METHOD_NAME_PATTERN = "#{IDENTIFIER_CHAR}+[?!]?".freeze
15+
1016
attr_reader :source, :ir_program, :use_combinator
1117

1218
def initialize(source, use_combinator: true, parse_body: true)
@@ -56,7 +62,7 @@ def parse
5662
end
5763

5864
# Match function definitions (top-level only, not inside class)
59-
if line.match?(/^\s*def\s+\w+/)
65+
if line.match?(/^\s*def\s+#{IDENTIFIER_CHAR}+/)
6066
func_info, next_i = parse_function_with_body(i)
6167
if func_info
6268
functions << func_info
@@ -167,7 +173,7 @@ def parse_function_definition(line)
167173
# def foo(): Type - no params but with return type
168174
# def foo(params) - with params, no return type
169175
# def foo - no params, no return type
170-
match = line.match(/^\s*def\s+([\w?!]+)\s*(?:\((.*?)\))?\s*(?::\s*(.+?))?\s*$/)
176+
match = line.match(/^\s*def\s+(#{METHOD_NAME_PATTERN})\s*(?:\((.*?)\))?\s*(?::\s*(.+?))?\s*$/)
171177
return nil unless match
172178

173179
function_name = match[1]
@@ -320,7 +326,7 @@ def parse_class(start_index)
320326
current_line = @lines[i]
321327

322328
# Match method definitions inside class
323-
if current_line.match?(/^\s*def\s+\w+/)
329+
if current_line.match?(/^\s*def\s+#{IDENTIFIER_CHAR}+/)
324330
method_info, next_i = parse_method_in_class(i, class_end)
325331
if method_info
326332
methods << method_info

spec/t_ruby/ast_type_inferrer_spec.rb

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,4 +282,104 @@
282282
expect(inferrer.type_cache).to have_key(node.object_id)
283283
end
284284
end
285+
286+
describe "unreachable code handling" do
  # Builds an IR literal node.
  def literal_node(value, literal_type)
    TRuby::IR::Literal.new(value: value, literal_type: literal_type)
  end

  # Builds an IR block from the given statements.
  def block_node(*statements)
    TRuby::IR::Block.new(statements: statements)
  end

  # Builds an IR return statement wrapping +value+.
  def return_node(value)
    TRuby::IR::Return.new(value: value)
  end

  # Builds a parameterless method named "test" with no declared return type.
  def method_node(body)
    TRuby::IR::MethodDef.new(name: "test", params: [], return_type: nil, body: body)
  end

  it "ignores code after unconditional return" do
    # def test
    #   return false
    #   "unreachable"
    # end
    method = method_node(
      block_node(
        return_node(literal_node(false, :boolean)),
        literal_node("unreachable", :string)
      )
    )

    # The trailing literal is unreachable: bool, not bool | String.
    expect(inferrer.infer_method_return_type(method)).to eq("bool")
  end

  it "ignores conditional after unconditional return" do
    # def test
    #   return 42
    #   if condition
    #     "then"
    #   else
    #     "else"
    #   end
    # end
    conditional = TRuby::IR::Conditional.new(
      condition: literal_node(true, :boolean),
      then_branch: block_node(literal_node("then", :string)),
      else_branch: block_node(literal_node("else", :string)),
      kind: :if
    )
    method = method_node(
      block_node(return_node(literal_node(42, :integer)), conditional)
    )

    # Only the return contributes a type.
    expect(inferrer.infer_method_return_type(method)).to eq("Integer")
  end

  it "collects returns from all branches when conditional does not fully terminate" do
    # def test
    #   if condition
    #     return "yes"
    #   end
    #   "no"
    # end
    conditional = TRuby::IR::Conditional.new(
      condition: literal_node(true, :boolean),
      then_branch: block_node(return_node(literal_node("yes", :string))),
      else_branch: nil,
      kind: :if
    )
    method = method_node(
      block_node(conditional, literal_node("no", :string))
    )

    # String from the explicit return and String from the implicit return unify.
    expect(inferrer.infer_method_return_type(method)).to eq("String")
  end
end
285385
end

0 commit comments

Comments
 (0)