Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 87 additions & 1 deletion lib/prism/translation/ripper/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
module Prism
module Translation
class Ripper
class Lexer # :nodoc:
class Lexer < Ripper # :nodoc:
# :stopdoc:
class State

Expand Down Expand Up @@ -39,6 +39,92 @@ def allbits?(i) to_int.allbits?(i) end
def anybits?(i) to_int.anybits?(i) end
def nobits?(i) to_int.nobits?(i) end
end

# A single lexer token: its position, event name, token text, lexer state and
# an optional message.
# NOTE(review): presumably mirrors the interface of ::Ripper::Lexer::Elem so
# results can be compared element by element -- confirm against ripper.
class Elem
  attr_accessor :pos, :event, :tok, :state, :message

  # Stores the given fields. +state+ is wrapped in a State instance; every
  # other argument is kept as passed. +message+ defaults to nil.
  def initialize(pos, event, tok, state, message = nil)
    @pos, @event, @tok = pos, event, tok
    @state = State.new(state)
    @message = message
  end

  # Fetches a field by its slot number (0..4, the same order as #to_a) or by
  # its name as a symbol. Any other index yields nil.
  def [](index)
    case index
    when 0, :pos then @pos
    when 1, :event then @event
    when 2, :tok then @tok
    when 3, :state then @state
    when 4, :message then @message
    end
  end

  # One-line description of the element. The ": " separator before the
  # message is only emitted when the message is truthy.
  def inspect
    separator = message ? ": " : ""
    "#<#{self.class}: #{event_location}:#{state}: #{tok.inspect}#{separator}#{message}>"
  end

  alias to_s inspect

  # Pretty-printer hook: emits the event/position label, the state and the
  # token, followed by the message when one is present.
  def pretty_print(q)
    q.group(2, "#<#{self.class}:", ">") do
      q.breakable
      q.text(event_location)
      q.breakable
      state.pretty_print(q)
      q.breakable
      q.text("token: ")
      tok.pretty_print(q)
      if message
        q.breakable
        q.text("message: ")
        q.text(message)
      end
    end
  end

  # Array form of the element: [pos, event, tok, state], with the message
  # appended as a fifth entry only when it is set.
  def to_a
    fields = [@pos, @event, @tok, @state]
    fields << @message if @message
    fields
  end

  private

  # "event@line:column" label shared by #inspect and #pretty_print. Reads the
  # first two entries of +pos+.
  def event_location
    "#{event}@#{pos[0]}:#{pos[1]}"
  end
end

# Forwards every argument to the superclass constructor, then runs
# Prism.lex_compat over @source (passing the filename and line number) and
# keeps the result in @lex_compat.
# NOTE(review): relies on the superclass to set @source and to provide
# +filename+ and +lineno+ -- confirm against the parent class.
def initialize(...)
  super(...)
  @lex_compat = Prism.lex_compat(@source, filepath: filename, line: lineno)
end

# Wraps every token of the stored lex_compat result in an Elem and returns
# them as an array. Errors are not part of the returned list.
#
# When +raise_errors+ is true and lexing failed, a SyntaxError carrying the
# message of the first error is raised instead.
#
# Ripper is a streaming parser and is expected to emit tokens in the order
# the parser encounters them; that ordering is not implemented here.
def parse(raise_errors: false)
  if @lex_compat.failure? && raise_errors
    raise SyntaxError, @lex_compat.errors.first.message
  end

  @lex_compat.value.map do |location, type, text, lex_state|
    # Elem wraps the state itself, so hand it the raw integer.
    Elem.new(location, type, text, lex_state.to_int)
  end
end

# Counterpart of ripper's `scan`. In ripper, `scan` is similar to `parse` but
# sorts the elements by their position in the source and also includes errors.
# This implementation simply forwards all of its arguments to #parse and
# returns whatever #parse returns.
#
# Since prism does error recovery, in cases of syntax errors the result may
# differ greatly compared to ripper.
def scan(...)
parse(...)
end

# :startdoc:
end
end
Expand Down
48 changes: 44 additions & 4 deletions test/prism/ruby/ripper_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class RipperTest < TestCase
end

# Skip these tests that we haven't implemented yet.
omitted = [
omitted_sexp_raw = [
"dos_endings.txt",
"heredocs_with_fake_newlines.txt",
"heredocs_with_ignored_newlines.txt",
Expand All @@ -59,8 +59,29 @@ class RipperTest < TestCase
"whitequark/slash_newline_in_heredocs.txt"
]

Fixture.each_for_current_ruby(except: incorrect | omitted) do |fixture|
define_method(fixture.test_name) { assert_ripper(fixture.read) }
omitted_lexer_parse = [
"comments.txt",
"heredoc_percent_q_newline_delimiter.txt",
"heredoc_with_escaped_newline_at_start.txt",
"heredocs_with_fake_newlines.txt",
"indented_file_end.txt",
"seattlerb/TestRubyParserShared.txt",
"seattlerb/class_comments.txt",
"seattlerb/module_comments.txt",
"seattlerb/parse_line_block_inline_comment_leading_newlines.txt",
"seattlerb/parse_line_block_inline_multiline_comment.txt",
"spanning_heredoc_newlines.txt",
"strings.txt",
"whitequark/dedenting_heredoc.txt",
"whitequark/procarg0.txt",
]

Fixture.each_for_current_ruby(except: incorrect | omitted_sexp_raw) do |fixture|
define_method("#{fixture.test_name}_sexp_raw") { assert_ripper_sexp_raw(fixture.read) }
end

Fixture.each_for_current_ruby(except: incorrect | omitted_lexer_parse) do |fixture|
define_method("#{fixture.test_name}_lexer_parse") { assert_ripper_lexer_parse(fixture.read) }
end

# Check that the hardcoded values don't change without us noticing.
Expand All @@ -76,8 +97,27 @@ def test_internals

private

def assert_ripper(source)
# Asserts that Prism's Ripper translation returns the same raw
# s-expression for +source+ as Ripper.sexp_raw does.
def assert_ripper_sexp_raw(source)
  expected = Ripper.sexp_raw(source)
  actual = Prism::Translation::Ripper.sexp_raw(source)
  assert_equal(expected, actual)
end

# Lexes +source+ with both Prism's Ripper-compatible lexer and Ripper's own
# lexer, then asserts that the two token streams match element by element.
def assert_ripper_lexer_parse(source)
  prism_tokens = Translation::Ripper::Lexer.new(source).parse
  ripper_tokens = Ripper::Lexer.new(source).parse

  # Prism doesn't emit on_sp, so drop those from the reference stream.
  ripper_tokens.reject! { |token| token.event == :on_sp }
  # Prism emits tokens by their order in the code, not in parse order.
  ripper_tokens.sort_by! { |token| token.pos }

  # Walk the longer of the two lists; a missing element becomes [] (nil.to_a)
  # and therefore shows up as a mismatch.
  token_count = [prism_tokens.size, ripper_tokens.size].max
  (0...token_count).each do |index|
    expected = ripper_tokens[index].to_a
    actual = prism_tokens[index].to_a

    # Since tokens related to heredocs are not emitted in the same order,
    # the state also doesn't line up; blank it out on both sides.
    both_heredoc_end = expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
    expected[3] = actual[3] = nil if both_heredoc_end

    assert_equal(expected, actual)
  end
end
end
end