From 01d1cbbbec3d40cb2e3f564371beb9dc3729c7ba Mon Sep 17 00:00:00 2001 From: Victor Shepelev Date: Wed, 24 Dec 2025 02:53:08 +0200 Subject: [PATCH 01/19] [DOC] Enhance Fiber::Scheduler docs (#15708) --- scheduler.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/scheduler.c b/scheduler.c index 592bdcd1ef45ef..b23ddad41e70cc 100644 --- a/scheduler.c +++ b/scheduler.c @@ -293,13 +293,15 @@ rb_fiber_scheduler_blocking_operation_new(void *(*function)(void *), void *data, * * Hook methods are: * - * * #io_wait, #io_read, #io_write, #io_pread, #io_pwrite, and #io_select, #io_close + * * #io_wait, #io_read, #io_write, #io_pread, #io_pwrite, #io_select, and #io_close * * #process_wait * * #kernel_sleep * * #timeout_after * * #address_resolve * * #block and #unblock * * #blocking_operation_wait + * * #fiber_interrupt + * * #yield * * (the list is expanded as Ruby developers make more methods having non-blocking calls) * * When not specified otherwise, the hook implementations are mandatory: if they are not @@ -371,6 +373,9 @@ Init_Fiber_Scheduler(void) rb_define_method(rb_cFiberScheduler, "unblock", rb_fiber_scheduler_unblock, 2); rb_define_method(rb_cFiberScheduler, "fiber", rb_fiber_scheduler_fiber, -2); rb_define_method(rb_cFiberScheduler, "blocking_operation_wait", rb_fiber_scheduler_blocking_operation_wait, -2); + rb_define_method(rb_cFiberScheduler, "yield", rb_fiber_scheduler_yield, 0); + rb_define_method(rb_cFiberScheduler, "fiber_interrupt", rb_fiber_scheduler_fiber_interrupt, 2); + rb_define_method(rb_cFiberScheduler, "io_close", rb_fiber_scheduler_io_close, 1); #endif } @@ -527,7 +532,7 @@ rb_fiber_scheduler_make_timeout(struct timeval *timeout) * Document-method: Fiber::Scheduler#kernel_sleep * call-seq: kernel_sleep(duration = nil) * - * Invoked by Kernel#sleep and Mutex#sleep and is expected to provide + * Invoked by Kernel#sleep and Thread::Mutex#sleep and is expected to provide * an implementation of 
sleeping in a non-blocking way. Implementation might * register the current fiber in some list of "which fiber wait until what * moment", call Fiber.yield to pass control, and then in #close resume @@ -586,7 +591,7 @@ rb_fiber_scheduler_yield(VALUE scheduler) * However, as a result of this design, if the +block+ does not invoke any * non-blocking operations, it will be impossible to interrupt it. If you * desire to provide predictable points for timeouts, consider adding - * +sleep(0)+. + * sleep(0). * * If the block is executed successfully, its result will be returned. * @@ -641,7 +646,7 @@ rb_fiber_scheduler_process_wait(VALUE scheduler, rb_pid_t pid, int flags) * Document-method: Fiber::Scheduler#block * call-seq: block(blocker, timeout = nil) * - * Invoked by methods like Thread.join, and by Mutex, to signify that current + * Invoked by methods like Thread.join, and by Thread::Mutex, to signify that current * Fiber is blocked until further notice (e.g. #unblock) or until +timeout+ has * elapsed. * @@ -661,8 +666,8 @@ rb_fiber_scheduler_block(VALUE scheduler, VALUE blocker, VALUE timeout) * Document-method: Fiber::Scheduler#unblock * call-seq: unblock(blocker, fiber) * - * Invoked to wake up Fiber previously blocked with #block (for example, Mutex#lock - * calls #block and Mutex#unlock calls #unblock). The scheduler should use + * Invoked to wake up Fiber previously blocked with #block (for example, Thread::Mutex#lock + * calls #block and Thread::Mutex#unlock calls #unblock). The scheduler should use * the +fiber+ parameter to understand which fiber is unblocked. * * +blocker+ is what was awaited for, but it is informational only (for debugging @@ -1021,6 +1026,14 @@ rb_fiber_scheduler_io_pwrite_memory(VALUE scheduler, VALUE io, rb_off_t from, co return result; } +/* + * Document-method: Fiber::Scheduler#io_close + * call-seq: io_close(fd) + * + * Invoked by Ruby's core methods to notify scheduler that the IO object is closed. 
Note that + * the method will receive an integer file descriptor of the closed object, not the object + * itself. + */ VALUE rb_fiber_scheduler_io_close(VALUE scheduler, VALUE io) { @@ -1076,7 +1089,8 @@ rb_fiber_scheduler_address_resolve(VALUE scheduler, VALUE hostname) * call-seq: blocking_operation_wait(blocking_operation) * * Invoked by Ruby's core methods to run a blocking operation in a non-blocking way. - * The blocking_operation is a Fiber::Scheduler::BlockingOperation that encapsulates the blocking operation. + * The blocking_operation is an opaque object that encapsulates the blocking operation + * and responds to a #call method without any arguments. * * If the scheduler doesn't implement this method, or if the scheduler doesn't execute * the blocking operation, Ruby will fall back to the non-scheduler implementation. @@ -1118,6 +1132,15 @@ VALUE rb_fiber_scheduler_blocking_operation_wait(VALUE scheduler, void* (*functi return result; } +/* + * Document-method: Fiber::Scheduler#fiber_interrupt + * call-seq: fiber_interrupt(fiber, exception) + * + * Invoked by Ruby's core methods to notify the scheduler that the blocked fiber should be interrupted + * with an exception. For example, IO#close uses this method to interrupt fibers that are performing + * blocking IO operations. + * + */ VALUE rb_fiber_scheduler_fiber_interrupt(VALUE scheduler, VALUE fiber, VALUE exception) { VALUE arguments[] = { From 2df72c0c1a686052ab00f853bb15bd67dcbfedd4 Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Wed, 24 Dec 2025 13:09:05 +1300 Subject: [PATCH 02/19] Fix flaky test. 
--- test/fiber/scheduler.rb | 2 +- test/fiber/test_scheduler.rb | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/fiber/scheduler.rb b/test/fiber/scheduler.rb index 07b15c5ce4b86a..820c46dfb01743 100644 --- a/test/fiber/scheduler.rb +++ b/test/fiber/scheduler.rb @@ -497,7 +497,7 @@ def io_write(io, buffer, length, offset) fd = io.fileno str = buffer.get_string __io_ops__ << [:io_write, fd, str] - Fiber.blocking { buffer.write(IO.for_fd(fd), 0, offset) } + Fiber.blocking { buffer.write(io, 0, offset) } end end diff --git a/test/fiber/test_scheduler.rb b/test/fiber/test_scheduler.rb index c20fe86ff4531d..0cbd49dacab5c6 100644 --- a/test/fiber/test_scheduler.rb +++ b/test/fiber/test_scheduler.rb @@ -287,7 +287,6 @@ def test_post_fork_fiber_blocking end def test_io_write_on_flush - omit "skip this test because it makes CI fragile" begin fn = File.join(Dir.tmpdir, "ruby_test_io_write_on_flush_#{SecureRandom.hex}") write_fd = nil From 30d9782c5c07db2d5df44656eb0218616f7b0bb5 Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Wed, 24 Dec 2025 13:26:07 +1300 Subject: [PATCH 03/19] Tidy up fiber scheduler tests. 
--- test/fiber/scheduler.rb | 16 +++++--- test/fiber/test_scheduler.rb | 79 ++++++++++++++++++++---------------- 2 files changed, 54 insertions(+), 41 deletions(-) diff --git a/test/fiber/scheduler.rb b/test/fiber/scheduler.rb index 820c46dfb01743..8f1ce4376b2c29 100644 --- a/test/fiber/scheduler.rb +++ b/test/fiber/scheduler.rb @@ -489,15 +489,19 @@ def blocking(&block) end class IOScheduler < Scheduler - def __io_ops__ - @__io_ops__ ||= [] + def operations + @operations ||= [] end def io_write(io, buffer, length, offset) - fd = io.fileno - str = buffer.get_string - __io_ops__ << [:io_write, fd, str] - Fiber.blocking { buffer.write(io, 0, offset) } + descriptor = io.fileno + string = buffer.get_string + + self.operations << [:io_write, descriptor, string] + + Fiber.blocking do + buffer.write(io, 0, offset) + end end end diff --git a/test/fiber/test_scheduler.rb b/test/fiber/test_scheduler.rb index 0cbd49dacab5c6..d3696267f7934b 100644 --- a/test/fiber/test_scheduler.rb +++ b/test/fiber/test_scheduler.rb @@ -288,90 +288,99 @@ def test_post_fork_fiber_blocking def test_io_write_on_flush begin - fn = File.join(Dir.tmpdir, "ruby_test_io_write_on_flush_#{SecureRandom.hex}") - write_fd = nil - io_ops = nil + path = File.join(Dir.tmpdir, "ruby_test_io_write_on_flush_#{SecureRandom.hex}") + descriptor = nil + operations = nil + thread = Thread.new do scheduler = IOScheduler.new Fiber.set_scheduler scheduler Fiber.schedule do - File.open(fn, 'w+') do |f| - write_fd = f.fileno - f << 'foo' - f.flush - f << 'bar' + File.open(path, 'w+') do |file| + descriptor = file.fileno + file << 'foo' + file.flush + file << 'bar' end end - io_ops = scheduler.__io_ops__ + + operations = scheduler.operations end + thread.join assert_equal [ - [:io_write, write_fd, 'foo'], - [:io_write, write_fd, 'bar'] - ], io_ops + [:io_write, descriptor, 'foo'], + [:io_write, descriptor, 'bar'] + ], operations - assert_equal 'foobar', IO.read(fn) + assert_equal 'foobar', IO.read(path) ensure thread.kill 
rescue nil - FileUtils.rm_f(fn) + FileUtils.rm_f(path) end end def test_io_read_error - fn = File.join(Dir.tmpdir, "ruby_test_io_read_error_#{SecureRandom.hex}") - exception = nil + path = File.join(Dir.tmpdir, "ruby_test_io_read_error_#{SecureRandom.hex}") + error = nil + thread = Thread.new do scheduler = IOErrorScheduler.new Fiber.set_scheduler scheduler Fiber.schedule do - File.open(fn, 'w+') { it.read } - rescue => e - exception = e + File.open(path, 'w+') { it.read } + rescue => error + # Ignore. end end + thread.join - assert_kind_of Errno::EBADF, exception + assert_kind_of Errno::EBADF, error ensure thread.kill rescue nil - FileUtils.rm_f(fn) + FileUtils.rm_f(path) end def test_io_write_error - fn = File.join(Dir.tmpdir, "ruby_test_io_write_error_#{SecureRandom.hex}") - exception = nil + path = File.join(Dir.tmpdir, "ruby_test_io_write_error_#{SecureRandom.hex}") + error = nil + thread = Thread.new do scheduler = IOErrorScheduler.new Fiber.set_scheduler scheduler Fiber.schedule do - File.open(fn, 'w+') { it.sync = true; it << 'foo' } - rescue => e - exception = e + File.open(path, 'w+') { it.sync = true; it << 'foo' } + rescue => error + # Ignore. end end + thread.join - assert_kind_of Errno::EINVAL, exception + assert_kind_of Errno::EINVAL, error ensure thread.kill rescue nil - FileUtils.rm_f(fn) + FileUtils.rm_f(path) end def test_io_write_flush_error - fn = File.join(Dir.tmpdir, "ruby_test_io_write_flush_error_#{SecureRandom.hex}") - exception = nil + path = File.join(Dir.tmpdir, "ruby_test_io_write_flush_error_#{SecureRandom.hex}") + error = nil + thread = Thread.new do scheduler = IOErrorScheduler.new Fiber.set_scheduler scheduler Fiber.schedule do - File.open(fn, 'w+') { it << 'foo' } - rescue => e - exception = e + File.open(path, 'w+') { it << 'foo' } + rescue => error + # Ignore. 
end end + thread.join - assert_kind_of Errno::EINVAL, exception + assert_kind_of Errno::EINVAL, error ensure thread.kill rescue nil - FileUtils.rm_f(fn) + FileUtils.rm_f(path) end end From e2a58c45b1c79204ae80b3960f44a818ec00b94e Mon Sep 17 00:00:00 2001 From: Steve Date: Sun, 21 Dec 2025 14:49:57 +0300 Subject: [PATCH 04/19] [DOC] Fix minor typo in signals.rdoc --- doc/language/signals.rdoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/language/signals.rdoc b/doc/language/signals.rdoc index 403eb66549e5c2..a82dab81c68e92 100644 --- a/doc/language/signals.rdoc +++ b/doc/language/signals.rdoc @@ -17,7 +17,7 @@ for its internal data structures, but it does not know when it is safe for data structures in YOUR code. Ruby implements deferred signal handling by registering short C functions with only {async-signal-safe functions}[http://man7.org/linux/man-pages/man7/signal-safety.7.html] as -signal handlers. These short C functions only do enough tell the VM to +signal handlers. These short C functions only do enough to tell the VM to run callbacks registered via Signal.trap later in the main Ruby Thread. == Unsafe methods to call in Signal.trap blocks From 688c1f6c5e96dfa3e4f6b16c617545ded7c8c0b4 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 5 Dec 2025 15:22:57 +0900 Subject: [PATCH 05/19] [DOC] Reword "Pattern Matching" to "Regular Expression" In Ruby, since 3.1 at least, the words "Pattern Matching" should refer to the syntax. 
--- doc/language/globals.md | 4 ++-- doc/string/partition.rdoc | 4 ++-- doc/string/rpartition.rdoc | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/language/globals.md b/doc/language/globals.md index a4199e488ab585..905a23ed05dbe4 100644 --- a/doc/language/globals.md +++ b/doc/language/globals.md @@ -19,7 +19,7 @@ require 'English' | `$!` | `$ERROR_INFO` | \Exception object or `nil` | `nil` | Yes | Kernel#raise | | `$@` | `$ERROR_POSITION` | \Array of backtrace positions or `nil` | `nil` | Yes | Kernel#raise | -### Pattern Matching +### Regular Expression | Variable | \English | Contains | Initially | Read-Only | Reset By | |:-------------:|:-------------------:|-----------------------------------|:---------:|:---------:|-----------------| @@ -127,7 +127,7 @@ Output: English - `$ERROR_POSITION`. -## Pattern Matching +## Regular Expression These global variables store information about the most recent successful match in the current scope. diff --git a/doc/string/partition.rdoc b/doc/string/partition.rdoc index 330e6b03987fcf..86c3a9ca0a975a 100644 --- a/doc/string/partition.rdoc +++ b/doc/string/partition.rdoc @@ -17,7 +17,7 @@ Note that in the examples below, a returned string 'hello' is a copy of +self+, not +self+. 
If +pattern+ is a Regexp, performs the equivalent of self.match(pattern) -(also setting {pattern-matching global variables}[rdoc-ref:language/globals.md@Pattern+Matching]): +(also setting {pattern-matching global variables}[rdoc-ref:language/globals.md@Regular+Expression]): 'hello'.partition(/h/) # => ["", "h", "ello"] 'hello'.partition(/l/) # => ["he", "l", "lo"] @@ -30,7 +30,7 @@ If +pattern+ is a Regexp, performs the equivalent of self.match(pattern)self.index(pattern) -(and does _not_ set {pattern-matching global variables}[rdoc-ref:language/globals.md@Pattern+Matching]): +(and does _not_ set {pattern-matching global variables}[rdoc-ref:language/globals.md@Regular+Expression]): 'hello'.partition('h') # => ["", "h", "ello"] 'hello'.partition('l') # => ["he", "l", "lo"] diff --git a/doc/string/rpartition.rdoc b/doc/string/rpartition.rdoc index 11b0571bfb2ba7..879b6ee2864295 100644 --- a/doc/string/rpartition.rdoc +++ b/doc/string/rpartition.rdoc @@ -23,7 +23,7 @@ The pattern used is: Note that in the examples below, a returned string 'hello' is a copy of +self+, not +self+. 
If +pattern+ is a Regexp, searches for the last matching substring -(also setting {pattern-matching global variables}[rdoc-ref:language/globals.md@Pattern+Matching]): +(also setting {pattern-matching global variables}[rdoc-ref:language/globals.md@Regular+Expression]): 'hello'.rpartition(/l/) # => ["hel", "l", "o"] 'hello'.rpartition(/ll/) # => ["he", "ll", "o"] @@ -36,7 +36,7 @@ If +pattern+ is a Regexp, searches for the last matching substring If +pattern+ is not a Regexp, converts it to a string (if it is not already one), then searches for the last matching substring -(and does _not_ set {pattern-matching global variables}[rdoc-ref:language/globals.md@Pattern+Matching]): +(and does _not_ set {pattern-matching global variables}[rdoc-ref:language/globals.md@Regular+Expression]): 'hello'.rpartition('l') # => ["hel", "l", "o"] 'hello'.rpartition('ll') # => ["he", "ll", "o"] From ceea8060e4cc94846b2ff32fe8b0bb39049eda2e Mon Sep 17 00:00:00 2001 From: YO4 Date: Fri, 3 Oct 2025 22:04:13 +0900 Subject: [PATCH 06/19] Properly handle test cases terminated by signals in test-bundled-gems Process::Status#exitstatus turns into nil when the child process is signaled. When exit_code was unchanged, test-bundled-gems.rb returned 0 and make was unable to detect the failure. Fix this. 
--- tool/test-bundled-gems.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tool/test-bundled-gems.rb b/tool/test-bundled-gems.rb index 006ebd981af913..98b6bb9048c5a8 100644 --- a/tool/test-bundled-gems.rb +++ b/tool/test-bundled-gems.rb @@ -133,7 +133,7 @@ puts colorize.decorate(mesg, "skip") else failed << gem - exit_code = $?.exitstatus if $?.exitstatus + exit_code = 1 end end end From 6e2bf5df4eeab8e37fab86206d4f2e8ab36a60b7 Mon Sep 17 00:00:00 2001 From: aguspe Date: Tue, 23 Dec 2025 18:07:15 +0100 Subject: [PATCH 07/19] [Tests] Assert Module#set_temporary_name returns self The return value of Module#set_temporary_name was changed to return `self`, but the existing tests did not verify this. --- test/ruby/test_module.rb | 8 ++++---- variable.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/ruby/test_module.rb b/test/ruby/test_module.rb index 3a47c2551a813e..30a7c5d9bc1c22 100644 --- a/test/ruby/test_module.rb +++ b/test/ruby/test_module.rb @@ -3367,11 +3367,11 @@ def test_set_temporary_name m.const_set(:N, Module.new) assert_match(/\A#::N\z/, m::N.name) - m::N.set_temporary_name(name = "fake_name_under_M") + assert_same m::N, m::N.set_temporary_name(name = "fake_name_under_M") name.upcase! assert_equal("fake_name_under_M", m::N.name) assert_raise(FrozenError) {m::N.name.upcase!} - m::N.set_temporary_name(nil) + assert_same m::N, m::N.set_temporary_name(nil) assert_nil(m::N.name) m::N.const_set(:O, Module.new) @@ -3379,14 +3379,14 @@ def test_set_temporary_name m::N.const_set(:Recursive, m) m.const_set(:A, 42) - m.set_temporary_name(name = "fake_name") + assert_same m, m.set_temporary_name(name = "fake_name") name.upcase! 
assert_equal("fake_name", m.name) assert_raise(FrozenError) {m.name.upcase!} assert_equal("fake_name::N", m::N.name) assert_equal("fake_name::N::O", m::N::O.name) - m.set_temporary_name(nil) + assert_same m, m.set_temporary_name(nil) assert_nil m.name assert_nil m::N.name assert_nil m::N::O.name diff --git a/variable.c b/variable.c index 085ba240e412ee..ff8d24d78aef6c 100644 --- a/variable.c +++ b/variable.c @@ -279,7 +279,7 @@ set_sub_temporary_name(VALUE mod, VALUE name) * m.name #=> nil * * c = Class.new - * c.set_temporary_name("MyClass(with description)") + * c.set_temporary_name("MyClass(with description)") # => MyClass(with description) * * c.new # => # * From 10a68210b4c96ab2de21357907627e1fb34ce000 Mon Sep 17 00:00:00 2001 From: TOMITA Masahiro Date: Thu, 6 Nov 2025 09:09:14 +0900 Subject: [PATCH 08/19] [DOC] Fix IO::Buffer document --- io_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io_buffer.c b/io_buffer.c index 85061076cd9476..f1afc3d3baf48e 100644 --- a/io_buffer.c +++ b/io_buffer.c @@ -571,7 +571,7 @@ io_buffer_for_yield_instance_ensure(VALUE _arguments) * buffer.get_string(0, 1) * # => "t" * string - * # => "best" + * # => "test" * * buffer.resize(100) * # in `resize': Cannot resize external buffer! (IO::Buffer::AccessError) @@ -3784,9 +3784,9 @@ io_buffer_not_inplace(VALUE self) * * File.write('test.txt', 'test data') * # => 9 - * buffer = IO::Buffer.map(File.open('test.txt')) + * buffer = IO::Buffer.map(File.open('test.txt'), nil, 0, IO::Buffer::READONLY) * # => - * # # + * # # * # ... 
* buffer.get_string(5, 2) # read 2 bytes, starting from offset 5 * # => "da" From 9154d72a3e342b6bf101d0d1e3c8bbd0feee3422 Mon Sep 17 00:00:00 2001 From: zverok Date: Tue, 23 Dec 2025 22:17:12 +0200 Subject: [PATCH 09/19] Improve CGI.escape* docs --- lib/cgi/escape.rb | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/lib/cgi/escape.rb b/lib/cgi/escape.rb index 6d84773fdd62f6..555d24a5da2109 100644 --- a/lib/cgi/escape.rb +++ b/lib/cgi/escape.rb @@ -1,20 +1,28 @@ # frozen_string_literal: true -# :stopdoc +# Since Ruby 4.0, \CGI is a small holder for various escaping methods, included from CGI::Escape +# +# require 'cgi/escape' +# +# CGI.escape("Ruby programming language") +# #=> "Ruby+programming+language" +# CGI.escapeURIComponent("Ruby programming language") +# #=> "Ruby%20programming%20language" +# +# See CGI::Escape module for methods list and their description. class CGI module Escape; end include Escape extend Escape module EscapeExt; end # :nodoc: end -# :startdoc: -# Escape/unescape for CGI, HTML, URI. +# Web-related escape/unescape functionality. module CGI::Escape @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset) # URL-encode a string into application/x-www-form-urlencoded. - # Space characters (+" "+) are encoded with plus signs (+"+"+) + # Space characters (" ") are encoded with plus signs ("+") # url_encoded_string = CGI.escape("'Stop!' said Fred") # # => "%27Stop%21%27+said+Fred" def escape(string) @@ -41,7 +49,7 @@ def unescape(string, encoding = @@accept_charset) end # URL-encode a string following RFC 3986 - # Space characters (+" "+) are encoded with (+"%20"+) + # Space characters (" ") are encoded with ("%20") # url_encoded_string = CGI.escapeURIComponent("'Stop!' 
said Fred") # # => "%27Stop%21%27%20said%20Fred" def escapeURIComponent(string) @@ -69,7 +77,7 @@ def unescapeURIComponent(string, encoding = @@accept_charset) alias unescape_uri_component unescapeURIComponent # The set of special characters and their escaped values - TABLE_FOR_ESCAPE_HTML__ = { + TABLE_FOR_ESCAPE_HTML__ = { # :nodoc: "'" => ''', '&' => '&', '"' => '"', @@ -77,7 +85,7 @@ def unescapeURIComponent(string, encoding = @@accept_charset) '>' => '>', } - # Escape special characters in HTML, namely '&\"<> + # \Escape special characters in HTML, namely '&\"<> # CGI.escapeHTML('Usage: foo "bar" ') # # => "Usage: foo "bar" <baz>" def escapeHTML(string) @@ -160,11 +168,9 @@ def unescapeHTML(string) string.force_encoding enc end - # Synonym for CGI.escapeHTML(str) alias escape_html escapeHTML alias h escapeHTML - # Synonym for CGI.unescapeHTML(str) alias unescape_html unescapeHTML # TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there @@ -175,7 +181,7 @@ def unescapeHTML(string) end end - # Escape only the tags of certain HTML elements in +string+. + # \Escape only the tags of certain HTML elements in +string+. # # Takes an element or elements or array of elements. Each element # is specified by the name of the element, without angle brackets. @@ -199,7 +205,7 @@ def escapeElement(string, *elements) end end - # Undo escaping such as that done by CGI.escapeElement() + # Undo escaping such as that done by CGI.escapeElement # # print CGI.unescapeElement( # CGI.escapeHTML('
'), "A", "IMG") @@ -219,10 +225,8 @@ def unescapeElement(string, *elements) end end - # Synonym for CGI.escapeElement(str) alias escape_element escapeElement - # Synonym for CGI.unescapeElement(str) alias unescape_element unescapeElement end From ab683d56bc625c83d815741b3bfd9c606b14517f Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 23 Dec 2025 20:46:57 -0600 Subject: [PATCH 10/19] [DOC] Cross-links between Japanese and English pages (#15705) * [DOC] Cross-links between Japanese and English pages --- COPYING | 2 ++ COPYING.ja | 2 ++ doc/extension.ja.rdoc | 2 ++ doc/extension.rdoc | 2 ++ 4 files changed, 8 insertions(+) diff --git a/COPYING b/COPYING index 48e5a96de7c82c..428ce03ed7e6f0 100644 --- a/COPYING +++ b/COPYING @@ -1,3 +1,5 @@ +{日本語}[rdoc-ref:COPYING.ja] + Ruby is copyrighted free software by Yukihiro Matsumoto . You can redistribute it and/or modify it under either the terms of the 2-clause BSDL (see the file BSDL), or the conditions below: diff --git a/COPYING.ja b/COPYING.ja index 230376bc603be2..5de2dbcc8f10eb 100644 --- a/COPYING.ja +++ b/COPYING.ja @@ -1,3 +1,5 @@ +{English}[rdoc-ref:COPYING] + 本プログラムはフリーソフトウェアです.2-clause BSDL または以下に示す条件で本プログラムを再配布できます 2-clause BSDLについてはBSDLファイルを参照して下さい. diff --git a/doc/extension.ja.rdoc b/doc/extension.ja.rdoc index 2f7856f3d439df..381b94a230b462 100644 --- a/doc/extension.ja.rdoc +++ b/doc/extension.ja.rdoc @@ -1,5 +1,7 @@ # extension.ja.rdoc - -*- RDoc -*- created at: Mon Aug 7 16:45:54 JST 1995 +{English}[rdoc-ref:extension.rdoc] + = Rubyの拡張ライブラリの作り方 Rubyの拡張ライブラリの作り方を説明します. diff --git a/doc/extension.rdoc b/doc/extension.rdoc index 6cf4c4926c93fc..18dc5817d45830 100644 --- a/doc/extension.rdoc +++ b/doc/extension.rdoc @@ -1,5 +1,7 @@ # extension.rdoc - -*- RDoc -*- created at: Mon Aug 7 16:45:54 JST 1995 +{日本語}[rdoc-ref:extension.ja.rdoc] + = Creating extension libraries for Ruby This document explains how to make extension libraries for Ruby. 
From 202028aea170e43609f5061548578a1b5681414b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 24 Dec 2025 11:56:41 +0900 Subject: [PATCH 11/19] Update the latest results of test-bundled-gems --- tool/test-bundled-gems.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tool/test-bundled-gems.rb b/tool/test-bundled-gems.rb index 98b6bb9048c5a8..778fe3311aa6f4 100644 --- a/tool/test-bundled-gems.rb +++ b/tool/test-bundled-gems.rb @@ -10,11 +10,9 @@ github_actions = ENV["GITHUB_ACTIONS"] == "true" DEFAULT_ALLOWED_FAILURES = RUBY_PLATFORM =~ /mswin|mingw/ ? [ - 'rbs', 'debug', 'irb', - 'power_assert', - 'net-imap', + 'csv', ] : [] allowed_failures = ENV['TEST_BUNDLED_GEMS_ALLOW_FAILURES'] || '' allowed_failures = allowed_failures.split(',').concat(DEFAULT_ALLOWED_FAILURES).uniq.reject(&:empty?) From 342d25785c0332ba556da2bae960d2a4f4b8baad Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 23 Dec 2025 22:11:38 -0500 Subject: [PATCH 12/19] [DOC] Fix backticks in Coverage.peek_result --- ext/coverage/coverage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/coverage/coverage.c b/ext/coverage/coverage.c index 74d9f3ea4655fe..1f82193567eb1c 100644 --- a/ext/coverage/coverage.c +++ b/ext/coverage/coverage.c @@ -337,7 +337,7 @@ coverage_peek_result_i(st_data_t key, st_data_t val, st_data_t h) * Coverage.peek_result => hash * * Returns a hash that contains filename as key and coverage array as value. - * This is the same as `Coverage.result(stop: false, clear: false)`. + * This is the same as Coverage.result(stop: false, clear: false). * * { * "file.rb" => [1, 2, nil], From 7d2815d907e6dc7ce0075a8dd1a4e45b8e647921 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 24 Dec 2025 13:48:35 +0900 Subject: [PATCH 13/19] Add flag to prevent updating NEWS.md on release day. 
--- .github/workflows/bundled_gems.yml | 4 ++++ .github/workflows/default_gems_list.yml | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bundled_gems.yml b/.github/workflows/bundled_gems.yml index ad2e3915199102..5521752d2f517c 100644 --- a/.github/workflows/bundled_gems.yml +++ b/.github/workflows/bundled_gems.yml @@ -1,5 +1,8 @@ name: bundled_gems +env: + UPDATE_NEWS_ENABLED: false + on: push: branches: ['master'] @@ -67,6 +70,7 @@ jobs: - name: Maintain updated gems list in NEWS run: | ruby tool/update-NEWS-gemlist.rb bundled + if: ${{ env.UPDATE_NEWS_ENABLED == 'true' }} - name: Check diffs id: diff diff --git a/.github/workflows/default_gems_list.yml b/.github/workflows/default_gems_list.yml index 420228f3997d45..ba6d6ee73c528d 100644 --- a/.github/workflows/default_gems_list.yml +++ b/.github/workflows/default_gems_list.yml @@ -1,6 +1,9 @@ name: Update default gems list on: [push, pull_request, merge_group] +env: + UPDATE_NEWS_ENABLED: false + concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} @@ -65,7 +68,7 @@ jobs: - name: Maintain updated gems list in NEWS run: | ruby tool/update-NEWS-gemlist.rb default - if: ${{ steps.gems.outcome == 'success' }} + if: ${{ steps.gems.outcome == 'success' && env.UPDATE_NEWS_ENABLED == 'true' }} - name: Check diffs id: diff From 6af9b8d59af176183c15589afc64505f5ada692c Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 24 Dec 2025 13:59:08 +0900 Subject: [PATCH 14/19] Minor update at stdlib section --- NEWS.md | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1b7efe4252464d..2c5a758e6b06c3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -280,6 +280,12 @@ Note: We're only listing outstanding class updates. ## Stdlib updates +We only list stdlib changes that are notable feature changes. 
+ +Other changes are listed in the following sections. We also listed release +history from the previous bundled version that is Ruby 3.4.0 if it has GitHub +releases. + The following bundled gems are promoted from default gems. * ostruct 0.6.3 @@ -293,15 +299,6 @@ The following bundled gems are promoted from default gems. * readline 0.0.4 * fiddle 1.1.8 -The following bundled gems are added. - - -We only list stdlib changes that are notable feature changes. - -Other changes are listed in the following sections. We also listed release -history from the previous bundled version that is Ruby 3.4.0 if it has GitHub -releases. - The following default gem is added. * win32-registry 0.1.2 @@ -365,7 +362,7 @@ The following bundled gems are updated. ### RubyGems and Bundler -see the following links for details. +Ruby 4.0 bundles RubyGems and Bundler version 4. See the following links for details. * [Upgrading to RubyGems/Bundler 4 - RubyGems Blog](https://blog.rubygems.org/2025/12/03/upgrade-to-rubygems-bundler-4.html) * [4.0.0 Released - RubyGems Blog](https://blog.rubygems.org/2025/12/03/4.0.0-released.html) From 44693ee32990fdd609e91ebb2970b3110b3f08e5 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 24 Dec 2025 11:30:11 +0900 Subject: [PATCH 15/19] Fix a possible memory leak in dtoa Fix GH-15061 --- missing/dtoa.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/missing/dtoa.c b/missing/dtoa.c index cbd6e6ebae23fb..ba8cd46ebd9484 100644 --- a/missing/dtoa.c +++ b/missing/dtoa.c @@ -547,10 +547,13 @@ Balloc(int k) } static void -Bfree(Bigint *v) +Bclear(Bigint **vp) { - FREE(v); + Bigint *v = *vp; + *vp = NULL; + if (v) FREE(v); } +#define Bfree(v) Bclear(&(v)) #define Bcopy(x,y) memcpy((char *)&(x)->sign, (char *)&(y)->sign, \ (y)->wds*sizeof(Long) + 2*sizeof(int)) From fc19ce0a0187a8d69df88a1231ec93b0e3e990b2 Mon Sep 17 00:00:00 2001 From: yui-knk Date: Wed, 24 Dec 2025 11:14:44 +0900 Subject: [PATCH 16/19] Lrama v0.7.1 --- 
tool/lrama/NEWS.md | 405 ++++- tool/lrama/exe/lrama | 2 +- tool/lrama/lib/lrama.rb | 10 +- tool/lrama/lib/lrama/bitmap.rb | 23 +- tool/lrama/lib/lrama/command.rb | 138 +- tool/lrama/lib/lrama/context.rb | 46 +- tool/lrama/lib/lrama/counterexamples.rb | 304 +++- .../lib/lrama/counterexamples/derivation.rb | 18 +- .../lib/lrama/counterexamples/example.rb | 69 +- tool/lrama/lib/lrama/counterexamples/node.rb | 30 + tool/lrama/lib/lrama/counterexamples/path.rb | 26 +- .../lrama/counterexamples/production_path.rb | 19 - .../lib/lrama/counterexamples/start_path.rb | 23 - .../lib/lrama/counterexamples/state_item.rb | 25 +- .../lrama/counterexamples/transition_path.rb | 19 - .../lrama/lib/lrama/counterexamples/triple.rb | 36 +- tool/lrama/lib/lrama/diagnostics.rb | 36 - tool/lrama/lib/lrama/diagram.rb | 77 + tool/lrama/lib/lrama/digraph.rb | 35 +- tool/lrama/lib/lrama/erb.rb | 29 + tool/lrama/lib/lrama/grammar.rb | 266 ++- tool/lrama/lib/lrama/grammar/auxiliary.rb | 7 +- tool/lrama/lib/lrama/grammar/binding.rb | 62 +- tool/lrama/lib/lrama/grammar/code.rb | 17 +- .../lib/lrama/grammar/code/destructor_code.rb | 11 + .../lrama/grammar/code/initial_action_code.rb | 3 + .../lrama/grammar/code/no_reference_code.rb | 3 + .../lib/lrama/grammar/code/printer_code.rb | 11 + .../lib/lrama/grammar/code/rule_action.rb | 17 + tool/lrama/lib/lrama/grammar/counter.rb | 10 + tool/lrama/lib/lrama/grammar/destructor.rb | 15 +- tool/lrama/lib/lrama/grammar/error_token.rb | 15 +- tool/lrama/lib/lrama/grammar/inline.rb | 3 + .../lib/lrama/grammar/inline/resolver.rb | 80 + tool/lrama/lib/lrama/grammar/parameterized.rb | 5 + .../resolver.rb | 27 +- .../rhs.rb | 9 +- .../lib/lrama/grammar/parameterized/rule.rb | 36 + .../lib/lrama/grammar/parameterizing_rule.rb | 5 - .../lrama/grammar/parameterizing_rule/rule.rb | 24 - tool/lrama/lib/lrama/grammar/percent_code.rb | 13 +- tool/lrama/lib/lrama/grammar/precedence.rb | 44 +- tool/lrama/lib/lrama/grammar/printer.rb | 9 + 
tool/lrama/lib/lrama/grammar/reference.rb | 13 + tool/lrama/lib/lrama/grammar/rule.rb | 62 +- tool/lrama/lib/lrama/grammar/rule_builder.rb | 153 +- tool/lrama/lib/lrama/grammar/stdlib.y | 116 +- tool/lrama/lib/lrama/grammar/symbol.rb | 82 +- .../lib/lrama/grammar/symbols/resolver.rb | 67 +- tool/lrama/lib/lrama/grammar/type.rb | 14 +- tool/lrama/lib/lrama/grammar/union.rb | 13 +- tool/lrama/lib/lrama/grammar_validator.rb | 37 - tool/lrama/lib/lrama/lexer.rb | 74 +- tool/lrama/lib/lrama/lexer/location.rb | 33 +- tool/lrama/lib/lrama/lexer/token.rb | 62 +- tool/lrama/lib/lrama/lexer/token/base.rb | 73 + tool/lrama/lib/lrama/lexer/token/char.rb | 17 +- tool/lrama/lib/lrama/lexer/token/empty.rb | 14 + tool/lrama/lib/lrama/lexer/token/ident.rb | 4 +- .../lib/lrama/lexer/token/instantiate_rule.rb | 8 +- tool/lrama/lib/lrama/lexer/token/int.rb | 14 + tool/lrama/lib/lrama/lexer/token/str.rb | 11 + tool/lrama/lib/lrama/lexer/token/tag.rb | 4 +- tool/lrama/lib/lrama/lexer/token/token.rb | 11 + tool/lrama/lib/lrama/lexer/token/user_code.rb | 100 +- tool/lrama/lib/lrama/logger.rb | 14 +- tool/lrama/lib/lrama/option_parser.rb | 72 +- tool/lrama/lib/lrama/options.rb | 32 +- tool/lrama/lib/lrama/output.rb | 15 +- tool/lrama/lib/lrama/parser.rb | 1577 +++++++++-------- tool/lrama/lib/lrama/report.rb | 4 - tool/lrama/lib/lrama/report/duration.rb | 27 - tool/lrama/lib/lrama/report/profile.rb | 16 - tool/lrama/lib/lrama/reporter.rb | 39 + tool/lrama/lib/lrama/reporter/conflicts.rb | 44 + tool/lrama/lib/lrama/reporter/grammar.rb | 39 + tool/lrama/lib/lrama/reporter/precedences.rb | 54 + tool/lrama/lib/lrama/reporter/profile.rb | 4 + .../lib/lrama/reporter/profile/call_stack.rb | 45 + .../lib/lrama/reporter/profile/memory.rb | 44 + tool/lrama/lib/lrama/reporter/rules.rb | 43 + tool/lrama/lib/lrama/reporter/states.rb | 387 ++++ tool/lrama/lib/lrama/reporter/terms.rb | 44 + tool/lrama/lib/lrama/state.rb | 501 +++--- tool/lrama/lib/lrama/state/action.rb | 5 + 
tool/lrama/lib/lrama/state/action/goto.rb | 33 + tool/lrama/lib/lrama/state/action/reduce.rb | 71 + tool/lrama/lib/lrama/state/action/shift.rb | 39 + .../lib/lrama/state/inadequacy_annotation.rb | 140 ++ .../lrama/lib/lrama/{states => state}/item.rb | 37 +- tool/lrama/lib/lrama/state/reduce.rb | 37 - .../lib/lrama/state/reduce_reduce_conflict.rb | 15 +- .../lib/lrama/state/resolved_conflict.rb | 42 +- tool/lrama/lib/lrama/state/shift.rb | 15 - .../lib/lrama/state/shift_reduce_conflict.rb | 15 +- tool/lrama/lib/lrama/states.rb | 622 +++++-- tool/lrama/lib/lrama/states_reporter.rb | 362 ---- tool/lrama/lib/lrama/trace_reporter.rb | 45 - tool/lrama/lib/lrama/tracer.rb | 51 + tool/lrama/lib/lrama/tracer/actions.rb | 22 + tool/lrama/lib/lrama/tracer/closure.rb | 30 + tool/lrama/lib/lrama/tracer/duration.rb | 38 + .../lib/lrama/tracer/only_explicit_rules.rb | 24 + tool/lrama/lib/lrama/tracer/rules.rb | 23 + tool/lrama/lib/lrama/tracer/state.rb | 33 + tool/lrama/lib/lrama/version.rb | 3 +- tool/lrama/lib/lrama/warnings.rb | 33 + tool/lrama/lib/lrama/warnings/conflicts.rb | 27 + .../lib/lrama/warnings/implicit_empty.rb | 29 + .../lib/lrama/warnings/name_conflicts.rb | 63 + .../lib/lrama/warnings/redefined_rules.rb | 23 + tool/lrama/lib/lrama/warnings/required.rb | 23 + .../lib/lrama/warnings/useless_precedence.rb | 25 + tool/lrama/template/bison/_yacc.h | 8 + tool/lrama/template/diagram/diagram.html | 102 ++ 115 files changed, 5663 insertions(+), 2417 deletions(-) create mode 100644 tool/lrama/lib/lrama/counterexamples/node.rb delete mode 100644 tool/lrama/lib/lrama/counterexamples/production_path.rb delete mode 100644 tool/lrama/lib/lrama/counterexamples/start_path.rb delete mode 100644 tool/lrama/lib/lrama/counterexamples/transition_path.rb delete mode 100644 tool/lrama/lib/lrama/diagnostics.rb create mode 100644 tool/lrama/lib/lrama/diagram.rb create mode 100644 tool/lrama/lib/lrama/erb.rb create mode 100644 tool/lrama/lib/lrama/grammar/inline.rb create mode 100644 
tool/lrama/lib/lrama/grammar/inline/resolver.rb create mode 100644 tool/lrama/lib/lrama/grammar/parameterized.rb rename tool/lrama/lib/lrama/grammar/{parameterizing_rule => parameterized}/resolver.rb (60%) rename tool/lrama/lib/lrama/grammar/{parameterizing_rule => parameterized}/rhs.rb (73%) create mode 100644 tool/lrama/lib/lrama/grammar/parameterized/rule.rb delete mode 100644 tool/lrama/lib/lrama/grammar/parameterizing_rule.rb delete mode 100644 tool/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb delete mode 100644 tool/lrama/lib/lrama/grammar_validator.rb create mode 100644 tool/lrama/lib/lrama/lexer/token/base.rb create mode 100644 tool/lrama/lib/lrama/lexer/token/empty.rb create mode 100644 tool/lrama/lib/lrama/lexer/token/int.rb create mode 100644 tool/lrama/lib/lrama/lexer/token/str.rb create mode 100644 tool/lrama/lib/lrama/lexer/token/token.rb delete mode 100644 tool/lrama/lib/lrama/report.rb delete mode 100644 tool/lrama/lib/lrama/report/duration.rb delete mode 100644 tool/lrama/lib/lrama/report/profile.rb create mode 100644 tool/lrama/lib/lrama/reporter.rb create mode 100644 tool/lrama/lib/lrama/reporter/conflicts.rb create mode 100644 tool/lrama/lib/lrama/reporter/grammar.rb create mode 100644 tool/lrama/lib/lrama/reporter/precedences.rb create mode 100644 tool/lrama/lib/lrama/reporter/profile.rb create mode 100644 tool/lrama/lib/lrama/reporter/profile/call_stack.rb create mode 100644 tool/lrama/lib/lrama/reporter/profile/memory.rb create mode 100644 tool/lrama/lib/lrama/reporter/rules.rb create mode 100644 tool/lrama/lib/lrama/reporter/states.rb create mode 100644 tool/lrama/lib/lrama/reporter/terms.rb create mode 100644 tool/lrama/lib/lrama/state/action.rb create mode 100644 tool/lrama/lib/lrama/state/action/goto.rb create mode 100644 tool/lrama/lib/lrama/state/action/reduce.rb create mode 100644 tool/lrama/lib/lrama/state/action/shift.rb create mode 100644 tool/lrama/lib/lrama/state/inadequacy_annotation.rb rename tool/lrama/lib/lrama/{states 
=> state}/item.rb (61%) delete mode 100644 tool/lrama/lib/lrama/state/reduce.rb delete mode 100644 tool/lrama/lib/lrama/state/shift.rb delete mode 100644 tool/lrama/lib/lrama/states_reporter.rb delete mode 100644 tool/lrama/lib/lrama/trace_reporter.rb create mode 100644 tool/lrama/lib/lrama/tracer.rb create mode 100644 tool/lrama/lib/lrama/tracer/actions.rb create mode 100644 tool/lrama/lib/lrama/tracer/closure.rb create mode 100644 tool/lrama/lib/lrama/tracer/duration.rb create mode 100644 tool/lrama/lib/lrama/tracer/only_explicit_rules.rb create mode 100644 tool/lrama/lib/lrama/tracer/rules.rb create mode 100644 tool/lrama/lib/lrama/tracer/state.rb create mode 100644 tool/lrama/lib/lrama/warnings.rb create mode 100644 tool/lrama/lib/lrama/warnings/conflicts.rb create mode 100644 tool/lrama/lib/lrama/warnings/implicit_empty.rb create mode 100644 tool/lrama/lib/lrama/warnings/name_conflicts.rb create mode 100644 tool/lrama/lib/lrama/warnings/redefined_rules.rb create mode 100644 tool/lrama/lib/lrama/warnings/required.rb create mode 100644 tool/lrama/lib/lrama/warnings/useless_precedence.rb create mode 100644 tool/lrama/template/diagram/diagram.html diff --git a/tool/lrama/NEWS.md b/tool/lrama/NEWS.md index a535332ec37a32..f71118a9130f3e 100644 --- a/tool/lrama/NEWS.md +++ b/tool/lrama/NEWS.md @@ -1,8 +1,343 @@ # NEWS for Lrama +## Lrama 0.7.1 (2025-12-24) + +### Optimize IELR + +Optimized performance to a level that allows for IELR testing in practical applications. + +https://github.com/ruby/lrama/pull/595 +https://github.com/ruby/lrama/pull/605 +https://github.com/ruby/lrama/pull/685 +https://github.com/ruby/lrama/pull/700 + +### Introduce counterexamples timeout + +Counterexample searches can sometimes take a long time, so we've added a timeout to abort the process after a set period. 
The current limits are: + +* 10 seconds per case +* 120 seconds total (cumulative) + +Please note that these are hard-coded and cannot be modified by the user in the current version. + +https://github.com/ruby/lrama/pull/623 + +### Optimize Counterexamples + +Optimized counterexample search performance. + +https://github.com/ruby/lrama/pull/607 +https://github.com/ruby/lrama/pull/610 +https://github.com/ruby/lrama/pull/614 +https://github.com/ruby/lrama/pull/622 +https://github.com/ruby/lrama/pull/627 +https://github.com/ruby/lrama/pull/629 +https://github.com/ruby/lrama/pull/659 + +### Support parameterized rule's arguments include inline + +Allow to use %inline directive with Parameterized rules arguments. When an inline rule is used as an argument to a Parameterized rule, it expands inline at the point of use. + +```yacc +%rule %inline op : '+' + | '-' + ; +%% +operation : op? + ; +``` + +This expands to: + +```yacc +operation : /* empty */ + | '+' + | '-' + ; +``` + +https://github.com/ruby/lrama/pull/637 + +### Render conflicts of each state on output file + +Added token information for conflicts in the output file. +These information are useful when a state has many actions. + +``` +State 1 + + 4 class: keyword_class • tSTRING "end" + 5 $@1: ε • [tSTRING] + 7 class: keyword_class • $@1 tSTRING '!' "end" $@2 + 8 $@3: ε • [tSTRING] + 10 class: keyword_class • $@3 tSTRING '?' "end" $@4 + + Conflict on tSTRING. shift/reduce($@1) + Conflict on tSTRING. shift/reduce($@3) + Conflict on tSTRING. 
reduce($@1)/reduce($@3) + + tSTRING shift, and go to state 6 + + tSTRING reduce using rule 5 ($@1) + tSTRING reduce using rule 8 ($@3) + + $@1 go to state 7 + $@3 go to state 8 +``` + +https://github.com/ruby/lrama/pull/541 + +### Render the origin of conflicted tokens on output file + +For example, for the grammar file like below: + +``` +%% + +program: expr + ; + +expr: expr '+' expr + | tNUMBER + ; + +%% +``` + +Lrama generates output file which describes where `"plus"` (`'+'`) look ahead tokens come from: + +``` +State 6 + + 2 expr: expr • "plus" expr + 2 | expr "plus" expr • ["end of file", "plus"] + + Conflict on "plus". shift/reduce(expr) + "plus" comes from state 0 goto by expr + "plus" comes from state 5 goto by expr +``` + +state 0 and state 5 look like below: + +``` +State 0 + + 0 $accept: • program "end of file" + 1 program: • expr + 2 expr: • expr "plus" expr + 3 | • tNUMBER + + tNUMBER shift, and go to state 1 + + program go to state 2 + expr go to state 3 + +State 5 + + 2 expr: • expr "plus" expr + 2 | expr "plus" • expr + 3 | • tNUMBER + + tNUMBER shift, and go to state 1 + + expr go to state 6 +``` + +https://github.com/ruby/lrama/pull/726 + +### Render precedences usage information on output file + +For example, for the grammar file like below: + +``` +%left tPLUS +%right tUPLUS + +%% + +program: expr ; + +expr: tUPLUS expr + | expr tPLUS expr + | tNUMBER + ; + +%% +``` + +Lrama generates output file which describes where these precedences are used to resolve conflicts: + +``` +Precedences + precedence on "unary+" is used to resolve conflict on + LALR + state 5. Conflict between reduce by "expr -> tUPLUS expr" and shift "+" resolved as reduce ("+" < "unary+"). + precedence on "+" is used to resolve conflict on + LALR + state 5. Conflict between reduce by "expr -> tUPLUS expr" and shift "+" resolved as reduce ("+" < "unary+"). + state 8. Conflict between reduce by "expr -> expr tPLUS expr" and shift "+" resolved as reduce (%left "+"). 
+``` + +https://github.com/ruby/lrama/pull/741 + +### Add support for reporting Rule Usage Frequency + +Support to report rule usage frequency statistics for analyzing grammar characteristics. +Run `exe/lrama --report=rules` to show how frequently each terminal and non-terminal symbol is used in the grammar rules. + +```console +$ exe/lrama --report=rules sample/calc.y +Rule Usage Frequency + 0 tSTRING (4 times) + 1 keyword_class (3 times) + 2 keyword_end (3 times) + 3 '+' (2 times) + 4 string (2 times) + 5 string_1 (2 times) + 6 '!' (1 times) + 7 '-' (1 times) + 8 '?' (1 times) + 9 EOI (1 times) + 10 class (1 times) + 11 program (1 times) + 12 string_2 (1 times) + 13 strings_1 (1 times) + 14 strings_2 (1 times) + 15 tNUMBER (1 times) +``` + +This feature provides insights into the language characteristics by showing: +- Which symbols are most frequently used in the grammar +- The distribution of terminal and non-terminal usage +- Potential areas for grammar optimization or refactoring + +The frequency statistics help developers understand the grammar structure and can be useful for: +- Grammar complexity analysis +- Performance optimization hints +- Language design decisions +- Documentation and educational purposes + +https://github.com/ruby/lrama/pull/677 + +### Render Split States information on output file + +For example, for the grammar file like below: + +``` +%token a +%token b +%token c +%define lr.type ielr + +%precedence tLOWEST +%precedence a +%precedence tHIGHEST + +%% + +S: a A B a + | b A B b + ; + +A: a C D E + ; + +B: c + | // empty + ; + +C: D + ; + +D: a + ; + +E: a + | %prec tHIGHEST // empty + ; + +%% +``` + +Lrama generates output file which describes where which new states are created when IELR is enabled: + +``` +Split States + + State 19 is split from state 4 + State 20 is split from state 9 + State 21 is split from state 14 +``` + +https://github.com/ruby/lrama/pull/624 + +### Add ioption support to the Standard library + +Support 
`ioption` (inline option) rule, which is expanded inline without creating intermediate rules. + +Unlike the regular `option` rule that generates a separate rule, `ioption` directly expands at the point of use: + +```yacc +program: ioption(number) expr + +// Expanded inline to: + +program: expr + | number expr +``` + +This differs from the regular `option` which would generate: + +```yacc +program: option(number) expr + +// Expanded to: + +program: option_number expr +option_number: %empty + | number +``` + +The `ioption` rule provides more compact grammar generation by avoiding intermediate rule creation, which can be beneficial for reducing the parser's rule count and potentially improving performance. + +This feature is inspired by Menhir's standard library and maintains compatibility with [Menhir's `ioption` behavior](https://github.com/let-def/menhir/blob/e8ba7bef219acd355798072c42abbd11335ecf09/src/standard.mly#L33-L41). + +https://github.com/ruby/lrama/pull/666 + +### Syntax Diagrams + +Lrama provides an API for generating HTML syntax diagrams. These visual diagrams are highly useful as grammar development tools and can also serve as a form of automatic self-documentation. + +![Syntax Diagrams](https://github.com/user-attachments/assets/5d9bca77-93fd-4416-bc24-9a0f70693a22) + +If you use syntax diagrams, you add `--diagram` option. + +```console +$ exe/lrama --diagram sample.y +``` + +https://github.com/ruby/lrama/pull/523 + +### Support `--profile` option + +You can profile parser generation process without modification for Lrama source code. +Currently `--profile=call-stack` and `--profile=memory` are supported. + +```console +$ exe/lrama --profile=call-stack sample/calc.y +``` + +Then "tmp/stackprof-cpu-myapp.dump" is generated. 
+ +https://github.com/ruby/lrama/pull/525 + +### Add support Start-Symbol: `%start` + +https://github.com/ruby/lrama/pull/576 + ## Lrama 0.7.0 (2025-01-21) -## [EXPERIMENTAL] Support the generation of the IELR(1) parser described in this paper +### [EXPERIMENTAL] Support the generation of the IELR(1) parser described in this paper Support the generation of the IELR(1) parser described in this paper. https://www.sciencedirect.com/science/article/pii/S0167642309001191 @@ -15,12 +350,12 @@ If you use IELR(1) parser, you can write the following directive in your grammar But, currently IELR(1) parser is experimental feature. If you find any bugs, please report it to us. Thank you. -## Support `-t` option as same as `--debug` option +### Support `-t` option as same as `--debug` option Support to `-t` option as same as `--debug` option. These options align with Bison behavior. So same as `--debug` option. -## Trace only explicit rules +### Trace only explicit rules Support to trace only explicit rules. If you use `--trace=rules` option, it shows include mid-rule actions. If you want to show only explicit rules, you can use `--trace=only-explicit-rules` option. @@ -97,9 +432,9 @@ nterm.y:6:7: symbol EOI redeclared as a nonterminal ## Lrama 0.6.10 (2024-09-11) -### Aliased Named References for actions of RHS in parameterizing rules +### Aliased Named References for actions of RHS in Parameterizing rules -Allow to use aliased named references for actions of RHS in parameterizing rules. +Allow to use aliased named references for actions of RHS in Parameterizing rules. 
```yacc %rule sum(X, Y): X[summand] '+' Y[addend] { $$ = $summand + $addend } @@ -109,9 +444,9 @@ Allow to use aliased named references for actions of RHS in parameterizing rules https://github.com/ruby/lrama/pull/410 -### Named References for actions of RHS in parameterizing rules caller side +### Named References for actions of RHS in Parameterizing rules caller side -Allow to use named references for actions of RHS in parameterizing rules caller side. +Allow to use named references for actions of RHS in Parameterizing rules caller side. ```yacc opt_nl: '\n'?[nl] { $$ = $nl; } @@ -120,9 +455,9 @@ opt_nl: '\n'?[nl] { $$ = $nl; } https://github.com/ruby/lrama/pull/414 -### Widen the definable position of parameterizing rules +### Widen the definable position of Parameterizing rules -Allow to define parameterizing rules in the middle of the grammar. +Allow to define Parameterizing rules in the middle of the grammar. ```yacc %rule defined_option(X): /* empty */ @@ -186,15 +521,15 @@ Change to `%locations` directive not set by default. https://github.com/ruby/lrama/pull/446 -### Diagnostics report for parameterizing rules redefine +### Diagnostics report for parameterized rules redefine -Support to warning redefined parameterizing rules. -Run `exe/lrama -W` or `exe/lrama --warnings` to show redefined parameterizing rules. +Support to warning redefined parameterized rules. +Run `exe/lrama -W` or `exe/lrama --warnings` to show redefined parameterized rules. 
```console $ exe/lrama -W sample/calc.y -parameterizing rule redefined: redefined_method(X) -parameterizing rule redefined: redefined_method(X) +parameterized rule redefined: redefined_method(X) +parameterized rule redefined: redefined_method(X) ``` https://github.com/ruby/lrama/pull/448 @@ -208,9 +543,9 @@ https://github.com/ruby/lrama/pull/457 ## Lrama 0.6.9 (2024-05-02) -### Callee side tag specification of parameterizing rules +### Callee side tag specification of Parameterizing rules -Allow to specify tag on callee side of parameterizing rules. +Allow to specify tag on callee side of Parameterizing rules. ```yacc %union { @@ -221,9 +556,9 @@ Allow to specify tag on callee side of parameterizing rules. ; ``` -### Named References for actions of RHS in parameterizing rules +### Named References for actions of RHS in Parameterizing rules -Allow to use named references for actions of RHS in parameterizing rules. +Allow to use named references for actions of RHS in Parameterizing rules. ```yacc %rule option(number): /* empty */ @@ -233,9 +568,9 @@ Allow to use named references for actions of RHS in parameterizing rules. ## Lrama 0.6.8 (2024-04-29) -### Nested parameterizing rules with tag +### Nested Parameterizing rules with tag -Allow to nested parameterizing rules with tag. +Allow to nested Parameterizing rules with tag. ```yacc %union { @@ -257,9 +592,9 @@ Allow to nested parameterizing rules with tag. ## Lrama 0.6.7 (2024-04-28) -### RHS of user defined parameterizing rules contains `'symbol'?`, `'symbol'+` and `'symbol'*`. +### RHS of user defined Parameterizing rules contains `'symbol'?`, `'symbol'+` and `'symbol'*`. -User can use `'symbol'?`, `'symbol'+` and `'symbol'*` in RHS of user defined parameterizing rules. +User can use `'symbol'?`, `'symbol'+` and `'symbol'*` in RHS of user defined Parameterizing rules. 
``` %rule with_word_seps(X): /* empty */ @@ -319,7 +654,7 @@ expr : number { $$ = $1; } ### Typed Midrule Actions -User can specify the type of mid rule action by tag (``) instead of specifying it with in an action. +User can specify the type of mid-rule action by tag (``) instead of specifying it with in an action. ```yacc primary: k_case expr_value terms? @@ -394,7 +729,7 @@ https://github.com/ruby/lrama/pull/382 User can set codes for freeing semantic value resources by using `%destructor`. In general, these resources are freed by actions or after parsing. -However if syntax error happens in parsing, these codes may not be executed. +However, if syntax error happens in parsing, these codes may not be executed. Codes associated to `%destructor` are executed when semantic value is popped from the stack by an error. ```yacc @@ -432,7 +767,7 @@ Lrama introduces two features to support another semantic value stack by parser 1. Callback entry points User can emulate semantic value stack by these callbacks. -Lrama provides these five callbacks. Registered functions are called when each event happen. For example %after-shift function is called when shift happens on original semantic value stack. +Lrama provides these five callbacks. Registered functions are called when each event happens. For example %after-shift function is called when shift happens on original semantic value stack. * `%after-shift` function_name * `%before-reduce` function_name @@ -460,15 +795,15 @@ https://github.com/ruby/lrama/pull/367 ### %no-stdlib directive If `%no-stdlib` directive is set, Lrama doesn't load Lrama standard library for -parameterizing rules, stdlib.y. +parameterized rules, stdlib.y. https://github.com/ruby/lrama/pull/344 ## Lrama 0.6.1 (2024-01-13) -### Nested parameterizing rules +### Nested Parameterizing rules -Allow to pass an instantiated rule to other parameterizing rules. +Allow to pass an instantiated rule to other Parameterizing rules. 
```yacc %rule constant(X) : X @@ -485,7 +820,7 @@ program : option(constant(number)) // Nested rule %% ``` -Allow to use nested parameterizing rules when define parameterizing rules. +Allow to use nested Parameterizing rules when define Parameterizing rules. ```yacc %rule option(x) : /* empty */ @@ -510,9 +845,9 @@ https://github.com/ruby/lrama/pull/337 ## Lrama 0.6.0 (2023-12-25) -### User defined parameterizing rules +### User defined Parameterizing rules -Allow to define parameterizing rule by `%rule` directive. +Allow to define Parameterizing rule by `%rule` directive. ```yacc %rule pair(X, Y): X Y { $$ = $1 + $2; } @@ -532,7 +867,7 @@ https://github.com/ruby/lrama/pull/285 ## Lrama 0.5.11 (2023-12-02) -### Type specification of parameterizing rules +### Type specification of Parameterizing rules Allow to specify type of rules by specifying tag, `` in below example. Tag is post-modification style. @@ -556,13 +891,13 @@ https://github.com/ruby/lrama/pull/272 ### Parameterizing rules (option, nonempty_list, list) -Support function call style parameterizing rules for `option`, `nonempty_list` and `list`. +Support function call style Parameterizing rules for `option`, `nonempty_list` and `list`. https://github.com/ruby/lrama/pull/197 ### Parameterizing rules (separated_list) -Support `separated_list` and `separated_nonempty_list` parameterizing rules. +Support `separated_list` and `separated_nonempty_list` Parameterizing rules. ```text program: separated_list(',', number) @@ -618,7 +953,7 @@ https://github.com/ruby/lrama/pull/181 ### Racc parser -Replace Lrama's parser from hand written parser to LR parser generated by Racc. +Replace Lrama's parser from handwritten parser to LR parser generated by Racc. Lrama uses `--embedded` option to generate LR parser because Racc is changed from default gem to bundled gem by Ruby 3.3 (https://github.com/ruby/lrama/pull/132). 
https://github.com/ruby/lrama/pull/62 diff --git a/tool/lrama/exe/lrama b/tool/lrama/exe/lrama index 1aece5d1410125..710ac0cb965888 100755 --- a/tool/lrama/exe/lrama +++ b/tool/lrama/exe/lrama @@ -4,4 +4,4 @@ $LOAD_PATH << File.join(__dir__, "../lib") require "lrama" -Lrama::Command.new.run(ARGV.dup) +Lrama::Command.new(ARGV.dup).run diff --git a/tool/lrama/lib/lrama.rb b/tool/lrama/lib/lrama.rb index fe2e05807c8994..56ba0044d4a651 100644 --- a/tool/lrama/lib/lrama.rb +++ b/tool/lrama/lib/lrama.rb @@ -4,19 +4,19 @@ require_relative "lrama/command" require_relative "lrama/context" require_relative "lrama/counterexamples" -require_relative "lrama/diagnostics" +require_relative "lrama/diagram" require_relative "lrama/digraph" +require_relative "lrama/erb" require_relative "lrama/grammar" -require_relative "lrama/grammar_validator" require_relative "lrama/lexer" require_relative "lrama/logger" require_relative "lrama/option_parser" require_relative "lrama/options" require_relative "lrama/output" require_relative "lrama/parser" -require_relative "lrama/report" +require_relative "lrama/reporter" require_relative "lrama/state" require_relative "lrama/states" -require_relative "lrama/states_reporter" -require_relative "lrama/trace_reporter" +require_relative "lrama/tracer" require_relative "lrama/version" +require_relative "lrama/warnings" diff --git a/tool/lrama/lib/lrama/bitmap.rb b/tool/lrama/lib/lrama/bitmap.rb index 098c6e0b777a2c..88b255b012463a 100644 --- a/tool/lrama/lib/lrama/bitmap.rb +++ b/tool/lrama/lib/lrama/bitmap.rb @@ -3,7 +3,10 @@ module Lrama module Bitmap - # @rbs (Array[Integer] ary) -> Integer + # @rbs! 
+ # type bitmap = Integer + + # @rbs (Array[Integer] ary) -> bitmap def self.from_array(ary) bit = 0 @@ -14,21 +17,31 @@ def self.from_array(ary) bit end - # @rbs (Integer int) -> Array[Integer] + # @rbs (Integer int) -> bitmap + def self.from_integer(int) + 1 << int + end + + # @rbs (bitmap int) -> Array[Integer] def self.to_array(int) a = [] #: Array[Integer] i = 0 - while int > 0 do - if int & 1 == 1 + len = int.bit_length + while i < len do + if int[i] == 1 a << i end i += 1 - int >>= 1 end a end + + # @rbs (bitmap int, Integer size) -> Array[bool] + def self.to_bool_array(int, size) + Array.new(size) { |i| int[i] == 1 } + end end end diff --git a/tool/lrama/lib/lrama/command.rb b/tool/lrama/lib/lrama/command.rb index 3ff39d578d0319..17aad1a1c112b1 100644 --- a/tool/lrama/lib/lrama/command.rb +++ b/tool/lrama/lib/lrama/command.rb @@ -5,64 +5,116 @@ class Command LRAMA_LIB = File.realpath(File.join(File.dirname(__FILE__))) STDLIB_FILE_PATH = File.join(LRAMA_LIB, 'grammar', 'stdlib.y') - def run(argv) - begin - options = OptionParser.new.parse(argv) - rescue => e - message = e.message - message = message.gsub(/.+/, "\e[1m\\&\e[m") if Exception.to_tty? 
- abort message - end - - Report::Duration.enable if options.trace_opts[:time] + def initialize(argv) + @logger = Lrama::Logger.new + @options = OptionParser.parse(argv) + @tracer = Tracer.new(STDERR, **@options.trace_opts) + @reporter = Reporter.new(**@options.report_opts) + @warnings = Warnings.new(@logger, @options.warnings) + rescue => e + abort format_error_message(e.message) + end - text = options.y.read - options.y.close if options.y != STDIN - begin - grammar = Lrama::Parser.new(text, options.grammar_file, options.debug, options.define).parse - unless grammar.no_stdlib - stdlib_grammar = Lrama::Parser.new(File.read(STDLIB_FILE_PATH), STDLIB_FILE_PATH, options.debug).parse - grammar.insert_before_parameterizing_rules(stdlib_grammar.parameterizing_rules) + def run + Lrama::Reporter::Profile::CallStack.report(@options.profile_opts[:call_stack]) do + Lrama::Reporter::Profile::Memory.report(@options.profile_opts[:memory]) do + execute_command_workflow end - grammar.prepare - grammar.validate! - rescue => e - raise e if options.debug - message = e.message - message = message.gsub(/.+/, "\e[1m\\&\e[m") if Exception.to_tty? 
- abort message end - states = Lrama::States.new(grammar, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure])) + end + + private + + def execute_command_workflow + @tracer.enable_duration + text = read_input + grammar = build_grammar(text) + states, context = compute_status(grammar) + render_reports(states) if @options.report_file + @tracer.trace(grammar) + render_diagram(grammar) + render_output(context, grammar) + states.validate!(@logger) + @warnings.warn(grammar, states) + end + + def read_input + text = @options.y.read + @options.y.close unless @options.y == STDIN + text + end + + def build_grammar(text) + grammar = + Lrama::Parser.new(text, @options.grammar_file, @options.debug, @options.locations, @options.define).parse + merge_stdlib(grammar) + prepare_grammar(grammar) + grammar + rescue => e + raise e if @options.debug + abort format_error_message(e.message) + end + + def format_error_message(message) + return message unless Exception.to_tty? + + message.gsub(/.+/, "\e[1m\\&\e[m") + end + + def merge_stdlib(grammar) + return if grammar.no_stdlib + + stdlib_text = File.read(STDLIB_FILE_PATH) + stdlib_grammar = Lrama::Parser.new( + stdlib_text, + STDLIB_FILE_PATH, + @options.debug, + @options.locations, + @options.define, + ).parse + + grammar.prepend_parameterized_rules(stdlib_grammar.parameterized_rules) + end + + def prepare_grammar(grammar) + grammar.prepare + grammar.validate! + end + + def compute_status(grammar) + states = Lrama::States.new(grammar, @tracer) states.compute states.compute_ielr if grammar.ielr_defined? 
- context = Lrama::Context.new(states) + [states, Lrama::Context.new(states)] + end - if options.report_file - reporter = Lrama::StatesReporter.new(states) - File.open(options.report_file, "w+") do |f| - reporter.report(f, **options.report_opts) - end + def render_reports(states) + File.open(@options.report_file, "w+") do |f| + @reporter.report(f, states) end + end - reporter = Lrama::TraceReporter.new(grammar) - reporter.report(**options.trace_opts) + def render_diagram(grammar) + return unless @options.diagram - File.open(options.outfile, "w+") do |f| + File.open(@options.diagram_file, "w+") do |f| + Lrama::Diagram.render(out: f, grammar: grammar) + end + end + + def render_output(context, grammar) + File.open(@options.outfile, "w+") do |f| Lrama::Output.new( out: f, - output_file_path: options.outfile, - template_name: options.skeleton, - grammar_file_path: options.grammar_file, - header_file_path: options.header_file, + output_file_path: @options.outfile, + template_name: @options.skeleton, + grammar_file_path: @options.grammar_file, + header_file_path: @options.header_file, context: context, grammar: grammar, - error_recovery: options.error_recovery, + error_recovery: @options.error_recovery, ).render end - - logger = Lrama::Logger.new - exit false unless Lrama::GrammarValidator.new(grammar, states, logger).valid? 
- Lrama::Diagnostics.new(grammar, states, logger).run(options.diagnostic) end end end diff --git a/tool/lrama/lib/lrama/context.rb b/tool/lrama/lib/lrama/context.rb index 9f406f8de0bc65..eb068c1b9e4880 100644 --- a/tool/lrama/lib/lrama/context.rb +++ b/tool/lrama/lib/lrama/context.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require_relative "report/duration" +require_relative "tracer/duration" module Lrama # This is passed to a template class Context - include Report::Duration + include Tracer::Duration ErrorActionNumber = -Float::INFINITY BaseMin = -Float::INFINITY @@ -231,8 +231,8 @@ def compute_yydefact end # Shift is selected when S/R conflict exists. - state.selected_term_transitions.each do |shift, next_state| - actions[shift.next_sym.number] = next_state.id + state.selected_term_transitions.each do |shift| + actions[shift.next_sym.number] = shift.to_state.id end state.resolved_conflicts.select do |conflict| @@ -292,18 +292,18 @@ def compute_yydefgoto # of a default nterm transition destination. 
@yydefgoto = Array.new(@states.nterms.count, 0) # Mapping from nterm to next_states - nterm_to_next_states = {} + nterm_to_to_states = {} @states.states.each do |state| - state.nterm_transitions.each do |shift, next_state| - key = shift.next_sym - nterm_to_next_states[key] ||= [] - nterm_to_next_states[key] << [state, next_state] # [from_state, to_state] + state.nterm_transitions.each do |goto| + key = goto.next_sym + nterm_to_to_states[key] ||= [] + nterm_to_to_states[key] << [state, goto.to_state] # [from_state, to_state] end end @states.nterms.each do |nterm| - if (states = nterm_to_next_states[nterm]) + if (states = nterm_to_to_states[nterm]) default_state = states.map(&:last).group_by {|s| s }.max_by {|_, v| v.count }.first default_goto = default_state.id not_default_gotos = [] @@ -417,27 +417,25 @@ def compute_packed_table res = lowzero - froms_and_tos.first[0] + # Find the smallest `res` such that `@table[res + from]` is empty for all `from` in `froms_and_tos` while true do - ok = true + advanced = false - froms_and_tos.each do |from, to| - loc = res + from - - if @table[loc] - # If the cell of table is set, can not use the cell. 
- ok = false - break - end + while used_res[res] + res += 1 + advanced = true end - if ok && used_res[res] - ok = false + froms_and_tos.each do |from, to| + while @table[res + from] + res += 1 + advanced = true + end end - if ok + unless advanced + # no advance means that the current `res` satisfies the condition break - else - res += 1 end end diff --git a/tool/lrama/lib/lrama/counterexamples.rb b/tool/lrama/lib/lrama/counterexamples.rb index ee2b5d5959539d..60d830d048e96d 100644 --- a/tool/lrama/lib/lrama/counterexamples.rb +++ b/tool/lrama/lib/lrama/counterexamples.rb @@ -1,35 +1,64 @@ +# rbs_inline: enabled # frozen_string_literal: true require "set" +require "timeout" require_relative "counterexamples/derivation" require_relative "counterexamples/example" +require_relative "counterexamples/node" require_relative "counterexamples/path" -require_relative "counterexamples/production_path" -require_relative "counterexamples/start_path" require_relative "counterexamples/state_item" -require_relative "counterexamples/transition_path" require_relative "counterexamples/triple" module Lrama # See: https://www.cs.cornell.edu/andru/papers/cupex/cupex.pdf # 4. Constructing Nonunifying Counterexamples class Counterexamples - attr_reader :transitions, :productions - + PathSearchTimeLimit = 10 # 10 sec + CumulativeTimeLimit = 120 # 120 sec + + # @rbs! 
+ # @states: States + # @iterate_count: Integer + # @total_duration: Float + # @exceed_cumulative_time_limit: bool + # @state_items: Hash[[State, State::Item], StateItem] + # @triples: Hash[Integer, Triple] + # @transitions: Hash[[StateItem, Grammar::Symbol], StateItem] + # @reverse_transitions: Hash[[StateItem, Grammar::Symbol], Set[StateItem]] + # @productions: Hash[StateItem, Set[StateItem]] + # @reverse_productions: Hash[[State, Grammar::Symbol], Set[StateItem]] # Grammar::Symbol is nterm + # @state_item_shift: Integer + + attr_reader :transitions #: Hash[[StateItem, Grammar::Symbol], StateItem] + attr_reader :productions #: Hash[StateItem, Set[StateItem]] + + # @rbs (States states) -> void def initialize(states) @states = states + @iterate_count = 0 + @total_duration = 0 + @exceed_cumulative_time_limit = false + @triples = {} + setup_state_items setup_transitions setup_productions end + # @rbs () -> "#" def to_s "#" end alias :inspect :to_s + # @rbs (State conflict_state) -> Array[Example] def compute(conflict_state) conflict_state.conflicts.flat_map do |conflict| + # Check cumulative time limit for not each path search method call but each conflict + # to avoid one of example's path to be nil. + next if @exceed_cumulative_time_limit + case conflict.type when :shift_reduce # @type var conflict: State::ShiftReduceConflict @@ -38,22 +67,50 @@ def compute(conflict_state) # @type var conflict: State::ReduceReduceConflict reduce_reduce_examples(conflict_state, conflict) end + rescue Timeout::Error => e + STDERR.puts "Counterexamples calculation for state #{conflict_state.id} #{e.message} with #{@iterate_count} iteration" + increment_total_duration(PathSearchTimeLimit) + nil end.compact end private + # @rbs (State state, State::Item item) -> StateItem + def get_state_item(state, item) + @state_items[[state, item]] + end + + # For optimization, create all StateItem in advance + # and use them by fetching an instance from `@state_items`. 
+ # Do not create new StateItem instance in the shortest path search process + # to avoid miss hash lookup. + # + # @rbs () -> void + def setup_state_items + @state_items = {} + count = 0 + + @states.states.each do |state| + state.items.each do |item| + @state_items[[state, item]] = StateItem.new(count, state, item) + count += 1 + end + end + + @state_item_shift = Math.log(count, 2).ceil + end + + # @rbs () -> void def setup_transitions - # Hash [StateItem, Symbol] => StateItem @transitions = {} - # Hash [StateItem, Symbol] => Set(StateItem) @reverse_transitions = {} @states.states.each do |src_state| trans = {} #: Hash[Grammar::Symbol, State] - src_state.transitions.each do |shift, next_state| - trans[shift.next_sym] = next_state + src_state.transitions.each do |transition| + trans[transition.next_sym] = transition.to_state end src_state.items.each do |src_item| @@ -63,8 +120,8 @@ def setup_transitions dest_state.kernels.each do |dest_item| next unless (src_item.rule == dest_item.rule) && (src_item.position + 1 == dest_item.position) - src_state_item = StateItem.new(src_state, src_item) - dest_state_item = StateItem.new(dest_state, dest_item) + src_state_item = get_state_item(src_state, src_item) + dest_state_item = get_state_item(dest_state, dest_item) @transitions[[src_state_item, sym]] = dest_state_item @@ -77,21 +134,20 @@ def setup_transitions end end + # @rbs () -> void def setup_productions - # Hash [StateItem] => Set(Item) @productions = {} - # Hash [State, Symbol] => Set(Item). Symbol is nterm @reverse_productions = {} @states.states.each do |state| - # LHS => Set(Item) - h = {} #: Hash[Grammar::Symbol, Set[States::Item]] + # Grammar::Symbol is LHS + h = {} #: Hash[Grammar::Symbol, Set[StateItem]] state.closure.each do |item| sym = item.lhs h[sym] ||= Set.new - h[sym] << item + h[sym] << get_state_item(state, item) end state.items.each do |item| @@ -99,101 +155,118 @@ def setup_productions next if item.next_sym.term? 
sym = item.next_sym - state_item = StateItem.new(state, item) - # @type var key: [State, Grammar::Symbol] - key = [state, sym] - + state_item = get_state_item(state, item) @productions[state_item] = h[sym] + # @type var key: [State, Grammar::Symbol] + key = [state, sym] @reverse_productions[key] ||= Set.new - @reverse_productions[key] << item + @reverse_productions[key] << state_item end end end + # For optimization, use same Triple if it's already created. + # Do not create new Triple instance anywhere else + # to avoid miss hash lookup. + # + # @rbs (StateItem state_item, Bitmap::bitmap precise_lookahead_set) -> Triple + def get_triple(state_item, precise_lookahead_set) + key = (precise_lookahead_set << @state_item_shift) | state_item.id + @triples[key] ||= Triple.new(state_item, precise_lookahead_set) + end + + # @rbs (State conflict_state, State::ShiftReduceConflict conflict) -> Example def shift_reduce_example(conflict_state, conflict) conflict_symbol = conflict.symbols.first - # @type var shift_conflict_item: ::Lrama::States::Item + # @type var shift_conflict_item: ::Lrama::State::Item shift_conflict_item = conflict_state.items.find { |item| item.next_sym == conflict_symbol } - path2 = shortest_path(conflict_state, conflict.reduce.item, conflict_symbol) - path1 = find_shift_conflict_shortest_path(path2, conflict_state, shift_conflict_item) + path2 = with_timeout("#shortest_path:") do + shortest_path(conflict_state, conflict.reduce.item, conflict_symbol) + end + path1 = with_timeout("#find_shift_conflict_shortest_path:") do + find_shift_conflict_shortest_path(path2, conflict_state, shift_conflict_item) + end Example.new(path1, path2, conflict, conflict_symbol, self) end + # @rbs (State conflict_state, State::ReduceReduceConflict conflict) -> Example def reduce_reduce_examples(conflict_state, conflict) conflict_symbol = conflict.symbols.first - path1 = shortest_path(conflict_state, conflict.reduce1.item, conflict_symbol) - path2 = shortest_path(conflict_state, 
conflict.reduce2.item, conflict_symbol) + path1 = with_timeout("#shortest_path:") do + shortest_path(conflict_state, conflict.reduce1.item, conflict_symbol) + end + path2 = with_timeout("#shortest_path:") do + shortest_path(conflict_state, conflict.reduce2.item, conflict_symbol) + end Example.new(path1, path2, conflict, conflict_symbol, self) end - def find_shift_conflict_shortest_path(reduce_path, conflict_state, conflict_item) - state_items = find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) - build_paths_from_state_items(state_items) - end + # @rbs (Array[StateItem]? reduce_state_items, State conflict_state, State::Item conflict_item) -> Array[StateItem] + def find_shift_conflict_shortest_path(reduce_state_items, conflict_state, conflict_item) + time1 = Time.now.to_f + @iterate_count = 0 - def find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) - target_state_item = StateItem.new(conflict_state, conflict_item) + target_state_item = get_state_item(conflict_state, conflict_item) result = [target_state_item] - reversed_reduce_path = reduce_path.to_a.reverse + reversed_state_items = reduce_state_items.to_a.reverse # Index for state_item i = 0 - while (path = reversed_reduce_path[i]) + while (state_item = reversed_state_items[i]) # Index for prev_state_item j = i + 1 _j = j - while (prev_path = reversed_reduce_path[j]) - if prev_path.production? + while (prev_state_item = reversed_state_items[j]) + if prev_state_item.type == :production j += 1 else break end end - state_item = path.to - prev_state_item = prev_path&.to - if target_state_item == state_item || target_state_item.item.start_item? result.concat( - reversed_reduce_path[_j..-1] #: Array[StartPath|TransitionPath|ProductionPath] - .map(&:to)) + reversed_state_items[_j..-1] #: Array[StateItem] + ) break end - if target_state_item.item.beginning_of_rule? 
- queue = [] #: Array[Array[StateItem]] - queue << [target_state_item] + if target_state_item.type == :production + queue = [] #: Array[Node[StateItem]] + queue << Node.new(target_state_item, nil) # Find reverse production while (sis = queue.shift) - si = sis.last + @iterate_count += 1 + si = sis.elem # Reach to start state if si.item.start_item? - sis.shift - result.concat(sis) + a = Node.to_a(sis).reverse + a.shift + result.concat(a) target_state_item = si break end - if si.item.beginning_of_rule? + if si.type == :production # @type var key: [State, Grammar::Symbol] key = [si.state, si.item.lhs] - @reverse_productions[key].each do |item| - state_item = StateItem.new(si.state, item) - queue << (sis + [state_item]) + @reverse_productions[key].each do |state_item| + queue << Node.new(state_item, sis) end else # @type var key: [StateItem, Grammar::Symbol] key = [si, si.item.previous_sym] @reverse_transitions[key].each do |prev_target_state_item| next if prev_target_state_item.state != prev_state_item&.state - sis.shift - result.concat(sis) + a = Node.to_a(sis).reverse + a.shift + result.concat(a) result << prev_target_state_item target_state_item = prev_target_state_item i = j @@ -216,68 +289,106 @@ def find_shift_conflict_shortest_state_items(reduce_path, conflict_state, confli end end + time2 = Time.now.to_f + duration = time2 - time1 + increment_total_duration(duration) + + if Tracer::Duration.enabled? + STDERR.puts sprintf(" %s %10.5f s", "find_shift_conflict_shortest_path #{@iterate_count} iteration", duration) + end + result.reverse end - def build_paths_from_state_items(state_items) - state_items.zip([nil] + state_items).map do |si, prev_si| - case - when prev_si.nil? - StartPath.new(si) - when si.item.beginning_of_rule? 
- ProductionPath.new(prev_si, si) - else - TransitionPath.new(prev_si, si) + # @rbs (StateItem target) -> Set[StateItem] + def reachable_state_items(target) + result = Set.new + queue = [target] + + while (state_item = queue.shift) + next if result.include?(state_item) + result << state_item + + @reverse_transitions[[state_item, state_item.item.previous_sym]]&.each do |prev_state_item| + queue << prev_state_item + end + + if state_item.item.beginning_of_rule? + @reverse_productions[[state_item.state, state_item.item.lhs]]&.each do |si| + queue << si + end end end + + result end + # @rbs (State conflict_state, State::Item conflict_reduce_item, Grammar::Symbol conflict_term) -> ::Array[StateItem]? def shortest_path(conflict_state, conflict_reduce_item, conflict_term) - # queue: is an array of [Triple, [Path]] - queue = [] #: Array[[Triple, Array[StartPath|TransitionPath|ProductionPath]]] + time1 = Time.now.to_f + @iterate_count = 0 + + queue = [] #: Array[[Triple, Path]] visited = {} #: Hash[Triple, true] start_state = @states.states.first #: Lrama::State + conflict_term_bit = Bitmap::from_integer(conflict_term.number) raise "BUG: Start state should be just one kernel." 
if start_state.kernels.count != 1 + reachable = reachable_state_items(get_state_item(conflict_state, conflict_reduce_item)) + start = get_triple(get_state_item(start_state, start_state.kernels.first), Bitmap::from_integer(@states.eof_symbol.number)) - start = Triple.new(start_state, start_state.kernels.first, Set.new([@states.eof_symbol])) + queue << [start, Path.new(start.state_item, nil)] - queue << [start, [StartPath.new(start.state_item)]] + while (triple, path = queue.shift) + @iterate_count += 1 - while true - triple, paths = queue.shift + # Found + if (triple.state == conflict_state) && (triple.item == conflict_reduce_item) && (triple.l & conflict_term_bit != 0) + state_items = [path.state_item] - next if visited[triple] - visited[triple] = true + while (path = path.parent) + state_items << path.state_item + end - # Found - if triple.state == conflict_state && triple.item == conflict_reduce_item && triple.l.include?(conflict_term) - return paths + time2 = Time.now.to_f + duration = time2 - time1 + increment_total_duration(duration) + + if Tracer::Duration.enabled? 
+ STDERR.puts sprintf(" %s %10.5f s", "shortest_path #{@iterate_count} iteration", duration) + end + + return state_items.reverse end # transition - triple.state.transitions.each do |shift, next_state| - next unless triple.item.next_sym && triple.item.next_sym == shift.next_sym - next_state.kernels.each do |kernel| - next if kernel.rule != triple.item.rule - t = Triple.new(next_state, kernel, triple.l) - queue << [t, paths + [TransitionPath.new(triple.state_item, t.state_item)]] + next_state_item = @transitions[[triple.state_item, triple.item.next_sym]] + if next_state_item && reachable.include?(next_state_item) + # @type var t: Triple + t = get_triple(next_state_item, triple.l) + unless visited[t] + visited[t] = true + queue << [t, Path.new(t.state_item, path)] end end # production step - triple.state.closure.each do |item| - next unless triple.item.next_sym && triple.item.next_sym == item.lhs + @productions[triple.state_item]&.each do |si| + next unless reachable.include?(si) + l = follow_l(triple.item, triple.l) - t = Triple.new(triple.state, item, l) - queue << [t, paths + [ProductionPath.new(triple.state_item, t.state_item)]] + # @type var t: Triple + t = get_triple(si, l) + unless visited[t] + visited[t] = true + queue << [t, Path.new(t.state_item, path)] + end end - - break if queue.empty? end return nil end + # @rbs (State::Item item, Bitmap::bitmap current_l) -> Bitmap::bitmap def follow_l(item, current_l) # 1. follow_L (A -> X1 ... Xn-1 • Xn) = L # 2. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = {Xk+2} if Xk+2 is a terminal @@ -287,11 +398,28 @@ def follow_l(item, current_l) when item.number_of_rest_symbols == 1 current_l when item.next_next_sym.term? 
- Set.new([item.next_next_sym]) + item.next_next_sym.number_bitmap when !item.next_next_sym.nullable - item.next_next_sym.first_set + item.next_next_sym.first_set_bitmap else - item.next_next_sym.first_set + follow_l(item.new_by_next_position, current_l) + item.next_next_sym.first_set_bitmap | follow_l(item.new_by_next_position, current_l) + end + end + + # @rbs [T] (String message) { -> T } -> T + def with_timeout(message) + Timeout.timeout(PathSearchTimeLimit, Timeout::Error, message + " timeout of #{PathSearchTimeLimit} sec exceeded") do + yield + end + end + + # @rbs (Float|Integer duration) -> void + def increment_total_duration(duration) + @total_duration += duration + + if !@exceed_cumulative_time_limit && @total_duration > CumulativeTimeLimit + @exceed_cumulative_time_limit = true + STDERR.puts "CumulativeTimeLimit #{CumulativeTimeLimit} sec exceeded then skip following Counterexamples calculation" end end end diff --git a/tool/lrama/lib/lrama/counterexamples/derivation.rb b/tool/lrama/lib/lrama/counterexamples/derivation.rb index 368d7f1032f92d..a2b74767a941cf 100644 --- a/tool/lrama/lib/lrama/counterexamples/derivation.rb +++ b/tool/lrama/lib/lrama/counterexamples/derivation.rb @@ -1,34 +1,44 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Counterexamples class Derivation - attr_reader :item, :left, :right - attr_writer :right + # @rbs! + # @item: State::Item + # @left: Derivation? - def initialize(item, left, right = nil) + attr_reader :item #: State::Item + attr_reader :left #: Derivation? + attr_accessor :right #: Derivation? + + # @rbs (State::Item item, Derivation? 
left) -> void + def initialize(item, left) @item = item @left = left - @right = right end + # @rbs () -> ::String def to_s "#" end alias :inspect :to_s + # @rbs () -> Array[String] def render_strings_for_report result = [] #: Array[String] _render_for_report(self, 0, result, 0) result.map(&:rstrip) end + # @rbs () -> String def render_for_report render_strings_for_report.join("\n") end private + # @rbs (Derivation derivation, Integer offset, Array[String] strings, Integer index) -> Integer def _render_for_report(derivation, offset, strings, index) item = derivation.item if strings[index] diff --git a/tool/lrama/lib/lrama/counterexamples/example.rb b/tool/lrama/lib/lrama/counterexamples/example.rb index bb08428fcd8f11..c007f45af4ee38 100644 --- a/tool/lrama/lib/lrama/counterexamples/example.rb +++ b/tool/lrama/lib/lrama/counterexamples/example.rb @@ -1,12 +1,31 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Counterexamples class Example - attr_reader :path1, :path2, :conflict, :conflict_symbol + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # @path1: ::Array[StateItem] + # @path2: ::Array[StateItem] + # @conflict: State::conflict + # @conflict_symbol: Grammar::Symbol + # @counterexamples: Counterexamples + # @derivations1: Derivation + # @derivations2: Derivation + + attr_reader :path1 #: ::Array[StateItem] + attr_reader :path2 #: ::Array[StateItem] + attr_reader :conflict #: State::conflict + attr_reader :conflict_symbol #: Grammar::Symbol # path1 is shift conflict when S/R conflict # path2 is always reduce conflict + # + # @rbs (Array[StateItem]? path1, Array[StateItem]? 
path2, State::conflict conflict, Grammar::Symbol conflict_symbol, Counterexamples counterexamples) -> void def initialize(path1, path2, conflict, conflict_symbol, counterexamples) @path1 = path1 @path2 = path2 @@ -15,69 +34,75 @@ def initialize(path1, path2, conflict, conflict_symbol, counterexamples) @counterexamples = counterexamples end + # @rbs () -> (:shift_reduce | :reduce_reduce) def type @conflict.type end + # @rbs () -> State::Item def path1_item - @path1.last.to.item + @path1.last.item end + # @rbs () -> State::Item def path2_item - @path2.last.to.item + @path2.last.item end + # @rbs () -> Derivation def derivations1 @derivations1 ||= _derivations(path1) end + # @rbs () -> Derivation def derivations2 @derivations2 ||= _derivations(path2) end private - def _derivations(paths) + # @rbs (Array[StateItem] state_items) -> Derivation + def _derivations(state_items) derivation = nil #: Derivation current = :production - last_path = paths.last #: Path - lookahead_sym = last_path.to.item.end_of_rule? ? @conflict_symbol : nil + last_state_item = state_items.last #: StateItem + lookahead_sym = last_state_item.item.end_of_rule? ? @conflict_symbol : nil - paths.reverse_each do |path| - item = path.to.item + state_items.reverse_each do |si| + item = si.item case current when :production - case path - when StartPath + case si.type + when :start derivation = Derivation.new(item, derivation) current = :start - when TransitionPath + when :transition derivation = Derivation.new(item, derivation) current = :transition - when ProductionPath + when :production derivation = Derivation.new(item, derivation) current = :production else - raise "Unexpected. #{path}" + raise "Unexpected. 
#{si}" end if lookahead_sym && item.next_next_sym && item.next_next_sym.first_set.include?(lookahead_sym) - state_item = @counterexamples.transitions[[path.to, item.next_sym]] - derivation2 = find_derivation_for_symbol(state_item, lookahead_sym) + si2 = @counterexamples.transitions[[si, item.next_sym]] + derivation2 = find_derivation_for_symbol(si2, lookahead_sym) derivation.right = derivation2 # steep:ignore lookahead_sym = nil end when :transition - case path - when StartPath + case si.type + when :start derivation = Derivation.new(item, derivation) current = :start - when TransitionPath + when :transition # ignore current = :transition - when ProductionPath + when :production # ignore current = :production end @@ -91,6 +116,7 @@ def _derivations(paths) derivation end + # @rbs (StateItem state_item, Grammar::Symbol sym) -> Derivation? def find_derivation_for_symbol(state_item, sym) queue = [] #: Array[Array[StateItem]] queue << [state_item] @@ -110,9 +136,8 @@ def find_derivation_for_symbol(state_item, sym) end if next_sym.nterm? && next_sym.first_set.include?(sym) - @counterexamples.productions[si].each do |next_item| - next if next_item.empty_rule? - next_si = StateItem.new(si.state, next_item) + @counterexamples.productions[si].each do |next_si| + next if next_si.item.empty_rule? next if sis.include?(next_si) queue << (sis + [next_si]) end diff --git a/tool/lrama/lib/lrama/counterexamples/node.rb b/tool/lrama/lib/lrama/counterexamples/node.rb new file mode 100644 index 00000000000000..9214a0e7f1315e --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/node.rb @@ -0,0 +1,30 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Counterexamples + # @rbs generic E < Object -- Type of an element + class Node + attr_reader :elem #: E + attr_reader :next_node #: Node[E]? 
+ + # @rbs [E < Object] (Node[E] node) -> Array[E] + def self.to_a(node) + a = [] # steep:ignore UnannotatedEmptyCollection + + while (node) + a << node.elem + node = node.next_node + end + + a + end + + # @rbs (E elem, Node[E]? next_node) -> void + def initialize(elem, next_node) + @elem = elem + @next_node = next_node + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/path.rb b/tool/lrama/lib/lrama/counterexamples/path.rb index 0a5823dd21690d..6b1325f73b1abf 100644 --- a/tool/lrama/lib/lrama/counterexamples/path.rb +++ b/tool/lrama/lib/lrama/counterexamples/path.rb @@ -1,29 +1,27 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Counterexamples class Path - def initialize(from_state_item, to_state_item) - @from_state_item = from_state_item - @to_state_item = to_state_item - end + # @rbs! + # @state_item: StateItem + # @parent: Path? - def from - @from_state_item - end + attr_reader :state_item #: StateItem + attr_reader :parent #: Path? - def to - @to_state_item + # @rbs (StateItem state_item, Path? parent) -> void + def initialize(state_item, parent) + @state_item = state_item + @parent = parent end + # @rbs () -> ::String def to_s - "#" + "#" end alias :inspect :to_s - - def type - raise NotImplementedError - end end end end diff --git a/tool/lrama/lib/lrama/counterexamples/production_path.rb b/tool/lrama/lib/lrama/counterexamples/production_path.rb deleted file mode 100644 index 0a230c7fce72fd..00000000000000 --- a/tool/lrama/lib/lrama/counterexamples/production_path.rb +++ /dev/null @@ -1,19 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class Counterexamples - class ProductionPath < Path - def type - :production - end - - def transition? - false - end - - def production? 
- true - end - end - end -end diff --git a/tool/lrama/lib/lrama/counterexamples/start_path.rb b/tool/lrama/lib/lrama/counterexamples/start_path.rb deleted file mode 100644 index c0351c8248972a..00000000000000 --- a/tool/lrama/lib/lrama/counterexamples/start_path.rb +++ /dev/null @@ -1,23 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class Counterexamples - class StartPath < Path - def initialize(to_state_item) - super nil, to_state_item - end - - def type - :start - end - - def transition? - false - end - - def production? - false - end - end - end -end diff --git a/tool/lrama/lib/lrama/counterexamples/state_item.rb b/tool/lrama/lib/lrama/counterexamples/state_item.rb index c919818324c2f4..8c2481d7938122 100644 --- a/tool/lrama/lib/lrama/counterexamples/state_item.rb +++ b/tool/lrama/lib/lrama/counterexamples/state_item.rb @@ -1,8 +1,31 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Counterexamples - class StateItem < Struct.new(:state, :item) + class StateItem + attr_reader :id #: Integer + attr_reader :state #: State + attr_reader :item #: State::Item + + # @rbs (Integer id, State state, State::Item item) -> void + def initialize(id, state, item) + @id = id + @state = state + @item = item + end + + # @rbs () -> (:start | :transition | :production) + def type + case + when item.start_item? + :start + when item.beginning_of_rule? + :production + else + :transition + end + end end end end diff --git a/tool/lrama/lib/lrama/counterexamples/transition_path.rb b/tool/lrama/lib/lrama/counterexamples/transition_path.rb deleted file mode 100644 index 47bfbc4f98d15d..00000000000000 --- a/tool/lrama/lib/lrama/counterexamples/transition_path.rb +++ /dev/null @@ -1,19 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class Counterexamples - class TransitionPath < Path - def type - :transition - end - - def transition? - true - end - - def production? 
- false - end - end - end -end diff --git a/tool/lrama/lib/lrama/counterexamples/triple.rb b/tool/lrama/lib/lrama/counterexamples/triple.rb index 64014ee223521b..98fe051f530f8d 100644 --- a/tool/lrama/lib/lrama/counterexamples/triple.rb +++ b/tool/lrama/lib/lrama/counterexamples/triple.rb @@ -1,21 +1,39 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Counterexamples - # s: state - # itm: item within s - # l: precise lookahead set - class Triple < Struct.new(:s, :itm, :l) - alias :state :s - alias :item :itm - alias :precise_lookahead_set :l + class Triple + attr_reader :precise_lookahead_set #: Bitmap::bitmap + alias :l :precise_lookahead_set + + # @rbs (StateItem state_item, Bitmap::bitmap precise_lookahead_set) -> void + def initialize(state_item, precise_lookahead_set) + @state_item = state_item + @precise_lookahead_set = precise_lookahead_set + end + + # @rbs () -> State + def state + @state_item.state + end + alias :s :state + + # @rbs () -> State::Item + def item + @state_item.item + end + alias :itm :item + + # @rbs () -> StateItem def state_item - StateItem.new(state, item) + @state_item end + # @rbs () -> ::String def inspect - "#{state.inspect}. #{item.display_name}. #{l.map(&:id).map(&:s_value)}" + "#{state.inspect}. #{item.display_name}. 
#{l.to_s(2)}" end alias :to_s :inspect end diff --git a/tool/lrama/lib/lrama/diagnostics.rb b/tool/lrama/lib/lrama/diagnostics.rb deleted file mode 100644 index e9da398c89c9c6..00000000000000 --- a/tool/lrama/lib/lrama/diagnostics.rb +++ /dev/null @@ -1,36 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class Diagnostics - def initialize(grammar, states, logger) - @grammar = grammar - @states = states - @logger = logger - end - - def run(diagnostic) - if diagnostic - diagnose_conflict - diagnose_parameterizing_redefined - end - end - - private - - def diagnose_conflict - if @states.sr_conflicts_count != 0 - @logger.warn("shift/reduce conflicts: #{@states.sr_conflicts_count} found") - end - - if @states.rr_conflicts_count != 0 - @logger.warn("reduce/reduce conflicts: #{@states.rr_conflicts_count} found") - end - end - - def diagnose_parameterizing_redefined - @grammar.parameterizing_rule_resolver.redefined_rules.each do |rule| - @logger.warn("parameterizing rule redefined: #{rule}") - end - end - end -end diff --git a/tool/lrama/lib/lrama/diagram.rb b/tool/lrama/lib/lrama/diagram.rb new file mode 100644 index 00000000000000..985808933fb10f --- /dev/null +++ b/tool/lrama/lib/lrama/diagram.rb @@ -0,0 +1,77 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Diagram + class << self + # @rbs (IO out, Grammar grammar, String template_name) -> void + def render(out:, grammar:, template_name: 'diagram/diagram.html') + return unless require_railroad_diagrams + new(out: out, grammar: grammar, template_name: template_name).render + end + + # @rbs () -> bool + def require_railroad_diagrams + require "railroad_diagrams" + true + rescue LoadError + warn "railroad_diagrams is not installed. Please run `bundle install`." 
+ false + end + end + + # @rbs (IO out, Grammar grammar, String template_name) -> void + def initialize(out:, grammar:, template_name: 'diagram/diagram.html') + @grammar = grammar + @out = out + @template_name = template_name + end + + # @rbs () -> void + def render + RailroadDiagrams::TextDiagram.set_formatting(RailroadDiagrams::TextDiagram::PARTS_UNICODE) + @out << ERB.render(template_file, output: self) + end + + # @rbs () -> string + def default_style + RailroadDiagrams::Style::default_style + end + + # @rbs () -> string + def diagrams + result = +'' + @grammar.unique_rule_s_values.each do |s_value| + diagrams = + @grammar.select_rules_by_s_value(s_value).map { |r| r.to_diagrams } + add_diagram( + s_value, + RailroadDiagrams::Diagram.new( + RailroadDiagrams::Choice.new(0, *diagrams), + ), + result + ) + end + result + end + + private + + # @rbs () -> string + def template_dir + File.expand_path('../../template', __dir__) + end + + # @rbs () -> string + def template_file + File.join(template_dir, @template_name) + end + + # @rbs (String name, RailroadDiagrams::Diagram diagram, String result) -> void + def add_diagram(name, diagram, result) + result << "\n

#{RailroadDiagrams.escape_html(name)}

" + diagram.write_svg(result.method(:<<)) + result << "\n" + end + end +end diff --git a/tool/lrama/lib/lrama/digraph.rb b/tool/lrama/lib/lrama/digraph.rb index 2161f304743cf6..52865f52dde90d 100644 --- a/tool/lrama/lib/lrama/digraph.rb +++ b/tool/lrama/lib/lrama/digraph.rb @@ -2,13 +2,34 @@ # frozen_string_literal: true module Lrama - # Algorithm Digraph of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625) + # Digraph Algorithm of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625) # - # @rbs generic X < Object -- Type of a member of `sets` - # @rbs generic Y < _Or -- Type of sets assigned to a member of `sets` + # Digraph is an algorithm for graph data structure. + # The algorithm efficiently traverses SCC (Strongly Connected Component) of graph + # and merges nodes attributes within the same SCC. + # + # `compute_read_sets` and `compute_follow_sets` have the same structure. + # Graph of gotos and attributes of gotos are given then compute propagated attributes for each node. + # + # In the case of `compute_read_sets`: + # + # * Set of gotos is nodes of graph + # * `reads_relation` is edges of graph + # * `direct_read_sets` is nodes attributes + # + # In the case of `compute_follow_sets`: + # + # * Set of gotos is nodes of graph + # * `includes_relation` is edges of graph + # * `read_sets` is nodes attributes + # + # + # @rbs generic X < Object -- Type of a node + # @rbs generic Y < _Or -- Type of attribute sets assigned to a node which should support merge operation (#| method) class Digraph - # TODO: rbs-inline 0.10.0 doesn't support instance variables. + # TODO: rbs-inline 0.11.0 doesn't support instance variables. # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 # # @rbs! # interface _Or @@ -21,9 +42,9 @@ class Digraph # @h: Hash[X, (Integer|Float)?] 
# @result: Hash[X, Y] - # @rbs sets: Array[X] - # @rbs relation: Hash[X, Array[X]] - # @rbs base_function: Hash[X, Y] + # @rbs sets: Array[X] -- Nodes of graph + # @rbs relation: Hash[X, Array[X]] -- Edges of graph + # @rbs base_function: Hash[X, Y] -- Attributes of nodes # @rbs return: void def initialize(sets, relation, base_function) diff --git a/tool/lrama/lib/lrama/erb.rb b/tool/lrama/lib/lrama/erb.rb new file mode 100644 index 00000000000000..8f8be54811657e --- /dev/null +++ b/tool/lrama/lib/lrama/erb.rb @@ -0,0 +1,29 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +require "erb" + +module Lrama + class ERB + # @rbs (String file, **untyped kwargs) -> String + def self.render(file, **kwargs) + new(file).render(**kwargs) + end + + # @rbs (String file) -> void + def initialize(file) + input = File.read(file) + if ::ERB.instance_method(:initialize).parameters.last.first == :key + @erb = ::ERB.new(input, trim_mode: '-') + else + @erb = ::ERB.new(input, nil, '-') # steep:ignore UnexpectedPositionalArgument + end + @erb.filename = file + end + + # @rbs (**untyped kwargs) -> String + def render(**kwargs) + @erb.result_with_hash(kwargs) + end + end +end diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb index 214ca1a3f238ba..95a80bb01cbcdf 100644 --- a/tool/lrama/lib/lrama/grammar.rb +++ b/tool/lrama/lib/lrama/grammar.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true require "forwardable" @@ -7,7 +8,8 @@ require_relative "grammar/counter" require_relative "grammar/destructor" require_relative "grammar/error_token" -require_relative "grammar/parameterizing_rule" +require_relative "grammar/inline" +require_relative "grammar/parameterized" require_relative "grammar/percent_code" require_relative "grammar/precedence" require_relative "grammar/printer" @@ -23,19 +25,89 @@ module Lrama # Grammar is the result of parsing an input grammar file class Grammar + # @rbs! 
+ # + # interface _DelegatedMethods + # def rules: () -> Array[Rule] + # def accept_symbol: () -> Grammar::Symbol + # def eof_symbol: () -> Grammar::Symbol + # def undef_symbol: () -> Grammar::Symbol + # def precedences: () -> Array[Precedence] + # + # # delegate to @symbols_resolver + # def symbols: () -> Array[Grammar::Symbol] + # def terms: () -> Array[Grammar::Symbol] + # def nterms: () -> Array[Grammar::Symbol] + # def find_symbol_by_s_value!: (::String s_value) -> Grammar::Symbol + # def ielr_defined?: () -> bool + # end + # + # include Symbols::Resolver::_DelegatedMethods + # + # @rule_counter: Counter + # @percent_codes: Array[PercentCode] + # @printers: Array[Printer] + # @destructors: Array[Destructor] + # @error_tokens: Array[ErrorToken] + # @symbols_resolver: Symbols::Resolver + # @types: Array[Type] + # @rule_builders: Array[RuleBuilder] + # @rules: Array[Rule] + # @sym_to_rules: Hash[Integer, Array[Rule]] + # @parameterized_resolver: Parameterized::Resolver + # @empty_symbol: Grammar::Symbol + # @eof_symbol: Grammar::Symbol + # @error_symbol: Grammar::Symbol + # @undef_symbol: Grammar::Symbol + # @accept_symbol: Grammar::Symbol + # @aux: Auxiliary + # @no_stdlib: bool + # @locations: bool + # @define: Hash[String, String] + # @required: bool + # @union: Union + # @precedences: Array[Precedence] + # @start_nterm: Lrama::Lexer::Token::Base? 
+ extend Forwardable - attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux, :parameterizing_rule_resolver - attr_accessor :union, :expect, :printers, :error_tokens, :lex_param, :parse_param, :initial_action, - :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack, - :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations, :define + attr_reader :percent_codes #: Array[PercentCode] + attr_reader :eof_symbol #: Grammar::Symbol + attr_reader :error_symbol #: Grammar::Symbol + attr_reader :undef_symbol #: Grammar::Symbol + attr_reader :accept_symbol #: Grammar::Symbol + attr_reader :aux #: Auxiliary + attr_reader :parameterized_resolver #: Parameterized::Resolver + attr_reader :precedences #: Array[Precedence] + attr_accessor :union #: Union + attr_accessor :expect #: Integer + attr_accessor :printers #: Array[Printer] + attr_accessor :error_tokens #: Array[ErrorToken] + attr_accessor :lex_param #: String + attr_accessor :parse_param #: String + attr_accessor :initial_action #: Grammar::Code::InitialActionCode + attr_accessor :after_shift #: Lexer::Token::Base + attr_accessor :before_reduce #: Lexer::Token::Base + attr_accessor :after_reduce #: Lexer::Token::Base + attr_accessor :after_shift_error_token #: Lexer::Token::Base + attr_accessor :after_pop_stack #: Lexer::Token::Base + attr_accessor :symbols_resolver #: Symbols::Resolver + attr_accessor :types #: Array[Type] + attr_accessor :rules #: Array[Rule] + attr_accessor :rule_builders #: Array[RuleBuilder] + attr_accessor :sym_to_rules #: Hash[Integer, Array[Rule]] + attr_accessor :no_stdlib #: bool + attr_accessor :locations #: bool + attr_accessor :define #: Hash[String, String] + attr_accessor :required #: bool def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value, :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol, :find_symbol_by_s_value!, 
:fill_symbol_number, :fill_nterm_type, :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number! - def initialize(rule_counter, define = {}) + # @rbs (Counter rule_counter, bool locations, Hash[String, String] define) -> void + def initialize(rule_counter, locations, define = {}) @rule_counter = rule_counter # Code defined by "%code" @@ -48,7 +120,7 @@ def initialize(rule_counter, define = {}) @rule_builders = [] @rules = [] @sym_to_rules = {} - @parameterizing_rule_resolver = ParameterizingRule::Resolver.new + @parameterized_resolver = Parameterized::Resolver.new @empty_symbol = nil @eof_symbol = nil @error_symbol = nil @@ -56,93 +128,131 @@ def initialize(rule_counter, define = {}) @accept_symbol = nil @aux = Auxiliary.new @no_stdlib = false - @locations = false - @define = define.map {|d| d.split('=') }.to_h + @locations = locations + @define = define + @required = false + @precedences = [] + @start_nterm = nil append_special_symbols end + # @rbs (Counter rule_counter, Counter midrule_action_counter) -> RuleBuilder def create_rule_builder(rule_counter, midrule_action_counter) - RuleBuilder.new(rule_counter, midrule_action_counter, @parameterizing_rule_resolver) + RuleBuilder.new(rule_counter, midrule_action_counter, @parameterized_resolver) end + # @rbs (id: Lexer::Token::Base, code: Lexer::Token::UserCode) -> Array[PercentCode] def add_percent_code(id:, code:) @percent_codes << PercentCode.new(id.s_value, code.s_value) end + # @rbs (ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], token_code: Lexer::Token::UserCode, lineno: Integer) -> Array[Destructor] def add_destructor(ident_or_tags:, token_code:, lineno:) @destructors << Destructor.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) end + # @rbs (ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], token_code: Lexer::Token::UserCode, lineno: Integer) -> Array[Printer] def add_printer(ident_or_tags:, token_code:, lineno:) @printers << 
Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) end + # @rbs (ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], token_code: Lexer::Token::UserCode, lineno: Integer) -> Array[ErrorToken] def add_error_token(ident_or_tags:, token_code:, lineno:) @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) end + # @rbs (id: Lexer::Token::Base, tag: Lexer::Token::Tag) -> Array[Type] def add_type(id:, tag:) @types << Type.new(id: id, tag: tag) end - def add_nonassoc(sym, precedence) - set_precedence(sym, Precedence.new(type: :nonassoc, precedence: precedence)) + # @rbs (Grammar::Symbol sym, Integer precedence, String s_value, Integer lineno) -> Precedence + def add_nonassoc(sym, precedence, s_value, lineno) + set_precedence(sym, Precedence.new(symbol: sym, s_value: s_value, type: :nonassoc, precedence: precedence, lineno: lineno)) + end + + # @rbs (Grammar::Symbol sym, Integer precedence, String s_value, Integer lineno) -> Precedence + def add_left(sym, precedence, s_value, lineno) + set_precedence(sym, Precedence.new(symbol: sym, s_value: s_value, type: :left, precedence: precedence, lineno: lineno)) end - def add_left(sym, precedence) - set_precedence(sym, Precedence.new(type: :left, precedence: precedence)) + # @rbs (Grammar::Symbol sym, Integer precedence, String s_value, Integer lineno) -> Precedence + def add_right(sym, precedence, s_value, lineno) + set_precedence(sym, Precedence.new(symbol: sym, s_value: s_value, type: :right, precedence: precedence, lineno: lineno)) end - def add_right(sym, precedence) - set_precedence(sym, Precedence.new(type: :right, precedence: precedence)) + # @rbs (Grammar::Symbol sym, Integer precedence, String s_value, Integer lineno) -> Precedence + def add_precedence(sym, precedence, s_value, lineno) + set_precedence(sym, Precedence.new(symbol: sym, s_value: s_value, type: :precedence, precedence: precedence, lineno: lineno)) end - def 
add_precedence(sym, precedence) - set_precedence(sym, Precedence.new(type: :precedence, precedence: precedence)) + # @rbs (Lrama::Lexer::Token::Base id) -> Lrama::Lexer::Token::Base + def set_start_nterm(id) + # When multiple `%start` directives are defined, Bison does not generate an error, + # whereas Lrama does generate an error. + # Related Bison's specification are + # refs: https://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html + if @start_nterm.nil? + @start_nterm = id + else + start = @start_nterm #: Lrama::Lexer::Token::Base + raise "Start non-terminal is already set to #{start.s_value} (line: #{start.first_line}). Cannot set to #{id.s_value} (line: #{id.first_line})." + end end + # @rbs (Grammar::Symbol sym, Precedence precedence) -> (Precedence | bot) def set_precedence(sym, precedence) - raise "" if sym.nterm? + @precedences << precedence sym.precedence = precedence end + # @rbs (Grammar::Code::NoReferenceCode code, Integer lineno) -> Union def set_union(code, lineno) @union = Union.new(code: code, lineno: lineno) end + # @rbs (RuleBuilder builder) -> Array[RuleBuilder] def add_rule_builder(builder) @rule_builders << builder end - def add_parameterizing_rule(rule) - @parameterizing_rule_resolver.add_parameterizing_rule(rule) + # @rbs (Parameterized::Rule rule) -> Array[Parameterized::Rule] + def add_parameterized_rule(rule) + @parameterized_resolver.add_rule(rule) end - def parameterizing_rules - @parameterizing_rule_resolver.rules + # @rbs () -> Array[Parameterized::Rule] + def parameterized_rules + @parameterized_resolver.rules end - def insert_before_parameterizing_rules(rules) - @parameterizing_rule_resolver.rules = rules + @parameterizing_rule_resolver.rules + # @rbs (Array[Parameterized::Rule] rules) -> Array[Parameterized::Rule] + def prepend_parameterized_rules(rules) + @parameterized_resolver.rules = rules + @parameterized_resolver.rules end + # @rbs (Integer prologue_first_lineno) -> Integer def 
prologue_first_lineno=(prologue_first_lineno) @aux.prologue_first_lineno = prologue_first_lineno end + # @rbs (String prologue) -> String def prologue=(prologue) @aux.prologue = prologue end + # @rbs (Integer epilogue_first_lineno) -> Integer def epilogue_first_lineno=(epilogue_first_lineno) @aux.epilogue_first_lineno = epilogue_first_lineno end + # @rbs (String epilogue) -> String def epilogue=(epilogue) @aux.epilogue = epilogue end + # @rbs () -> void def prepare resolve_inline_rules normalize_rules @@ -151,6 +261,7 @@ def prepare fill_default_precedence fill_symbols fill_sym_to_rules + sort_precedence compute_nullable compute_first_set set_locations @@ -159,25 +270,51 @@ def prepare # TODO: More validation methods # # * Validation for no_declared_type_reference + # + # @rbs () -> void def validate! @symbols_resolver.validate! + validate_no_precedence_for_nterm! validate_rule_lhs_is_nterm! + validate_duplicated_precedence! end + # @rbs (Grammar::Symbol sym) -> Array[Rule] def find_rules_by_symbol!(sym) find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found") end + # @rbs (Grammar::Symbol sym) -> Array[Rule]? def find_rules_by_symbol(sym) @sym_to_rules[sym.number] end + # @rbs (String s_value) -> Array[Rule] + def select_rules_by_s_value(s_value) + @rules.select {|rule| rule.lhs.id.s_value == s_value } + end + + # @rbs () -> Array[String] + def unique_rule_s_values + @rules.map {|rule| rule.lhs.id.s_value }.uniq + end + + # @rbs () -> bool def ielr_defined? @define.key?('lr.type') && @define['lr.type'] == 'ielr' end private + # @rbs () -> void + def sort_precedence + @precedences.sort_by! 
do |prec| + prec.symbol.number + end + @precedences.freeze + end + + # @rbs () -> Array[Grammar::Symbol] def compute_nullable @rules.each do |rule| case @@ -227,6 +364,7 @@ def compute_nullable end end + # @rbs () -> Array[Grammar::Symbol] def compute_first_set terms.each do |term| term.first_set = Set.new([term]).freeze @@ -262,12 +400,14 @@ def compute_first_set end end + # @rbs () -> Array[RuleBuilder] def setup_rules @rule_builders.each do |builder| builder.setup_rules end end + # @rbs () -> Grammar::Symbol def append_special_symbols # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2) @@ -298,11 +438,12 @@ def append_special_symbols @accept_symbol = term end + # @rbs () -> void def resolve_inline_rules while @rule_builders.any?(&:has_inline_rules?) do @rule_builders = @rule_builders.flat_map do |builder| if builder.has_inline_rules? - builder.resolve_inline_rules + Inline::Resolver.new(builder).resolve else builder end @@ -310,14 +451,10 @@ def resolve_inline_rules end end + # @rbs () -> void def normalize_rules - # Add $accept rule to the top of rules - rule_builder = @rule_builders.first # : RuleBuilder - lineno = rule_builder ? 
rule_builder.line : 0 - @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [rule_builder.lhs, @eof_symbol.id], token_code: nil, lineno: lineno) - + add_accept_rule setup_rules - @rule_builders.each do |builder| builder.rules.each do |rule| add_nterm(id: rule._lhs, tag: rule.lhs_tag) @@ -325,23 +462,42 @@ def normalize_rules end end - @rules.sort_by!(&:id) + nterms.freeze + @rules.sort_by!(&:id).freeze + end + + # Add $accept rule to the top of rules + def add_accept_rule + if @start_nterm + start = @start_nterm #: Lrama::Lexer::Token::Base + @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [start, @eof_symbol.id], token_code: nil, lineno: start.line) + else + rule_builder = @rule_builders.first #: RuleBuilder + lineno = rule_builder ? rule_builder.line : 0 + lhs = rule_builder.lhs #: Lexer::Token::Base + @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [lhs, @eof_symbol.id], token_code: nil, lineno: lineno) + end end # Collect symbols from rules + # + # @rbs () -> void def collect_symbols @rules.flat_map(&:_rhs).each do |s| case s when Lrama::Lexer::Token::Char add_term(id: s) - when Lrama::Lexer::Token + when Lrama::Lexer::Token::Base # skip else raise "Unknown class: #{s}" end end + + terms.freeze end + # @rbs () -> void def set_lhs_and_rhs @rules.each do |rule| rule.lhs = token_to_symbol(rule._lhs) if rule._lhs @@ -355,6 +511,8 @@ def set_lhs_and_rhs # Rule inherits precedence from the last term in RHS. # # https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html + # + # @rbs () -> void def fill_default_precedence @rules.each do |rule| # Explicitly specified precedence has the highest priority @@ -369,6 +527,7 @@ def fill_default_precedence end end + # @rbs () -> Array[Grammar::Symbol] def fill_symbols fill_symbol_number fill_nterm_type(@types) @@ -378,6 +537,7 @@ def fill_symbols sort_by_number! 
end + # @rbs () -> Array[Rule] def fill_sym_to_rules @rules.each do |rule| key = rule.lhs.number @@ -386,13 +546,48 @@ def fill_sym_to_rules end end + # @rbs () -> void + def validate_no_precedence_for_nterm! + errors = [] #: Array[String] + + nterms.each do |nterm| + next if nterm.precedence.nil? + + errors << "[BUG] Precedence #{nterm.name} (line: #{nterm.precedence.lineno}) is defined for nonterminal symbol (line: #{nterm.id.first_line}). Precedence can be defined for only terminal symbol." + end + + return if errors.empty? + + raise errors.join("\n") + end + + # @rbs () -> void def validate_rule_lhs_is_nterm! errors = [] #: Array[String] rules.each do |rule| next if rule.lhs.nterm? - errors << "[BUG] LHS of #{rule.display_name} (line: #{rule.lineno}) is term. It should be nterm." + errors << "[BUG] LHS of #{rule.display_name} (line: #{rule.lineno}) is terminal symbol. It should be nonterminal symbol." + end + + return if errors.empty? + + raise errors.join("\n") + end + + # @rbs () -> void + def validate_duplicated_precedence! + errors = [] #: Array[String] + seen = {} #: Hash[String, Precedence] + + precedences.each do |prec| + s_value = prec.s_value + if first = seen[s_value] + errors << "%#{prec.type} redeclaration for #{s_value} (line: #{prec.lineno}) previous declaration was %#{first.type} (line: #{first.lineno})" + else + seen[s_value] = prec + end end return if errors.empty? @@ -400,6 +595,7 @@ def validate_rule_lhs_is_nterm! raise errors.join("\n") end + # @rbs () -> void def set_locations @locations = @locations || @rules.any? {|rule| rule.contains_at_reference?
} end diff --git a/tool/lrama/lib/lrama/grammar/auxiliary.rb b/tool/lrama/lib/lrama/grammar/auxiliary.rb index 2bacee6f1a87e7..76cfb74d4d5767 100644 --- a/tool/lrama/lib/lrama/grammar/auxiliary.rb +++ b/tool/lrama/lib/lrama/grammar/auxiliary.rb @@ -1,9 +1,14 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar # Grammar file information not used by States but by Output - class Auxiliary < Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true) + class Auxiliary + attr_accessor :prologue_first_lineno #: Integer? + attr_accessor :prologue #: String? + attr_accessor :epilogue_first_lineno #: Integer? + attr_accessor :epilogue #: String? end end end diff --git a/tool/lrama/lib/lrama/grammar/binding.rb b/tool/lrama/lib/lrama/grammar/binding.rb index 2efb918a0b2205..5940d153a9fa6c 100644 --- a/tool/lrama/lib/lrama/grammar/binding.rb +++ b/tool/lrama/lib/lrama/grammar/binding.rb @@ -4,51 +4,63 @@ module Lrama class Grammar class Binding - # @rbs @actual_args: Array[Lexer::Token] - # @rbs @param_to_arg: Hash[String, Lexer::Token] + # @rbs @actual_args: Array[Lexer::Token::Base] + # @rbs @param_to_arg: Hash[String, Lexer::Token::Base] - # @rbs (Array[Lexer::Token] params, Array[Lexer::Token] actual_args) -> void + # @rbs (Array[Lexer::Token::Base] params, Array[Lexer::Token::Base] actual_args) -> void def initialize(params, actual_args) @actual_args = actual_args - @param_to_arg = map_params_to_args(params, @actual_args) + @param_to_arg = build_param_to_arg(params, @actual_args) end - # @rbs (Lexer::Token sym) -> Lexer::Token + # @rbs (Lexer::Token::Base sym) -> Lexer::Token::Base def resolve_symbol(sym) - if sym.is_a?(Lexer::Token::InstantiateRule) - Lrama::Lexer::Token::InstantiateRule.new( - s_value: sym.s_value, location: sym.location, args: resolved_args(sym), lhs_tag: sym.lhs_tag - ) - else - param_to_arg(sym) - end + return create_instantiate_rule(sym) if 
sym.is_a?(Lexer::Token::InstantiateRule) + find_arg_for_param(sym) end # @rbs (Lexer::Token::InstantiateRule token) -> String def concatenated_args_str(token) - "#{token.rule_name}_#{token_to_args_s_values(token).join('_')}" + "#{token.rule_name}_#{format_args(token)}" end private - # @rbs (Array[Lexer::Token] params, Array[Lexer::Token] actual_args) -> Hash[String, Lexer::Token] - def map_params_to_args(params, actual_args) - params.zip(actual_args).map do |param, arg| - [param.s_value, arg] - end.to_h + # @rbs (Lexer::Token::InstantiateRule sym) -> Lexer::Token::InstantiateRule + def create_instantiate_rule(sym) + Lrama::Lexer::Token::InstantiateRule.new( + s_value: sym.s_value, + location: sym.location, + args: resolve_args(sym.args), + lhs_tag: sym.lhs_tag + ) end - # @rbs (Lexer::Token::InstantiateRule sym) -> Array[Lexer::Token] - def resolved_args(sym) - sym.args.map { |arg| resolve_symbol(arg) } + # @rbs (Array[Lexer::Token::Base]) -> Array[Lexer::Token::Base] + def resolve_args(args) + args.map { |arg| resolve_symbol(arg) } end - # @rbs (Lexer::Token sym) -> Lexer::Token - def param_to_arg(sym) - if (arg = @param_to_arg[sym.s_value].dup) + # @rbs (Lexer::Token::Base sym) -> Lexer::Token::Base + def find_arg_for_param(sym) + if (arg = @param_to_arg[sym.s_value]&.dup) arg.alias_name = sym.alias_name + arg + else + sym end - arg || sym + end + + # @rbs (Array[Lexer::Token::Base] params, Array[Lexer::Token::Base] actual_args) -> Hash[String, Lexer::Token::Base?] 
+ def build_param_to_arg(params, actual_args) + params.zip(actual_args).map do |param, arg| + [param.s_value, arg] + end.to_h + end + + # @rbs (Lexer::Token::InstantiateRule token) -> String + def format_args(token) + token_to_args_s_values(token).join('_') end # @rbs (Lexer::Token::InstantiateRule token) -> Array[String] diff --git a/tool/lrama/lib/lrama/grammar/code.rb b/tool/lrama/lib/lrama/grammar/code.rb index b6c1cc49e74287..f1b860eeba3f96 100644 --- a/tool/lrama/lib/lrama/grammar/code.rb +++ b/tool/lrama/lib/lrama/grammar/code.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true require "forwardable" @@ -10,17 +11,28 @@ module Lrama class Grammar class Code + # @rbs! + # + # # delegated + # def s_value: -> String + # def line: -> Integer + # def column: -> Integer + # def references: -> Array[Lrama::Grammar::Reference] + extend Forwardable def_delegators "token_code", :s_value, :line, :column, :references - attr_reader :type, :token_code + attr_reader :type #: ::Symbol + attr_reader :token_code #: Lexer::Token::UserCode + # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode) -> void def initialize(type:, token_code:) @type = type @token_code = token_code end + # @rbs (Code other) -> bool def ==(other) self.class == other.class && self.type == other.type && @@ -28,6 +40,8 @@ def ==(other) end # $$, $n, @$, @n are translated to C code + # + # @rbs () -> String def translated_code t_code = s_value.dup @@ -45,6 +59,7 @@ def translated_code private + # @rbs (Lrama::Grammar::Reference ref) -> bot def reference_to_c(ref) raise NotImplementedError.new("#reference_to_c is not implemented") end diff --git a/tool/lrama/lib/lrama/grammar/code/destructor_code.rb b/tool/lrama/lib/lrama/grammar/code/destructor_code.rb index 794017257c5486..d71b62e5133591 100644 --- a/tool/lrama/lib/lrama/grammar/code/destructor_code.rb +++ b/tool/lrama/lib/lrama/grammar/code/destructor_code.rb @@ -1,9 +1,18 @@ +# rbs_inline: enabled # frozen_string_literal: true 
module Lrama class Grammar class Code class DestructorCode < Code + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # @tag: Lexer::Token::Tag + + # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, tag: Lexer::Token::Tag) -> void def initialize(type:, token_code:, tag:) super(type: type, token_code: token_code) @tag = tag @@ -17,6 +26,8 @@ def initialize(type:, token_code:, tag:) # * ($1) error # * (@1) error # * ($:1) error + # + # @rbs (Reference ref) -> (String | bot) def reference_to_c(ref) case when ref.type == :dollar && ref.name == "$" # $$ diff --git a/tool/lrama/lib/lrama/grammar/code/initial_action_code.rb b/tool/lrama/lib/lrama/grammar/code/initial_action_code.rb index 02f2badc9e32e8..cb36041524fcc1 100644 --- a/tool/lrama/lib/lrama/grammar/code/initial_action_code.rb +++ b/tool/lrama/lib/lrama/grammar/code/initial_action_code.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama @@ -12,6 +13,8 @@ class InitialActionCode < Code # * ($1) error # * (@1) error # * ($:1) error + # + # @rbs (Reference ref) -> (String | bot) def reference_to_c(ref) case when ref.type == :dollar && ref.name == "$" # $$ diff --git a/tool/lrama/lib/lrama/grammar/code/no_reference_code.rb b/tool/lrama/lib/lrama/grammar/code/no_reference_code.rb index ab12f32e297567..1d39919979a0a6 100644 --- a/tool/lrama/lib/lrama/grammar/code/no_reference_code.rb +++ b/tool/lrama/lib/lrama/grammar/code/no_reference_code.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama @@ -12,6 +13,8 @@ class NoReferenceCode < Code # * ($1) error # * (@1) error # * ($:1) error + # + # @rbs (Reference ref) -> bot def reference_to_c(ref) case when ref.type == :dollar # $$, $n diff --git a/tool/lrama/lib/lrama/grammar/code/printer_code.rb 
b/tool/lrama/lib/lrama/grammar/code/printer_code.rb index c0b8d24306ff05..c6e25d523553c3 100644 --- a/tool/lrama/lib/lrama/grammar/code/printer_code.rb +++ b/tool/lrama/lib/lrama/grammar/code/printer_code.rb @@ -1,9 +1,18 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class Code class PrinterCode < Code + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # @tag: Lexer::Token::Tag + + # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, tag: Lexer::Token::Tag) -> void def initialize(type:, token_code:, tag:) super(type: type, token_code: token_code) @tag = tag @@ -17,6 +26,8 @@ def initialize(type:, token_code:, tag:) # * ($1) error # * (@1) error # * ($:1) error + # + # @rbs (Reference ref) -> (String | bot) def reference_to_c(ref) case when ref.type == :dollar && ref.name == "$" # $$ diff --git a/tool/lrama/lib/lrama/grammar/code/rule_action.rb b/tool/lrama/lib/lrama/grammar/code/rule_action.rb index 363ecdf25dfc19..e71e93e5a5bb8d 100644 --- a/tool/lrama/lib/lrama/grammar/code/rule_action.rb +++ b/tool/lrama/lib/lrama/grammar/code/rule_action.rb @@ -1,9 +1,18 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class Code class RuleAction < Code + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! 
+ # @rule: Rule + + # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule) -> void def initialize(type:, token_code:, rule:) super(type: type, token_code: token_code) @rule = rule @@ -38,6 +47,8 @@ def initialize(type:, token_code:, rule:) # "Position in grammar" $1 # "Index for yyvsp" 0 # "$:n" $:1 + # + # @rbs (Reference ref) -> String def reference_to_c(ref) case when ref.type == :dollar && ref.name == "$" # $$ @@ -66,6 +77,7 @@ def reference_to_c(ref) end end + # @rbs () -> Integer def position_in_rhs # If rule is not derived rule, User Code is only action at # the end of rule RHS. In such case, the action is located on @@ -74,15 +86,20 @@ def position_in_rhs end # If this is midrule action, RHS is an RHS of the original rule. + # + # @rbs () -> Array[Grammar::Symbol] def rhs (@rule.original_rule || @rule).rhs end # Unlike `rhs`, LHS is always an LHS of the rule. + # + # @rbs () -> Grammar::Symbol def lhs @rule.lhs end + # @rbs (Reference ref) -> bot def raise_tag_not_found_error(ref) raise "Tag is not specified for '$#{ref.value}' in '#{@rule.display_name}'" end diff --git a/tool/lrama/lib/lrama/grammar/counter.rb b/tool/lrama/lib/lrama/grammar/counter.rb index dc91b87b711f2d..ced934309d7d15 100644 --- a/tool/lrama/lib/lrama/grammar/counter.rb +++ b/tool/lrama/lib/lrama/grammar/counter.rb @@ -1,12 +1,22 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class Counter + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! 
+ # @number: Integer + + # @rbs (Integer number) -> void def initialize(number) @number = number end + # @rbs () -> Integer def increment n = @number @number += 1 diff --git a/tool/lrama/lib/lrama/grammar/destructor.rb b/tool/lrama/lib/lrama/grammar/destructor.rb index a2b6fde0ed5b3d..0ce8611e776c3b 100644 --- a/tool/lrama/lib/lrama/grammar/destructor.rb +++ b/tool/lrama/lib/lrama/grammar/destructor.rb @@ -1,8 +1,21 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar - class Destructor < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + class Destructor + attr_reader :ident_or_tags #: Array[Lexer::Token::Ident|Lexer::Token::Tag] + attr_reader :token_code #: Lexer::Token::UserCode + attr_reader :lineno #: Integer + + # @rbs (ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], token_code: Lexer::Token::UserCode, lineno: Integer) -> void + def initialize(ident_or_tags:, token_code:, lineno:) + @ident_or_tags = ident_or_tags + @token_code = token_code + @lineno = lineno + end + + # @rbs (Lexer::Token::Tag tag) -> String def translated_code(tag) Code::DestructorCode.new(type: :destructor, token_code: token_code, tag: tag).translated_code end diff --git a/tool/lrama/lib/lrama/grammar/error_token.rb b/tool/lrama/lib/lrama/grammar/error_token.rb index 50eaafeebc66fd..9d9ed54ae20edc 100644 --- a/tool/lrama/lib/lrama/grammar/error_token.rb +++ b/tool/lrama/lib/lrama/grammar/error_token.rb @@ -1,8 +1,21 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar - class ErrorToken < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + class ErrorToken + attr_reader :ident_or_tags #: Array[Lexer::Token::Ident | Lexer::Token::Tag] + attr_reader :token_code #: Lexer::Token::UserCode + attr_reader :lineno #: Integer + + # @rbs (ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], token_code: Lexer::Token::UserCode, lineno: Integer) -> void + def initialize(ident_or_tags:, 
token_code:, lineno:) + @ident_or_tags = ident_or_tags + @token_code = token_code + @lineno = lineno + end + + # @rbs (Lexer::Token::Tag tag) -> String def translated_code(tag) Code::PrinterCode.new(type: :error_token, token_code: token_code, tag: tag).translated_code end diff --git a/tool/lrama/lib/lrama/grammar/inline.rb b/tool/lrama/lib/lrama/grammar/inline.rb new file mode 100644 index 00000000000000..c02ab6002ba1f8 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/inline.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +require_relative 'inline/resolver' diff --git a/tool/lrama/lib/lrama/grammar/inline/resolver.rb b/tool/lrama/lib/lrama/grammar/inline/resolver.rb new file mode 100644 index 00000000000000..aca689ccfb4325 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/inline/resolver.rb @@ -0,0 +1,80 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Grammar + class Inline + class Resolver + # @rbs (Lrama::Grammar::RuleBuilder rule_builder) -> void + def initialize(rule_builder) + @rule_builder = rule_builder + end + + # @rbs () -> Array[Lrama::Grammar::RuleBuilder] + def resolve + resolved_builders = [] #: Array[Lrama::Grammar::RuleBuilder] + @rule_builder.rhs.each_with_index do |token, i| + if (rule = @rule_builder.parameterized_resolver.find_inline(token)) + rule.rhs.each do |rhs| + builder = build_rule(rhs, token, i, rule) + resolved_builders << builder + end + break + end + end + resolved_builders + end + + private + + # @rbs (Lrama::Grammar::Parameterized::Rhs rhs, Lrama::Lexer::Token token, Integer index, Lrama::Grammar::Parameterized::Rule rule) -> Lrama::Grammar::RuleBuilder + def build_rule(rhs, token, index, rule) + builder = RuleBuilder.new( + @rule_builder.rule_counter, + @rule_builder.midrule_action_counter, + @rule_builder.parameterized_resolver, + lhs_tag: @rule_builder.lhs_tag + ) + resolve_rhs(builder, rhs, index, token, rule) + builder.lhs = @rule_builder.lhs + builder.line = @rule_builder.line + 
builder.precedence_sym = @rule_builder.precedence_sym + builder.user_code = replace_user_code(rhs, index) + builder + end + + # @rbs (Lrama::Grammar::RuleBuilder builder, Lrama::Grammar::Parameterized::Rhs rhs, Integer index, Lrama::Lexer::Token token, Lrama::Grammar::Parameterized::Rule rule) -> void + def resolve_rhs(builder, rhs, index, token, rule) + @rule_builder.rhs.each_with_index do |tok, i| + if i == index + rhs.symbols.each do |sym| + if token.is_a?(Lexer::Token::InstantiateRule) + bindings = Binding.new(rule.parameters, token.args) + builder.add_rhs(bindings.resolve_symbol(sym)) + else + builder.add_rhs(sym) + end + end + else + builder.add_rhs(tok) + end + end + end + + # @rbs (Lrama::Grammar::Parameterized::Rhs rhs, Integer index) -> Lrama::Lexer::Token::UserCode + def replace_user_code(rhs, index) + user_code = @rule_builder.user_code + return user_code if rhs.user_code.nil? || user_code.nil? + + code = user_code.s_value.gsub(/\$#{index + 1}/, rhs.user_code.s_value) + user_code.references.each do |ref| + next if ref.index.nil? 
|| ref.index <= index # nil は $$ の場合 + code = code.gsub(/\$#{ref.index}/, "$#{ref.index + (rhs.symbols.count - 1)}") + code = code.gsub(/@#{ref.index}/, "@#{ref.index + (rhs.symbols.count - 1)}") + end + Lrama::Lexer::Token::UserCode.new(s_value: code, location: user_code.location) + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/parameterized.rb b/tool/lrama/lib/lrama/grammar/parameterized.rb new file mode 100644 index 00000000000000..48db3433f379a8 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/parameterized.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +require_relative 'parameterized/resolver' +require_relative 'parameterized/rhs' +require_relative 'parameterized/rule' diff --git a/tool/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb b/tool/lrama/lib/lrama/grammar/parameterized/resolver.rb similarity index 60% rename from tool/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb rename to tool/lrama/lib/lrama/grammar/parameterized/resolver.rb index 06f2f1cef7b9e4..558f3081906b3f 100644 --- a/tool/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb +++ b/tool/lrama/lib/lrama/grammar/parameterized/resolver.rb @@ -1,40 +1,49 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar - class ParameterizingRule + class Parameterized class Resolver - attr_accessor :rules, :created_lhs_list + attr_accessor :rules #: Array[Rule] + attr_accessor :created_lhs_list #: Array[Lexer::Token::Base] + # @rbs () -> void def initialize @rules = [] @created_lhs_list = [] end - def add_parameterizing_rule(rule) + # @rbs (Rule rule) -> Array[Rule] + def add_rule(rule) @rules << rule end + # @rbs (Lexer::Token::InstantiateRule token) -> Rule? def find_rule(token) select_rules(@rules, token).last end + # @rbs (Lexer::Token::Base token) -> Rule? def find_inline(token) - @rules.reverse.find { |rule| rule.name == token.s_value && rule.is_inline } + @rules.reverse.find { |rule| rule.name == token.s_value && rule.inline? 
} end + # @rbs (String lhs_s_value) -> Lexer::Token::Base? def created_lhs(lhs_s_value) @created_lhs_list.reverse.find { |created_lhs| created_lhs.s_value == lhs_s_value } end + # @rbs () -> Array[Rule] def redefined_rules @rules.select { |rule| @rules.count { |r| r.name == rule.name && r.required_parameters_count == rule.required_parameters_count } > 1 } end private + # @rbs (Array[Rule] rules, Lexer::Token::InstantiateRule token) -> Array[Rule] def select_rules(rules, token) - rules = select_not_inline_rules(rules) + rules = reject_inline_rules(rules) rules = select_rules_by_name(rules, token.rule_name) rules = rules.select { |rule| rule.required_parameters_count == token.args_count } if rules.empty? @@ -44,14 +53,16 @@ def select_rules(rules, token) end end - def select_not_inline_rules(rules) - rules.select { |rule| !rule.is_inline } + # @rbs (Array[Rule] rules) -> Array[Rule] + def reject_inline_rules(rules) + rules.reject(&:inline?) end + # @rbs (Array[Rule] rules, String rule_name) -> Array[Rule] def select_rules_by_name(rules, rule_name) rules = rules.select { |rule| rule.name == rule_name } if rules.empty? - raise "Parameterizing rule does not exist. `#{rule_name}`" + raise "Parameterized rule does not exist. 
`#{rule_name}`" else rules end diff --git a/tool/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb b/tool/lrama/lib/lrama/grammar/parameterized/rhs.rb similarity index 73% rename from tool/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb rename to tool/lrama/lib/lrama/grammar/parameterized/rhs.rb index f60781c0534905..663de49100341d 100644 --- a/tool/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb +++ b/tool/lrama/lib/lrama/grammar/parameterized/rhs.rb @@ -1,17 +1,22 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar - class ParameterizingRule + class Parameterized class Rhs - attr_accessor :symbols, :user_code, :precedence_sym + attr_accessor :symbols #: Array[Lexer::Token::Base] + attr_accessor :user_code #: Lexer::Token::UserCode? + attr_accessor :precedence_sym #: Grammar::Symbol? + # @rbs () -> void def initialize @symbols = [] @user_code = nil @precedence_sym = nil end + # @rbs (Grammar::Binding bindings) -> Lexer::Token::UserCode? def resolve_user_code(bindings) return unless user_code diff --git a/tool/lrama/lib/lrama/grammar/parameterized/rule.rb b/tool/lrama/lib/lrama/grammar/parameterized/rule.rb new file mode 100644 index 00000000000000..7048be3cffc377 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/parameterized/rule.rb @@ -0,0 +1,36 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Grammar + class Parameterized + class Rule + attr_reader :name #: String + attr_reader :parameters #: Array[Lexer::Token::Base] + attr_reader :rhs #: Array[Rhs] + attr_reader :required_parameters_count #: Integer + attr_reader :tag #: Lexer::Token::Tag? 
+ + # @rbs (String name, Array[Lexer::Token::Base] parameters, Array[Rhs] rhs, tag: Lexer::Token::Tag?, is_inline: bool) -> void + def initialize(name, parameters, rhs, tag: nil, is_inline: false) + @name = name + @parameters = parameters + @rhs = rhs + @tag = tag + @is_inline = is_inline + @required_parameters_count = parameters.count + end + + # @rbs () -> String + def to_s + "#{@name}(#{@parameters.map(&:s_value).join(', ')})" + end + + # @rbs () -> bool + def inline? + @is_inline + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/parameterizing_rule.rb b/tool/lrama/lib/lrama/grammar/parameterizing_rule.rb deleted file mode 100644 index ddc1a467ce974e..00000000000000 --- a/tool/lrama/lib/lrama/grammar/parameterizing_rule.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -require_relative 'parameterizing_rule/resolver' -require_relative 'parameterizing_rule/rhs' -require_relative 'parameterizing_rule/rule' diff --git a/tool/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb b/tool/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb deleted file mode 100644 index cc200d2fb60da9..00000000000000 --- a/tool/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb +++ /dev/null @@ -1,24 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class Grammar - class ParameterizingRule - class Rule - attr_reader :name, :parameters, :rhs_list, :required_parameters_count, :tag, :is_inline - - def initialize(name, parameters, rhs_list, tag: nil, is_inline: false) - @name = name - @parameters = parameters - @rhs_list = rhs_list - @tag = tag - @is_inline = is_inline - @required_parameters_count = parameters.count - end - - def to_s - "#{@name}(#{@parameters.map(&:s_value).join(', ')})" - end - end - end - end -end diff --git a/tool/lrama/lib/lrama/grammar/percent_code.rb b/tool/lrama/lib/lrama/grammar/percent_code.rb index 416a2d27534b97..9afb903056dff4 100644 --- a/tool/lrama/lib/lrama/grammar/percent_code.rb +++ 
b/tool/lrama/lib/lrama/grammar/percent_code.rb @@ -1,10 +1,21 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class PercentCode - attr_reader :name, :code + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # @name: String + # @code: String + attr_reader :name #: String + attr_reader :code #: String + + # @rbs (String name, String code) -> void def initialize(name, code) @name = name @code = code diff --git a/tool/lrama/lib/lrama/grammar/precedence.rb b/tool/lrama/lib/lrama/grammar/precedence.rb index 13cf960c32a5f4..b4c6403372dc0b 100644 --- a/tool/lrama/lib/lrama/grammar/precedence.rb +++ b/tool/lrama/lib/lrama/grammar/precedence.rb @@ -1,13 +1,55 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar - class Precedence < Struct.new(:type, :precedence, keyword_init: true) + class Precedence < Struct.new(:type, :symbol, :precedence, :s_value, :lineno, keyword_init: true) include Comparable + # @rbs! 
+ # type type_enum = :left | :right | :nonassoc | :precedence + # + # attr_accessor type: type_enum + # attr_accessor symbol: Grammar::Symbol + # attr_accessor precedence: Integer + # attr_accessor s_value: String + # attr_accessor lineno: Integer + # + # def initialize: (?type: type_enum, ?symbol: Grammar::Symbol, ?precedence: Integer, ?s_value: ::String, ?lineno: Integer) -> void + attr_reader :used_by_lalr #: Array[State::ResolvedConflict] + attr_reader :used_by_ielr #: Array[State::ResolvedConflict] + + # @rbs (Precedence other) -> Integer def <=>(other) self.precedence <=> other.precedence end + + # @rbs (State::ResolvedConflict resolved_conflict) -> void + def mark_used_by_lalr(resolved_conflict) + @used_by_lalr ||= [] #: Array[State::ResolvedConflict] + @used_by_lalr << resolved_conflict + end + + # @rbs (State::ResolvedConflict resolved_conflict) -> void + def mark_used_by_ielr(resolved_conflict) + @used_by_ielr ||= [] #: Array[State::ResolvedConflict] + @used_by_ielr << resolved_conflict + end + + # @rbs () -> bool + def used_by? + used_by_lalr? || used_by_ielr? + end + + # @rbs () -> bool + def used_by_lalr? + !@used_by_lalr.nil? && !@used_by_lalr.empty? + end + + # @rbs () -> bool + def used_by_ielr? + !@used_by_ielr.nil? && !@used_by_ielr.empty? + end end end end diff --git a/tool/lrama/lib/lrama/grammar/printer.rb b/tool/lrama/lib/lrama/grammar/printer.rb index b78459e819a7bf..490fe701dbafe7 100644 --- a/tool/lrama/lib/lrama/grammar/printer.rb +++ b/tool/lrama/lib/lrama/grammar/printer.rb @@ -1,8 +1,17 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class Printer < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + # @rbs! 
+ # attr_accessor ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag] + # attr_accessor token_code: Lexer::Token::UserCode + # attr_accessor lineno: Integer + # + # def initialize: (?ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], ?token_code: Lexer::Token::UserCode, ?lineno: Integer) -> void + + # @rbs (Lexer::Token::Tag tag) -> String def translated_code(tag) Code::PrinterCode.new(type: :printer, token_code: token_code, tag: tag).translated_code end diff --git a/tool/lrama/lib/lrama/grammar/reference.rb b/tool/lrama/lib/lrama/grammar/reference.rb index b044516bdb9a83..7e3badfecc3911 100644 --- a/tool/lrama/lib/lrama/grammar/reference.rb +++ b/tool/lrama/lib/lrama/grammar/reference.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama @@ -8,6 +9,18 @@ class Grammar # index: Integer # ex_tag: "$1" (Optional) class Reference < Struct.new(:type, :name, :number, :index, :ex_tag, :first_column, :last_column, keyword_init: true) + # @rbs! + # attr_accessor type: ::Symbol + # attr_accessor name: String + # attr_accessor number: Integer + # attr_accessor index: Integer + # attr_accessor ex_tag: Lexer::Token::Base? + # attr_accessor first_column: Integer + # attr_accessor last_column: Integer + # + # def initialize: (type: ::Symbol, ?name: String, ?number: Integer, ?index: Integer, ?ex_tag: Lexer::Token::Base?, first_column: Integer, last_column: Integer) -> void + + # @rbs () -> (String|Integer) def value name || number end diff --git a/tool/lrama/lib/lrama/grammar/rule.rb b/tool/lrama/lib/lrama/grammar/rule.rb index 445752ae0dd952..d00d6a88830ae9 100644 --- a/tool/lrama/lib/lrama/grammar/rule.rb +++ b/tool/lrama/lib/lrama/grammar/rule.rb @@ -1,11 +1,38 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar # _rhs holds original RHS element. Use rhs to refer to Symbol. 
class Rule < Struct.new(:id, :_lhs, :lhs, :lhs_tag, :_rhs, :rhs, :token_code, :position_in_original_rule_rhs, :nullable, :precedence_sym, :lineno, keyword_init: true) - attr_accessor :original_rule + # @rbs! + # + # interface _DelegatedMethods + # def lhs: -> Grammar::Symbol + # def rhs: -> Array[Grammar::Symbol] + # end + # + # attr_accessor id: Integer + # attr_accessor _lhs: Lexer::Token::Base + # attr_accessor lhs: Grammar::Symbol + # attr_accessor lhs_tag: Lexer::Token::Tag? + # attr_accessor _rhs: Array[Lexer::Token::Base] + # attr_accessor rhs: Array[Grammar::Symbol] + # attr_accessor token_code: Lexer::Token::UserCode? + # attr_accessor position_in_original_rule_rhs: Integer + # attr_accessor nullable: bool + # attr_accessor precedence_sym: Grammar::Symbol? + # attr_accessor lineno: Integer? + # + # def initialize: ( + # ?id: Integer, ?_lhs: Lexer::Token::Base?, ?lhs: Lexer::Token::Base, ?lhs_tag: Lexer::Token::Tag?, ?_rhs: Array[Lexer::Token::Base], ?rhs: Array[Grammar::Symbol], + # ?token_code: Lexer::Token::UserCode?, ?position_in_original_rule_rhs: Integer?, ?nullable: bool, + # ?precedence_sym: Grammar::Symbol?, ?lineno: Integer? + # ) -> void + attr_accessor :original_rule #: Rule + + # @rbs (Rule other) -> bool def ==(other) self.class == other.class && self.lhs == other.lhs && @@ -18,12 +45,14 @@ def ==(other) self.lineno == other.lineno end + # @rbs () -> String def display_name l = lhs.id.s_value r = empty_rule? ? "ε" : rhs.map {|r| r.id.s_value }.join(" ") "#{l} -> #{r}" end + # @rbs () -> String def display_name_without_action l = lhs.id.s_value r = empty_rule? ? "ε" : rhs.map do |r| @@ -33,7 +62,18 @@ def display_name_without_action "#{l} -> #{r}" end + # @rbs () -> (RailroadDiagrams::Skip | RailroadDiagrams::Sequence) + def to_diagrams + if rhs.empty? 
+ RailroadDiagrams::Skip.new + else + RailroadDiagrams::Sequence.new(*rhs_to_diagram) + end + end + # Used by #user_actions + # + # @rbs () -> String def as_comment l = lhs.id.s_value r = empty_rule? ? "%empty" : rhs.map(&:display_name).join(" ") @@ -41,35 +81,55 @@ def as_comment "#{l}: #{r}" end + # @rbs () -> String def with_actions "#{display_name} {#{token_code&.s_value}}" end # opt_nl: ε <-- empty_rule # | '\n' <-- not empty_rule + # + # @rbs () -> bool def empty_rule? rhs.empty? end + # @rbs () -> Precedence? def precedence precedence_sym&.precedence end + # @rbs () -> bool def initial_rule? id == 0 end + # @rbs () -> String? def translated_code return nil unless token_code Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self).translated_code end + # @rbs () -> bool def contains_at_reference? return false unless token_code token_code.references.any? {|r| r.type == :at } end + + private + + # @rbs () -> Array[(RailroadDiagrams::Terminal | RailroadDiagrams::NonTerminal)] + def rhs_to_diagram + rhs.map do |r| + if r.term + RailroadDiagrams::Terminal.new(r.id.s_value) + else + RailroadDiagrams::NonTerminal.new(r.id.s_value) + end + end + end end end end diff --git a/tool/lrama/lib/lrama/grammar/rule_builder.rb b/tool/lrama/lib/lrama/grammar/rule_builder.rb index 481a3780f49aa5..34fdca6c86bf45 100644 --- a/tool/lrama/lib/lrama/grammar/rule_builder.rb +++ b/tool/lrama/lib/lrama/grammar/rule_builder.rb @@ -1,15 +1,38 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class RuleBuilder - attr_accessor :lhs, :line - attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym - - def initialize(rule_counter, midrule_action_counter, parameterizing_rule_resolver, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false) + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. 
+ # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # @position_in_original_rule_rhs: Integer? + # @skip_preprocess_references: bool + # @rules: Array[Rule] + # @rule_builders_for_parameterized: Array[RuleBuilder] + # @rule_builders_for_derived_rules: Array[RuleBuilder] + # @parameterized_rules: Array[Rule] + # @midrule_action_rules: Array[Rule] + # @replaced_rhs: Array[Lexer::Token::Base]? + + attr_accessor :lhs #: Lexer::Token::Base? + attr_accessor :line #: Integer? + attr_reader :rule_counter #: Counter + attr_reader :midrule_action_counter #: Counter + attr_reader :parameterized_resolver #: Grammar::Parameterized::Resolver + attr_reader :lhs_tag #: Lexer::Token::Tag? + attr_reader :rhs #: Array[Lexer::Token::Base] + attr_reader :user_code #: Lexer::Token::UserCode? + attr_reader :precedence_sym #: Grammar::Symbol? + + # @rbs (Counter rule_counter, Counter midrule_action_counter, Grammar::Parameterized::Resolver parameterized_resolver, ?Integer position_in_original_rule_rhs, ?lhs_tag: Lexer::Token::Tag?, ?skip_preprocess_references: bool) -> void + def initialize(rule_counter, midrule_action_counter, parameterized_resolver, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false) @rule_counter = rule_counter @midrule_action_counter = midrule_action_counter - @parameterizing_rule_resolver = parameterizing_rule_resolver + @parameterized_resolver = parameterized_resolver @position_in_original_rule_rhs = position_in_original_rule_rhs @skip_preprocess_references = skip_preprocess_references @@ -20,12 +43,13 @@ def initialize(rule_counter, midrule_action_counter, parameterizing_rule_resolve @precedence_sym = nil @line = nil @rules = [] - @rule_builders_for_parameterizing_rules = [] + @rule_builders_for_parameterized = [] @rule_builders_for_derived_rules = [] - @parameterizing_rules = [] + @parameterized_rules = [] @midrule_action_rules = [] end + # @rbs (Lexer::Token::Base rhs) -> void def add_rhs(rhs) @line ||= 
rhs.line @@ -34,6 +58,7 @@ def add_rhs(rhs) @rhs << rhs end + # @rbs (Lexer::Token::UserCode? user_code) -> void def user_code=(user_code) @line ||= user_code&.line @@ -42,72 +67,59 @@ def user_code=(user_code) @user_code = user_code end + # @rbs (Grammar::Symbol? precedence_sym) -> void def precedence_sym=(precedence_sym) flush_user_code @precedence_sym = precedence_sym end + # @rbs () -> void def complete_input freeze_rhs end + # @rbs () -> void def setup_rules preprocess_references unless @skip_preprocess_references process_rhs + resolve_inline_rules build_rules end + # @rbs () -> Array[Grammar::Rule] def rules - @parameterizing_rules + @midrule_action_rules + @rules + @parameterized_rules + @midrule_action_rules + @rules end + # @rbs () -> bool def has_inline_rules? - rhs.any? { |token| @parameterizing_rule_resolver.find_inline(token) } - end - - def resolve_inline_rules - resolved_builders = [] #: Array[RuleBuilder] - rhs.each_with_index do |token, i| - if (inline_rule = @parameterizing_rule_resolver.find_inline(token)) - inline_rule.rhs_list.each do |inline_rhs| - rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: lhs_tag) - if token.is_a?(Lexer::Token::InstantiateRule) - resolve_inline_rhs(rule_builder, inline_rhs, i, Binding.new(inline_rule.parameters, token.args)) - else - resolve_inline_rhs(rule_builder, inline_rhs, i) - end - rule_builder.lhs = lhs - rule_builder.line = line - rule_builder.precedence_sym = precedence_sym - rule_builder.user_code = replace_inline_user_code(inline_rhs, i) - resolved_builders << rule_builder - end - break - end - end - resolved_builders + rhs.any? 
{ |token| @parameterized_resolver.find_inline(token) } end private + # @rbs () -> void def freeze_rhs @rhs.freeze end + # @rbs () -> void def preprocess_references numberize_references end + # @rbs () -> void def build_rules - tokens = @replaced_rhs + tokens = @replaced_rhs #: Array[Lexer::Token::Base] + return if tokens.any? { |t| @parameterized_resolver.find_inline(t) } rule = Rule.new( id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code, position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line ) @rules = [rule] - @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder| + @parameterized_rules = @rule_builders_for_parameterized.map do |rule_builder| rule_builder.rules end.flatten @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder| @@ -120,31 +132,33 @@ def build_rules # rhs is a mixture of variety type of tokens like `Ident`, `InstantiateRule`, `UserCode` and so on. # `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`. + # + # @rbs () -> void def process_rhs return if @replaced_rhs - @replaced_rhs = [] + replaced_rhs = [] #: Array[Lexer::Token::Base] rhs.each_with_index do |token, i| case token when Lrama::Lexer::Token::Char - @replaced_rhs << token + replaced_rhs << token when Lrama::Lexer::Token::Ident - @replaced_rhs << token + replaced_rhs << token when Lrama::Lexer::Token::InstantiateRule - parameterizing_rule = @parameterizing_rule_resolver.find_rule(token) - raise "Unexpected token. #{token}" unless parameterizing_rule + parameterized_rule = @parameterized_resolver.find_rule(token) + raise "Unexpected token. 
#{token}" unless parameterized_rule - bindings = Binding.new(parameterizing_rule.parameters, token.args) + bindings = Binding.new(parameterized_rule.parameters, token.args) lhs_s_value = bindings.concatenated_args_str(token) - if (created_lhs = @parameterizing_rule_resolver.created_lhs(lhs_s_value)) - @replaced_rhs << created_lhs + if (created_lhs = @parameterized_resolver.created_lhs(lhs_s_value)) + replaced_rhs << created_lhs else lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location) - @replaced_rhs << lhs_token - @parameterizing_rule_resolver.created_lhs_list << lhs_token - parameterizing_rule.rhs_list.each do |r| - rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: token.lhs_tag || parameterizing_rule.tag) + replaced_rhs << lhs_token + @parameterized_resolver.created_lhs_list << lhs_token + parameterized_rule.rhs.each do |r| + rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterized_resolver, lhs_tag: token.lhs_tag || parameterized_rule.tag) rule_builder.lhs = lhs_token r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) } rule_builder.line = line @@ -152,51 +166,48 @@ def process_rhs rule_builder.user_code = r.resolve_user_code(bindings) rule_builder.complete_input rule_builder.setup_rules - @rule_builders_for_parameterizing_rules << rule_builder + @rule_builders_for_parameterized << rule_builder end end when Lrama::Lexer::Token::UserCode prefix = token.referred ? 
"@" : "$@" tag = token.tag || lhs_tag new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s) - @replaced_rhs << new_token + replaced_rhs << new_token - rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, i, lhs_tag: tag, skip_preprocess_references: true) + rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterized_resolver, i, lhs_tag: tag, skip_preprocess_references: true) rule_builder.lhs = new_token rule_builder.user_code = token rule_builder.complete_input rule_builder.setup_rules @rule_builders_for_derived_rules << rule_builder + when Lrama::Lexer::Token::Empty + # Noop else raise "Unexpected token. #{token}" end end - end - def resolve_inline_rhs(rule_builder, inline_rhs, index, bindings = nil) - rhs.each_with_index do |token, i| - if index == i - inline_rhs.symbols.each { |sym| rule_builder.add_rhs(bindings.nil? ? sym : bindings.resolve_symbol(sym)) } - else - rule_builder.add_rhs(token) - end - end + @replaced_rhs = replaced_rhs end - def replace_inline_user_code(inline_rhs, index) - return user_code if inline_rhs.user_code.nil? - return user_code if user_code.nil? - - code = user_code.s_value.gsub(/\$#{index + 1}/, inline_rhs.user_code.s_value) - user_code.references.each do |ref| - next if ref.index.nil? || ref.index <= index # nil is a case for `$$` - code = code.gsub(/\$#{ref.index}/, "$#{ref.index + (inline_rhs.symbols.count-1)}") - code = code.gsub(/@#{ref.index}/, "@#{ref.index + (inline_rhs.symbols.count-1)}") + # @rbs () -> void + def resolve_inline_rules + while @rule_builders_for_parameterized.any?(&:has_inline_rules?) do + @rule_builders_for_parameterized = @rule_builders_for_parameterized.flat_map do |rule_builder| + if rule_builder.has_inline_rules? 
+ inlined_builders = Inline::Resolver.new(rule_builder).resolve + inlined_builders.each { |builder| builder.setup_rules } + inlined_builders + else + rule_builder + end + end end - Lrama::Lexer::Token::UserCode.new(s_value: code, location: user_code.location) end + # @rbs () -> void def numberize_references # Bison n'th component is 1-origin (rhs + [user_code]).compact.each.with_index(1) do |token, i| @@ -209,7 +220,10 @@ def numberize_references if ref_name == '$' ref.name = '$' else - candidates = ([lhs] + rhs).each_with_index.select {|token, _i| token.referred_by?(ref_name) } + candidates = ([lhs] + rhs).each_with_index.select do |token, _i| + # @type var token: Lexer::Token::Base + token.referred_by?(ref_name) + end if candidates.size >= 2 token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.") @@ -244,6 +258,7 @@ def numberize_references end end + # @rbs () -> void def flush_user_code if (c = @user_code) @rhs << c diff --git a/tool/lrama/lib/lrama/grammar/stdlib.y b/tool/lrama/lib/lrama/grammar/stdlib.y index d6e89c908c5139..dd397c9e08d476 100644 --- a/tool/lrama/lib/lrama/grammar/stdlib.y +++ b/tool/lrama/lib/lrama/grammar/stdlib.y @@ -3,26 +3,43 @@ stdlib.y This is lrama's standard library. It provides a number of - parameterizing rule definitions, such as options and lists, + parameterized rule definitions, such as options and lists, that should be useful in a number of situations. 
**********************************************************************/ +%% + // ------------------------------------------------------------------- // Options /* - * program: option(number) + * program: option(X) + * + * => + * + * program: option_X + * option_X: %empty + * option_X: X + */ +%rule option(X) + : /* empty */ + | X + ; + + +/* + * program: ioption(X) * * => * - * program: option_number - * option_number: %empty - * option_number: number + * program: %empty + * program: X */ -%rule option(X): /* empty */ - | X - ; +%rule %inline ioption(X) + : /* empty */ + | X + ; // ------------------------------------------------------------------- // Sequences @@ -35,8 +52,9 @@ * program: preceded_opening_X * preceded_opening_X: opening X */ -%rule preceded(opening, X): opening X { $$ = $2; } - ; +%rule preceded(opening, X) + : opening X { $$ = $2; } + ; /* * program: terminated(X, closing) @@ -46,8 +64,9 @@ * program: terminated_X_closing * terminated_X_closing: X closing */ -%rule terminated(X, closing): X closing { $$ = $1; } - ; +%rule terminated(X, closing) + : X closing { $$ = $1; } + ; /* * program: delimited(opening, X, closing) @@ -57,66 +76,67 @@ * program: delimited_opening_X_closing * delimited_opening_X_closing: opening X closing */ -%rule delimited(opening, X, closing): opening X closing { $$ = $2; } - ; +%rule delimited(opening, X, closing) + : opening X closing { $$ = $2; } + ; // ------------------------------------------------------------------- // Lists /* - * program: list(number) + * program: list(X) * * => * - * program: list_number - * list_number: %empty - * list_number: list_number number + * program: list_X + * list_X: %empty + * list_X: list_X X */ -%rule list(X): /* empty */ - | list(X) X - ; +%rule list(X) + : /* empty */ + | list(X) X + ; /* - * program: nonempty_list(number) + * program: nonempty_list(X) * * => * - * program: nonempty_list_number - * nonempty_list_number: number - * nonempty_list_number: nonempty_list_number number + 
* program: nonempty_list_X + * nonempty_list_X: X + * nonempty_list_X: nonempty_list_X X */ -%rule nonempty_list(X): X - | nonempty_list(X) X - ; +%rule nonempty_list(X) + : X + | nonempty_list(X) X + ; /* - * program: separated_nonempty_list(comma, number) + * program: separated_nonempty_list(separator, X) * * => * - * program: separated_nonempty_list_comma_number - * separated_nonempty_list_comma_number: number - * separated_nonempty_list_comma_number: separated_nonempty_list_comma_number comma number + * program: separated_nonempty_list_separator_X + * separated_nonempty_list_separator_X: X + * separated_nonempty_list_separator_X: separated_nonempty_list_separator_X separator X */ -%rule separated_nonempty_list(separator, X): X - | separated_nonempty_list(separator, X) separator X - ; +%rule separated_nonempty_list(separator, X) + : X + | separated_nonempty_list(separator, X) separator X + ; /* - * program: separated_list(comma, number) + * program: separated_list(separator, X) * * => * - * program: separated_list_comma_number - * separated_list_comma_number: option_separated_nonempty_list_comma_number - * option_separated_nonempty_list_comma_number: %empty - * option_separated_nonempty_list_comma_number: separated_nonempty_list_comma_number - * separated_nonempty_list_comma_number: number - * separated_nonempty_list_comma_number: comma separated_nonempty_list_comma_number number + * program: separated_list_separator_X + * separated_list_separator_X: option_separated_nonempty_list_separator_X + * option_separated_nonempty_list_separator_X: %empty + * option_separated_nonempty_list_separator_X: separated_nonempty_list_separator_X + * separated_nonempty_list_separator_X: X + * separated_nonempty_list_separator_X: separator separated_nonempty_list_separator_X X */ -%rule separated_list(separator, X): option(separated_nonempty_list(separator, X)) - ; - -%% - -%union{}; +%rule separated_list(separator, X) + : option(separated_nonempty_list(separator, X)) + ; diff 
--git a/tool/lrama/lib/lrama/grammar/symbol.rb b/tool/lrama/lib/lrama/grammar/symbol.rb index f9dffcad6c6b96..07aee0c0a20be9 100644 --- a/tool/lrama/lib/lrama/grammar/symbol.rb +++ b/tool/lrama/lib/lrama/grammar/symbol.rb @@ -1,19 +1,35 @@ +# rbs_inline: enabled # frozen_string_literal: true # Symbol is both of nterm and term # `number` is both for nterm and term # `token_id` is tokentype for term, internal sequence number for nterm # -# TODO: Add validation for ASCII code range for Token::Char module Lrama class Grammar class Symbol - attr_accessor :id, :alias_name, :tag, :number, :token_id, :nullable, :precedence, - :printer, :destructor, :error_token, :first_set, :first_set_bitmap - attr_reader :term - attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol + attr_accessor :id #: Lexer::Token::Base + attr_accessor :alias_name #: String? + attr_reader :number #: Integer + attr_accessor :number_bitmap #: Bitmap::bitmap + attr_accessor :tag #: Lexer::Token::Tag? + attr_accessor :token_id #: Integer + attr_accessor :nullable #: bool + attr_accessor :precedence #: Precedence? + attr_accessor :printer #: Printer? + attr_accessor :destructor #: Destructor? + attr_accessor :error_token #: ErrorToken + attr_accessor :first_set #: Set[Grammar::Symbol] + attr_accessor :first_set_bitmap #: Bitmap::bitmap + attr_reader :term #: bool + attr_writer :eof_symbol #: bool + attr_writer :error_symbol #: bool + attr_writer :undef_symbol #: bool + attr_writer :accept_symbol #: bool + # @rbs (id: Lexer::Token::Base, term: bool, ?alias_name: String?, ?number: Integer?, ?tag: Lexer::Token::Tag?, + # ?token_id: Integer?, ?nullable: bool?, ?precedence: Precedence?, ?printer: Printer?) 
-> void def initialize(id:, term:, alias_name: nil, number: nil, tag: nil, token_id: nil, nullable: nil, precedence: nil, printer: nil, destructor: nil) @id = id @alias_name = alias_name @@ -27,77 +43,105 @@ def initialize(id:, term:, alias_name: nil, number: nil, tag: nil, token_id: nil @destructor = destructor end + # @rbs (Integer) -> void + def number=(number) + @number = number + @number_bitmap = Bitmap::from_integer(number) + end + + # @rbs () -> bool def term? term end + # @rbs () -> bool def nterm? !term end + # @rbs () -> bool def eof_symbol? !!@eof_symbol end + # @rbs () -> bool def error_symbol? !!@error_symbol end + # @rbs () -> bool def undef_symbol? !!@undef_symbol end + # @rbs () -> bool def accept_symbol? !!@accept_symbol end + # @rbs () -> bool + def midrule? + return false if term? + + name.include?("$") || name.include?("@") + end + + # @rbs () -> String + def name + id.s_value + end + + # @rbs () -> String def display_name - alias_name || id.s_value + alias_name || name end # name for yysymbol_kind_t # # See: b4_symbol_kind_base # @type var name: String + # @rbs () -> String def enum_name case when accept_symbol? - name = "YYACCEPT" + res = "YYACCEPT" when eof_symbol? - name = "YYEOF" + res = "YYEOF" when term? && id.is_a?(Lrama::Lexer::Token::Char) - name = number.to_s + display_name + res = number.to_s + display_name when term? && id.is_a?(Lrama::Lexer::Token::Ident) - name = id.s_value - when nterm? && (id.s_value.include?("$") || id.s_value.include?("@")) - name = number.to_s + id.s_value + res = name + when midrule? + res = number.to_s + name when nterm? - name = id.s_value + res = name else raise "Unexpected #{self}" end - "YYSYMBOL_" + name.gsub(/\W+/, "_") + "YYSYMBOL_" + res.gsub(/\W+/, "_") end # comment for yysymbol_kind_t + # + # @rbs () -> String? def comment case when accept_symbol? # YYSYMBOL_YYACCEPT - id.s_value + name when eof_symbol? # YYEOF alias_name when (term? 
&& 0 < token_id && token_id < 128) # YYSYMBOL_3_backslash_, YYSYMBOL_14_ - alias_name || id.s_value - when id.s_value.include?("$") || id.s_value.include?("@") + display_name + when midrule? # YYSYMBOL_21_1 - id.s_value + name else # YYSYMBOL_keyword_class, YYSYMBOL_strings_1 - alias_name || id.s_value + display_name end end end diff --git a/tool/lrama/lib/lrama/grammar/symbols/resolver.rb b/tool/lrama/lib/lrama/grammar/symbols/resolver.rb index 52f4ff90bdf1b8..085a835d2838de 100644 --- a/tool/lrama/lib/lrama/grammar/symbols/resolver.rb +++ b/tool/lrama/lib/lrama/grammar/symbols/resolver.rb @@ -1,24 +1,54 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class Symbols class Resolver - attr_reader :terms, :nterms - + # @rbs! + # + # interface _DelegatedMethods + # def symbols: () -> Array[Grammar::Symbol] + # def nterms: () -> Array[Grammar::Symbol] + # def terms: () -> Array[Grammar::Symbol] + # def add_nterm: (id: Lexer::Token::Base, ?alias_name: String?, ?tag: Lexer::Token::Tag?) -> Grammar::Symbol + # def add_term: (id: Lexer::Token::Base, ?alias_name: String?, ?tag: Lexer::Token::Tag?, ?token_id: Integer?, ?replace: bool) -> Grammar::Symbol + # def find_symbol_by_number!: (Integer number) -> Grammar::Symbol + # def find_symbol_by_id!: (Lexer::Token::Base id) -> Grammar::Symbol + # def token_to_symbol: (Lexer::Token::Base token) -> Grammar::Symbol + # def find_symbol_by_s_value!: (::String s_value) -> Grammar::Symbol + # def fill_nterm_type: (Array[Grammar::Type] types) -> void + # def fill_symbol_number: () -> void + # def fill_printer: (Array[Grammar::Printer] printers) -> void + # def fill_destructor: (Array[Destructor] destructors) -> (Destructor | bot) + # def fill_error_token: (Array[Grammar::ErrorToken] error_tokens) -> void + # def sort_by_number!: () -> Array[Grammar::Symbol] + # end + # + # @symbols: Array[Grammar::Symbol]? 
+ # @number: Integer + # @used_numbers: Hash[Integer, bool] + + attr_reader :terms #: Array[Grammar::Symbol] + attr_reader :nterms #: Array[Grammar::Symbol] + + # @rbs () -> void def initialize @terms = [] @nterms = [] end + # @rbs () -> Array[Grammar::Symbol] def symbols @symbols ||= (@terms + @nterms) end + # @rbs () -> Array[Grammar::Symbol] def sort_by_number! symbols.sort_by!(&:number) end + # @rbs (id: Lexer::Token::Base, ?alias_name: String?, ?tag: Lexer::Token::Tag?, ?token_id: Integer?, ?replace: bool) -> Grammar::Symbol def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false) if token_id && (sym = find_symbol_by_token_id(token_id)) if replace @@ -43,6 +73,7 @@ def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false) term end + # @rbs (id: Lexer::Token::Base, ?alias_name: String?, ?tag: Lexer::Token::Tag?) -> Grammar::Symbol def add_nterm(id:, alias_name: nil, tag: nil) if (sym = find_symbol_by_id(id)) return sym @@ -57,32 +88,39 @@ def add_nterm(id:, alias_name: nil, tag: nil) nterm end + # @rbs (::String s_value) -> Grammar::Symbol? def find_term_by_s_value(s_value) terms.find { |s| s.id.s_value == s_value } end + # @rbs (::String s_value) -> Grammar::Symbol? def find_symbol_by_s_value(s_value) symbols.find { |s| s.id.s_value == s_value } end + # @rbs (::String s_value) -> Grammar::Symbol def find_symbol_by_s_value!(s_value) find_symbol_by_s_value(s_value) || (raise "Symbol not found. value: `#{s_value}`") end + # @rbs (Lexer::Token::Base id) -> Grammar::Symbol? def find_symbol_by_id(id) symbols.find do |s| s.id == id || s.alias_name == id.s_value end end + # @rbs (Lexer::Token::Base id) -> Grammar::Symbol def find_symbol_by_id!(id) find_symbol_by_id(id) || (raise "Symbol not found. #{id}") end + # @rbs (Integer token_id) -> Grammar::Symbol? 
def find_symbol_by_token_id(token_id) symbols.find {|s| s.token_id == token_id } end + # @rbs (Integer number) -> Grammar::Symbol def find_symbol_by_number!(number) sym = symbols[number] @@ -92,6 +130,7 @@ def find_symbol_by_number!(number) sym end + # @rbs () -> void def fill_symbol_number # YYEMPTY = -2 # YYEOF = 0 @@ -102,6 +141,7 @@ def fill_symbol_number fill_nterms_number end + # @rbs (Array[Grammar::Type] types) -> void def fill_nterm_type(types) types.each do |type| nterm = find_nterm_by_id!(type.id) @@ -109,6 +149,7 @@ def fill_nterm_type(types) end end + # @rbs (Array[Grammar::Printer] printers) -> void def fill_printer(printers) symbols.each do |sym| printers.each do |printer| @@ -126,6 +167,7 @@ def fill_printer(printers) end end + # @rbs (Array[Destructor] destructors) -> (Array[Grammar::Symbol] | bot) def fill_destructor(destructors) symbols.each do |sym| destructors.each do |destructor| @@ -143,6 +185,7 @@ def fill_destructor(destructors) end end + # @rbs (Array[Grammar::ErrorToken] error_tokens) -> void def fill_error_token(error_tokens) symbols.each do |sym| error_tokens.each do |token| @@ -160,28 +203,33 @@ def fill_error_token(error_tokens) end end + # @rbs (Lexer::Token::Base token) -> Grammar::Symbol def token_to_symbol(token) case token - when Lrama::Lexer::Token + when Lrama::Lexer::Token::Base find_symbol_by_id!(token) else raise "Unknown class: #{token}" end end + # @rbs () -> void def validate! validate_number_uniqueness! validate_alias_name_uniqueness! + validate_symbols! end private + # @rbs (Lexer::Token::Base id) -> Grammar::Symbol def find_nterm_by_id!(id) @nterms.find do |s| s.id == id end || (raise "Symbol not found. 
#{id}") end + # @rbs () -> void def fill_terms_number # Character literal in grammar file has # token id corresponding to ASCII code by default, @@ -245,6 +293,7 @@ def fill_terms_number end end + # @rbs () -> void def fill_nterms_number token_id = 0 @@ -266,6 +315,7 @@ def fill_nterms_number end end + # @rbs () -> Hash[Integer, bool] def used_numbers return @used_numbers if defined?(@used_numbers) @@ -276,6 +326,7 @@ def used_numbers @used_numbers end + # @rbs () -> void def validate_number_uniqueness! invalid = symbols.group_by(&:number).select do |number, syms| syms.count > 1 @@ -286,6 +337,7 @@ def validate_number_uniqueness! raise "Symbol number is duplicated. #{invalid}" end + # @rbs () -> void def validate_alias_name_uniqueness! invalid = symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms| syms.count > 1 @@ -295,6 +347,15 @@ def validate_alias_name_uniqueness! raise "Symbol alias name is duplicated. #{invalid}" end + + # @rbs () -> void + def validate_symbols! + symbols.each { |sym| sym.id.validate } + errors = symbols.map { |sym| sym.id.errors }.flatten.compact + return if errors.empty? + + raise errors.join("\n") + end end end end diff --git a/tool/lrama/lib/lrama/grammar/type.rb b/tool/lrama/lib/lrama/grammar/type.rb index 65537288b310b4..c6317694472e61 100644 --- a/tool/lrama/lib/lrama/grammar/type.rb +++ b/tool/lrama/lib/lrama/grammar/type.rb @@ -1,15 +1,27 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class Type - attr_reader :id, :tag + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! 
+ # @id: Lexer::Token::Base + # @tag: Lexer::Token::Tag + attr_reader :id #: Lexer::Token::Base + attr_reader :tag #: Lexer::Token::Tag + + # @rbs (id: Lexer::Token::Base, tag: Lexer::Token::Tag) -> void def initialize(id:, tag:) @id = id @tag = tag end + # @rbs (Grammar::Type other) -> bool def ==(other) self.class == other.class && self.id == other.id && diff --git a/tool/lrama/lib/lrama/grammar/union.rb b/tool/lrama/lib/lrama/grammar/union.rb index 5f1bee0069af23..774cc66fc6617e 100644 --- a/tool/lrama/lib/lrama/grammar/union.rb +++ b/tool/lrama/lib/lrama/grammar/union.rb @@ -1,8 +1,19 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar - class Union < Struct.new(:code, :lineno, keyword_init: true) + class Union + attr_reader :code #: Grammar::Code::NoReferenceCode + attr_reader :lineno #: Integer + + # @rbs (code: Grammar::Code::NoReferenceCode, lineno: Integer) -> void + def initialize(code:, lineno:) + @code = code + @lineno = lineno + end + + # @rbs () -> String def braces_less_code # Braces is already removed by lexer code.s_value diff --git a/tool/lrama/lib/lrama/grammar_validator.rb b/tool/lrama/lib/lrama/grammar_validator.rb deleted file mode 100644 index 7790499589dfcc..00000000000000 --- a/tool/lrama/lib/lrama/grammar_validator.rb +++ /dev/null @@ -1,37 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class GrammarValidator - def initialize(grammar, states, logger) - @grammar = grammar - @states = states - @logger = logger - end - - def valid? - conflicts_within_threshold? - end - - private - - def conflicts_within_threshold? - return true unless @grammar.expect - - [sr_conflicts_within_threshold(@grammar.expect), rr_conflicts_within_threshold(0)].all? 
- end - - def sr_conflicts_within_threshold(expected) - return true if expected == @states.sr_conflicts_count - - @logger.error("shift/reduce conflicts: #{@states.sr_conflicts_count} found, #{expected} expected") - false - end - - def rr_conflicts_within_threshold(expected) - return true if expected == @states.rr_conflicts_count - - @logger.error("reduce/reduce conflicts: #{@states.rr_conflicts_count} found, #{expected} expected") - false - end - end -end diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb index c50af82ae4c300..ce98b505a72ae7 100644 --- a/tool/lrama/lib/lrama/lexer.rb +++ b/tool/lrama/lib/lrama/lexer.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true require "strscan" @@ -8,10 +9,26 @@ module Lrama class Lexer - attr_reader :head_line, :head_column, :line - attr_accessor :status, :end_symbol - - SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze + # @rbs! + # + # type token = lexer_token | c_token + # + # type lexer_token = [String, Token::Token] | + # [::Symbol, Token::Tag] | + # [::Symbol, Token::Char] | + # [::Symbol, Token::Str] | + # [::Symbol, Token::Int] | + # [::Symbol, Token::Ident] + # + # type c_token = [:C_DECLARATION, Token::UserCode] + + attr_reader :head_line #: Integer + attr_reader :head_column #: Integer + attr_reader :line #: Integer + attr_accessor :status #: :initial | :c_declaration + attr_accessor :end_symbol #: String? + + SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze #: Array[String] PERCENT_TOKENS = %w( %union %token @@ -42,8 +59,11 @@ class Lexer %no-stdlib %inline %locations - ).freeze + %categories + %start + ).freeze #: Array[String] + # @rbs (GrammarFile grammar_file) -> void def initialize(grammar_file) @grammar_file = grammar_file @scanner = StringScanner.new(grammar_file.text) @@ -53,6 +73,7 @@ def initialize(grammar_file) @end_symbol = nil end + # @rbs () -> token? 
def next_token case @status when :initial @@ -62,10 +83,12 @@ def next_token end end + # @rbs () -> Integer def column @scanner.pos - @head end + # @rbs () -> Location def location Location.new( grammar_file: @grammar_file, @@ -74,13 +97,14 @@ def location ) end + # @rbs () -> lexer_token? def lex_token until @scanner.eos? do case when @scanner.scan(/\n/) newline when @scanner.scan(/\s+/) - # noop + @scanner.matched.count("\n").times { newline } when @scanner.scan(/\/\*/) lex_comment when @scanner.scan(/\/\/.*(?\n)?/) @@ -96,11 +120,11 @@ def lex_token when @scanner.eos? return when @scanner.scan(/#{SYMBOLS.join('|')}/) - return [@scanner.matched, @scanner.matched] + return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/) - return [@scanner.matched, @scanner.matched] + return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/[\?\+\*]/) - return [@scanner.matched, @scanner.matched] + return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/<\w+>/) return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/'.'/) @@ -108,9 +132,9 @@ def lex_token when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/) return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/".*?"/) - return [:STRING, %Q(#{@scanner.matched})] + return [:STRING, Lrama::Lexer::Token::Str.new(s_value: %Q(#{@scanner.matched}), location: location)] when @scanner.scan(/\d+/) - return [:INTEGER, Integer(@scanner.matched)] + return [:INTEGER, Lrama::Lexer::Token::Int.new(s_value: Integer(@scanner.matched), location: location)] when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/) token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: 
location) type = @@ -121,51 +145,53 @@ def lex_token end return [type, token] else - raise ParseError, "Unexpected token: #{@scanner.peek(10).chomp}." + raise ParseError, location.generate_error_message("Unexpected token") # steep:ignore UnknownConstant end end + # @rbs () -> c_token def lex_c_code nested = 0 - code = '' + code = +'' reset_first_position until @scanner.eos? do case when @scanner.scan(/{/) - code += @scanner.matched + code << @scanner.matched nested += 1 when @scanner.scan(/}/) if nested == 0 && @end_symbol == '}' @scanner.unscan return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] else - code += @scanner.matched + code << @scanner.matched nested -= 1 end when @scanner.check(/#{@end_symbol}/) return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] when @scanner.scan(/\n/) - code += @scanner.matched + code << @scanner.matched newline when @scanner.scan(/".*?"/) - code += %Q(#{@scanner.matched}) + code << %Q(#{@scanner.matched}) @line += @scanner.matched.count("\n") when @scanner.scan(/'.*?'/) - code += %Q(#{@scanner.matched}) + code << %Q(#{@scanner.matched}) when @scanner.scan(/[^\"'\{\}\n]+/) - code += @scanner.matched - when @scanner.scan(/#{Regexp.escape(@end_symbol)}/) - code += @scanner.matched + code << @scanner.matched + when @scanner.scan(/#{Regexp.escape(@end_symbol)}/) # steep:ignore + code << @scanner.matched else - code += @scanner.getch + code << @scanner.getch end end - raise ParseError, "Unexpected code: #{code}." + raise ParseError, location.generate_error_message("Unexpected code: #{code}") # steep:ignore UnknownConstant end private + # @rbs () -> void def lex_comment until @scanner.eos? 
do case @@ -178,11 +204,13 @@ def lex_comment end end + # @rbs () -> void def reset_first_position @head_line = line @head_column = column end + # @rbs () -> void def newline @line += 1 @head = @scanner.pos diff --git a/tool/lrama/lib/lrama/lexer/location.rb b/tool/lrama/lib/lrama/lexer/location.rb index defdbf8a0bc575..4465576d53ba4f 100644 --- a/tool/lrama/lib/lrama/lexer/location.rb +++ b/tool/lrama/lib/lrama/lexer/location.rb @@ -69,15 +69,15 @@ def to_s def generate_error_message(error_message) <<~ERROR.chomp #{path}:#{first_line}:#{first_column}: #{error_message} - #{line_with_carets} + #{error_with_carets} ERROR end # @rbs () -> String - def line_with_carets + def error_with_carets <<~TEXT - #{text} - #{carets} + #{formatted_first_lineno} | #{text} + #{line_number_padding} | #{carets_line} TEXT end @@ -89,13 +89,30 @@ def path end # @rbs () -> String - def blanks - (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ') + def carets_line + leading_whitespace + highlight_marker end # @rbs () -> String - def carets - blanks + '^' * (last_column - first_column) + def leading_whitespace + (text[0...first_column] or raise "Invalid first_column: #{first_column}") + .gsub(/[^\t]/, ' ') + end + + # @rbs () -> String + def highlight_marker + length = last_column - first_column + '^' + '~' * [0, length - 1].max + end + + # @rbs () -> String + def formatted_first_lineno + first_line.to_s.rjust(4) + end + + # @rbs () -> String + def line_number_padding + ' ' * formatted_first_lineno.length end # @rbs () -> String diff --git a/tool/lrama/lib/lrama/lexer/token.rb b/tool/lrama/lib/lrama/lexer/token.rb index 63da8be4a40653..37f77aa06955b6 100644 --- a/tool/lrama/lib/lrama/lexer/token.rb +++ b/tool/lrama/lib/lrama/lexer/token.rb @@ -1,70 +1,20 @@ # rbs_inline: enabled # frozen_string_literal: true +require_relative 'token/base' require_relative 'token/char' +require_relative 'token/empty' require_relative 'token/ident' require_relative 
'token/instantiate_rule' +require_relative 'token/int' +require_relative 'token/str' require_relative 'token/tag' +require_relative 'token/token' require_relative 'token/user_code' module Lrama class Lexer - class Token - attr_reader :s_value #: String - attr_reader :location #: Location - attr_accessor :alias_name #: String - attr_accessor :referred #: bool - - # @rbs (s_value: String, ?alias_name: String, ?location: Location) -> void - def initialize(s_value:, alias_name: nil, location: nil) - s_value.freeze - @s_value = s_value - @alias_name = alias_name - @location = location - end - - # @rbs () -> String - def to_s - "value: `#{s_value}`, location: #{location}" - end - - # @rbs (String string) -> bool - def referred_by?(string) - [self.s_value, self.alias_name].compact.include?(string) - end - - # @rbs (Token other) -> bool - def ==(other) - self.class == other.class && self.s_value == other.s_value - end - - # @rbs () -> Integer - def first_line - location.first_line - end - alias :line :first_line - - # @rbs () -> Integer - def first_column - location.first_column - end - alias :column :first_column - - # @rbs () -> Integer - def last_line - location.last_line - end - - # @rbs () -> Integer - def last_column - location.last_column - end - - # @rbs (Lrama::Grammar::Reference ref, String message) -> bot - def invalid_ref(ref, message) - location = self.location.partial_location(ref.first_column, ref.last_column) - raise location.generate_error_message(message) - end + module Token end end end diff --git a/tool/lrama/lib/lrama/lexer/token/base.rb b/tool/lrama/lib/lrama/lexer/token/base.rb new file mode 100644 index 00000000000000..3df93bbc737f7c --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/base.rb @@ -0,0 +1,73 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + module Token + class Base + attr_reader :s_value #: String + attr_reader :location #: Location + attr_accessor :alias_name #: String + attr_accessor 
:referred #: bool + attr_reader :errors #: Array[String] + + # @rbs (s_value: String, ?alias_name: String, ?location: Location) -> void + def initialize(s_value:, alias_name: nil, location: nil) + s_value.freeze + @s_value = s_value + @alias_name = alias_name + @location = location + @errors = [] + end + + # @rbs () -> String + def to_s + "value: `#{s_value}`, location: #{location}" + end + + # @rbs (String string) -> bool + def referred_by?(string) + [self.s_value, self.alias_name].compact.include?(string) + end + + # @rbs (Lexer::Token::Base other) -> bool + def ==(other) + self.class == other.class && self.s_value == other.s_value + end + + # @rbs () -> Integer + def first_line + location.first_line + end + alias :line :first_line + + # @rbs () -> Integer + def first_column + location.first_column + end + alias :column :first_column + + # @rbs () -> Integer + def last_line + location.last_line + end + + # @rbs () -> Integer + def last_column + location.last_column + end + + # @rbs (Lrama::Grammar::Reference ref, String message) -> bot + def invalid_ref(ref, message) + location = self.location.partial_location(ref.first_column, ref.last_column) + raise location.generate_error_message(message) + end + + # @rbs () -> bool + def validate + true + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/char.rb b/tool/lrama/lib/lrama/lexer/token/char.rb index fcab7a588f5e40..f4ef7c9fbcd90e 100644 --- a/tool/lrama/lib/lrama/lexer/token/char.rb +++ b/tool/lrama/lib/lrama/lexer/token/char.rb @@ -3,8 +3,21 @@ module Lrama class Lexer - class Token - class Char < Token + module Token + class Char < Base + # @rbs () -> void + def validate + validate_ascii_code_range + end + + private + + # @rbs () -> void + def validate_ascii_code_range + unless s_value.ascii_only? + errors << "Invalid character: `#{s_value}`. Only ASCII characters are allowed." 
+ end + end end end end diff --git a/tool/lrama/lib/lrama/lexer/token/empty.rb b/tool/lrama/lib/lrama/lexer/token/empty.rb new file mode 100644 index 00000000000000..375e256493bdef --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/empty.rb @@ -0,0 +1,14 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + module Token + class Empty < Base + def initialize(location: nil) + super(s_value: '%empty', location: location) + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/ident.rb b/tool/lrama/lib/lrama/lexer/token/ident.rb index 8b1328a040fadd..4880be907330c8 100644 --- a/tool/lrama/lib/lrama/lexer/token/ident.rb +++ b/tool/lrama/lib/lrama/lexer/token/ident.rb @@ -3,8 +3,8 @@ module Lrama class Lexer - class Token - class Ident < Token + module Token + class Ident < Base end end end diff --git a/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb index 37d412aa838b3f..7051ba75a4b2ab 100644 --- a/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb +++ b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb @@ -3,12 +3,12 @@ module Lrama class Lexer - class Token - class InstantiateRule < Token - attr_reader :args #: Array[Lexer::Token] + module Token + class InstantiateRule < Base + attr_reader :args #: Array[Lexer::Token::Base] attr_reader :lhs_tag #: Lexer::Token::Tag? - # @rbs (s_value: String, ?alias_name: String, ?location: Location, ?args: Array[Lexer::Token], ?lhs_tag: Lexer::Token::Tag?) -> void + # @rbs (s_value: String, ?alias_name: String, ?location: Location, ?args: Array[Lexer::Token::Base], ?lhs_tag: Lexer::Token::Tag?) 
-> void def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil) super s_value: s_value, alias_name: alias_name, location: location @args = args diff --git a/tool/lrama/lib/lrama/lexer/token/int.rb b/tool/lrama/lib/lrama/lexer/token/int.rb new file mode 100644 index 00000000000000..7daf48d4d36970 --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/int.rb @@ -0,0 +1,14 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + module Token + class Int < Base + # @rbs! + # def initialize: (s_value: Integer, ?alias_name: String, ?location: Location) -> void + # def s_value: () -> Integer + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/str.rb b/tool/lrama/lib/lrama/lexer/token/str.rb new file mode 100644 index 00000000000000..cf9de6cf0f49bd --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/str.rb @@ -0,0 +1,11 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + module Token + class Str < Base + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/tag.rb b/tool/lrama/lib/lrama/lexer/token/tag.rb index b346ef7c5c4075..68c6268219af09 100644 --- a/tool/lrama/lib/lrama/lexer/token/tag.rb +++ b/tool/lrama/lib/lrama/lexer/token/tag.rb @@ -3,8 +3,8 @@ module Lrama class Lexer - class Token - class Tag < Token + module Token + class Tag < Base # @rbs () -> String def member # Omit "<>" diff --git a/tool/lrama/lib/lrama/lexer/token/token.rb b/tool/lrama/lib/lrama/lexer/token/token.rb new file mode 100644 index 00000000000000..935797efc68f8f --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/token.rb @@ -0,0 +1,11 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + module Token + class Token < Base + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/user_code.rb b/tool/lrama/lib/lrama/lexer/token/user_code.rb index 4ef40e6dc8f790..166f04954a9275 100644 --- 
a/tool/lrama/lib/lrama/lexer/token/user_code.rb +++ b/tool/lrama/lib/lrama/lexer/token/user_code.rb @@ -5,8 +5,8 @@ module Lrama class Lexer - class Token - class UserCode < Token + module Token + class UserCode < Base attr_accessor :tag #: Lexer::Token::Tag # @rbs () -> Array[Lrama::Grammar::Reference] @@ -38,43 +38,69 @@ def _references # @rbs (StringScanner scanner) -> Lrama::Grammar::Reference? def scan_reference(scanner) start = scanner.pos - case - # $ references - # It need to wrap an identifier with brackets to use ".-" for identifiers - when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $$ - tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil - return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos) - when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $1 - tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil - return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos) - when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $program (named reference without brackets) - tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil - return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) - when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $[expr.right] (named reference with brackets) - tag = scanner[1] ? 
Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil - return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) - - # @ references - # It need to wrap an identifier with brackets to use ".-" for identifiers - when scanner.scan(/@\$/) # @$ - return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos) - when scanner.scan(/@(\d+)/) # @1 - return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos) - when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) - return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) - when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets) - return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) + if scanner.scan(/ + # $ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + \$(<[a-zA-Z0-9_]+>)?(?: + (\$) # $$, $$ + | (\d+) # $1, $2, $1 + | ([a-zA-Z_][a-zA-Z0-9_]*) # $foo, $expr, $program (named reference without brackets) + | \[([a-zA-Z_.][-a-zA-Z0-9_.]*)\] # $[expr.right], $[expr-right], $[expr.right] (named reference with brackets) + ) + | + # @ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + @(?: + (\$) # @$ + | (\d+) # @1 + | ([a-zA-Z_][a-zA-Z0-9_]*) # @foo, @expr (named reference without brackets) + | \[([a-zA-Z_.][-a-zA-Z0-9_.]*)\] # @[expr.right], @[expr-right] (named reference with brackets) + ) + | + # $: references + \$: + (?: + (\$) # $:$ + | (\d+) # $:1 + | ([a-zA-Z_][a-zA-Z0-9_]*) # $:foo, $:expr (named reference without brackets) + | \[([a-zA-Z_.][-a-zA-Z0-9_.]*)\] # $:[expr.right], $:[expr-right] (named reference with brackets) + 
) + /x) + case + # $ references + when scanner[2] # $$, $$ + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner[3] # $1, $2, $1 + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[3]), index: Integer(scanner[3]), ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner[4] # $foo, $expr, $program (named reference without brackets) + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[4], ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner[5] # $[expr.right], $[expr-right], $[expr.right] (named reference with brackets) + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[5], ex_tag: tag, first_column: start, last_column: scanner.pos) - # $: references - when scanner.scan(/\$:\$/) # $:$ - return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos) - when scanner.scan(/\$:(\d+)/) # $:1 - return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos) - when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets) - return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos) - when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets) - return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos) + # @ references + when scanner[6] # @$ + return 
Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos) + when scanner[7] # @1 + return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[7]), index: Integer(scanner[7]), first_column: start, last_column: scanner.pos) + when scanner[8] # @foo, @expr (named reference without brackets) + return Lrama::Grammar::Reference.new(type: :at, name: scanner[8], first_column: start, last_column: scanner.pos) + when scanner[9] # @[expr.right], @[expr-right] (named reference with brackets) + return Lrama::Grammar::Reference.new(type: :at, name: scanner[9], first_column: start, last_column: scanner.pos) + # $: references + when scanner[10] # $:$ + return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos) + when scanner[11] # $:1 + return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[11]), index: Integer(scanner[11]), first_column: start, last_column: scanner.pos) + when scanner[12] # $:foo, $:expr (named reference without brackets) + return Lrama::Grammar::Reference.new(type: :index, name: scanner[12], first_column: start, last_column: scanner.pos) + when scanner[13] # $:[expr.right], $:[expr-right] (named reference with brackets) + return Lrama::Grammar::Reference.new(type: :index, name: scanner[13], first_column: start, last_column: scanner.pos) + end end end end diff --git a/tool/lrama/lib/lrama/logger.rb b/tool/lrama/lib/lrama/logger.rb index 88bb9209604d60..291eea5296ec3e 100644 --- a/tool/lrama/lib/lrama/logger.rb +++ b/tool/lrama/lib/lrama/logger.rb @@ -8,14 +8,24 @@ def initialize(out = STDERR) @out = out end + # @rbs () -> void + def line_break + @out << "\n" + end + # @rbs (String message) -> void - def warn(message) + def trace(message) @out << message << "\n" end + # @rbs (String message) -> void + def warn(message) + @out << 'warning: ' << message << "\n" + end + # @rbs (String message) -> void def error(message) - @out << message << "\n" 
+ @out << 'error: ' << message << "\n" end end end diff --git a/tool/lrama/lib/lrama/option_parser.rb b/tool/lrama/lib/lrama/option_parser.rb index 23988a5fbb8099..5a15d59c7bba97 100644 --- a/tool/lrama/lib/lrama/option_parser.rb +++ b/tool/lrama/lib/lrama/option_parser.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true require 'optparse' @@ -5,17 +6,32 @@ module Lrama # Handle option parsing for the command line interface. class OptionParser + # @rbs! + # @options: Lrama::Options + # @trace: Array[String] + # @report: Array[String] + # @profile: Array[String] + + # @rbs (Array[String]) -> Lrama::Options + def self.parse(argv) + new.parse(argv) + end + + # @rbs () -> void def initialize @options = Options.new @trace = [] @report = [] + @profile = [] end + # @rbs (Array[String]) -> Lrama::Options def parse(argv) parse_by_option_parser(argv) @options.trace_opts = validate_trace(@trace) @options.report_opts = validate_report(@report) + @options.profile_opts = validate_profile(@profile) @options.grammar_file = argv.shift unless @options.grammar_file @@ -46,6 +62,7 @@ def parse(argv) private + # @rbs (Array[String]) -> void def parse_by_option_parser(argv) ::OptionParser.new do |o| o.banner = <<~BANNER @@ -60,7 +77,14 @@ def parse_by_option_parser(argv) o.separator 'Tuning the Parser:' o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } o.on('-t', '--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } - o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") {|v| @options.define = v } + o.separator " same as '-Dparse.trace'" + o.on('--locations', 'enable location support') {|v| @options.locations = true } + o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") do |v| + @options.define = v.each_with_object({}) do |item, hash| # steep:ignore UnannotatedEmptyCollection + key, value = item.split('=', 2) + hash[key] = value + end + end 
o.separator '' o.separator 'Output:' o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } @@ -91,10 +115,19 @@ def parse_by_option_parser(argv) o.on_tail ' time display generation time' o.on_tail ' all include all the above traces' o.on_tail ' none disable all traces' + o.on('--diagram=[FILE]', 'generate a diagram of the rules') do |v| + @options.diagram = true + @options.diagram_file = v if v + end + o.on('--profile=PROFILES', Array, 'profiles parser generation parts') {|v| @profile = v } + o.on_tail '' + o.on_tail 'PROFILES is a list of comma-separated words that can include:' + o.on_tail ' call-stack use sampling call-stack profiler (stackprof gem)' + o.on_tail ' memory use memory profiler (memory_profiler gem)' o.on('-v', '--verbose', "same as '--report=state'") {|_v| @report << 'states' } o.separator '' o.separator 'Diagnostics:' - o.on('-W', '--warnings', 'report the warnings') {|v| @options.diagnostic = true } + o.on('-W', '--warnings', 'report the warnings') {|v| @options.warnings = true } o.separator '' o.separator 'Error Recovery:' o.on('-e', 'enable error recovery') {|v| @options.error_recovery = true } @@ -107,9 +140,10 @@ def parse_by_option_parser(argv) end end - ALIASED_REPORTS = { cex: :counterexamples }.freeze - VALID_REPORTS = %i[states itemsets lookaheads solved counterexamples rules terms verbose].freeze + ALIASED_REPORTS = { cex: :counterexamples }.freeze #: Hash[Symbol, Symbol] + VALID_REPORTS = %i[states itemsets lookaheads solved counterexamples rules terms verbose].freeze #: Array[Symbol] + # @rbs (Array[String]) -> Hash[Symbol, bool] def validate_report(report) h = { grammar: true } return h if report.empty? 
@@ -131,6 +165,7 @@ def validate_report(report) return h end + # @rbs (String) -> Symbol def aliased_report_option(opt) (ALIASED_REPORTS[opt.to_sym] || opt).to_sym end @@ -139,15 +174,16 @@ def aliased_report_option(opt) locations scan parse automaton bitsets closure grammar rules only-explicit-rules actions resource sets muscles tools m4-early m4 skeleton time ielr cex - ].freeze + ].freeze #: Array[String] NOT_SUPPORTED_TRACES = %w[ locations scan parse bitsets grammar resource sets muscles tools m4-early m4 skeleton ielr cex - ].freeze - SUPPORTED_TRACES = VALID_TRACES - NOT_SUPPORTED_TRACES + ].freeze #: Array[String] + SUPPORTED_TRACES = VALID_TRACES - NOT_SUPPORTED_TRACES #: Array[String] + # @rbs (Array[String]) -> Hash[Symbol, bool] def validate_trace(trace) - h = {} + h = {} #: Hash[Symbol, bool] return h if trace.empty? || trace == ['none'] all_traces = SUPPORTED_TRACES - %w[only-explicit-rules] if trace == ['all'] @@ -159,7 +195,25 @@ def validate_trace(trace) if SUPPORTED_TRACES.include?(t) h[t.gsub(/-/, '_').to_sym] = true else - raise "Invalid trace option \"#{t}\"." + raise "Invalid trace option \"#{t}\".\nValid options are [#{SUPPORTED_TRACES.join(", ")}]." + end + end + + return h + end + + VALID_PROFILES = %w[call-stack memory].freeze #: Array[String] + + # @rbs (Array[String]) -> Hash[Symbol, bool] + def validate_profile(profile) + h = {} #: Hash[Symbol, bool] + return h if profile.empty? + + profile.each do |t| + if VALID_PROFILES.include?(t) + h[t.gsub(/-/, '_').to_sym] = true + else + raise "Invalid profile option \"#{t}\".\nValid options are [#{VALID_PROFILES.join(", ")}]." end end diff --git a/tool/lrama/lib/lrama/options.rb b/tool/lrama/lib/lrama/options.rb index 08f75a770faa00..87aec62448a959 100644 --- a/tool/lrama/lib/lrama/options.rb +++ b/tool/lrama/lib/lrama/options.rb @@ -1,28 +1,46 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama # Command line options. 
class Options - attr_accessor :skeleton, :header, :header_file, - :report_file, :outfile, - :error_recovery, :grammar_file, - :trace_opts, :report_opts, - :diagnostic, :y, :debug, :define + attr_accessor :skeleton #: String + attr_accessor :locations #: bool + attr_accessor :header #: bool + attr_accessor :header_file #: String? + attr_accessor :report_file #: String? + attr_accessor :outfile #: String + attr_accessor :error_recovery #: bool + attr_accessor :grammar_file #: String + attr_accessor :trace_opts #: Hash[Symbol, bool]? + attr_accessor :report_opts #: Hash[Symbol, bool]? + attr_accessor :warnings #: bool + attr_accessor :y #: IO + attr_accessor :debug #: bool + attr_accessor :define #: Hash[String, String] + attr_accessor :diagram #: bool + attr_accessor :diagram_file #: String + attr_accessor :profile_opts #: Hash[Symbol, bool]? + # @rbs () -> void def initialize @skeleton = "bison/yacc.c" + @locations = false @define = {} @header = false @header_file = nil @report_file = nil @outfile = "y.tab.c" @error_recovery = false - @grammar_file = nil + @grammar_file = '' @trace_opts = nil @report_opts = nil - @diagnostic = false + @warnings = false @y = STDIN @debug = false + @diagram = false + @diagram_file = "diagram.html" + @profile_opts = nil end end end diff --git a/tool/lrama/lib/lrama/output.rb b/tool/lrama/lib/lrama/output.rb index 3c7316ac6d871c..d527be8bd40f89 100644 --- a/tool/lrama/lib/lrama/output.rb +++ b/tool/lrama/lib/lrama/output.rb @@ -1,13 +1,12 @@ # frozen_string_literal: true -require "erb" require "forwardable" -require_relative "report/duration" +require_relative "tracer/duration" module Lrama class Output extend Forwardable - include Report::Duration + include Tracer::Duration attr_reader :grammar_file_path, :context, :grammar, :error_recovery, :include_header @@ -43,7 +42,7 @@ def self.erb(input) end def render_partial(file) - render_template(partial_file(file)) + ERB.render(partial_file(file), context: @context, output: self) end def 
render @@ -405,16 +404,10 @@ def percent_code(name) private def eval_template(file, path) - tmp = render_template(file) + tmp = ERB.render(file, context: @context, output: self) replace_special_variables(tmp, path) end - def render_template(file) - erb = self.class.erb(File.read(file)) - erb.filename = file - erb.result_with_hash(context: @context, output: self) - end - def template_file File.join(template_dir, @template_name) end diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb index 177e784e5c4eb0..20c3ad347f0491 100644 --- a/tool/lrama/lib/lrama/parser.rb +++ b/tool/lrama/lib/lrama/parser.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true # # DO NOT MODIFY!!!! # This file is automatically generated by Racc 1.8.1 @@ -654,22 +655,25 @@ def token_to_str(t) module Lrama class Parser < Racc::Parser -module_eval(<<'...end parser.y/module_eval...', 'parser.y', 428) +module_eval(<<'...end parser.y/module_eval...', 'parser.y', 504) -include Lrama::Report::Duration +include Lrama::Tracer::Duration -def initialize(text, path, debug = false, define = {}) +def initialize(text, path, debug = false, locations = false, define = {}) + @path = path @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) - @yydebug = debug + @yydebug = debug || define.key?('parse.trace') @rule_counter = Lrama::Grammar::Counter.new(0) @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @locations = locations @define = define end def parse - report_duration(:parse) do + message = "parse '#{File.basename(@path)}'" + report_duration(message) do @lexer = Lrama::Lexer.new(@grammar_file) - @grammar = Lrama::Grammar.new(@rule_counter, @define) + @grammar = Lrama::Grammar.new(@rule_counter, @locations, @define) @precedence_number = 0 reset_precs do_parse @@ -682,7 +686,14 @@ def next_token end def on_error(error_token_id, error_value, value_stack) - if error_value.is_a?(Lrama::Lexer::Token) + case error_value + when Lrama::Lexer::Token::Int + location = 
error_value.location + value = "#{error_value.s_value}" + when Lrama::Lexer::Token::Token + location = error_value.location + value = "\"#{error_value.s_value}\"" + when Lrama::Lexer::Token::Base location = error_value.location value = "'#{error_value.s_value}'" else @@ -696,7 +707,7 @@ def on_error(error_token_id, error_value, value_stack) end def on_action_error(error_message, error_value) - if error_value.is_a?(Lrama::Lexer::Token) + if error_value.is_a?(Lrama::Lexer::Token::Base) location = error_value.location else location = @lexer.location @@ -708,10 +719,15 @@ def on_action_error(error_message, error_value) private def reset_precs - @prec_seen = false + @opening_prec_seen = false + @trailing_prec_seen = false @code_after_prec = false end +def prec_seen? + @opening_prec_seen || @trailing_prec_seen +end + def begin_c_declaration(end_symbol) @lexer.status = :c_declaration @lexer.end_symbol = end_symbol @@ -729,306 +745,322 @@ def raise_parse_error(error_message, location) ##### State transition tables begin ### racc_action_table = [ - 89, 49, 90, 167, 49, 101, 173, 49, 101, 167, - 49, 101, 173, 6, 101, 80, 49, 49, 48, 48, - 41, 76, 76, 49, 49, 48, 48, 42, 76, 76, - 49, 49, 48, 48, 101, 96, 113, 49, 87, 48, - 150, 101, 96, 151, 45, 171, 169, 170, 151, 176, - 170, 91, 169, 170, 81, 176, 170, 20, 24, 25, - 26, 27, 28, 29, 30, 31, 87, 32, 33, 34, - 35, 36, 37, 38, 39, 49, 4, 48, 5, 101, - 96, 181, 182, 183, 128, 20, 24, 25, 26, 27, - 28, 29, 30, 31, 46, 32, 33, 34, 35, 36, - 37, 38, 39, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 53, 20, 24, 25, 26, 27, 28, 29, - 30, 31, 53, 32, 33, 34, 35, 36, 37, 38, - 39, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 44, 20, 24, 25, 26, 27, 28, 29, 30, 31, - 53, 32, 33, 34, 35, 36, 37, 38, 39, 49, - 4, 48, 5, 101, 96, 49, 49, 48, 48, 101, - 101, 49, 49, 48, 48, 101, 101, 49, 49, 48, - 197, 101, 101, 49, 49, 197, 48, 101, 101, 49, - 49, 197, 48, 101, 181, 182, 183, 128, 204, 210, - 217, 205, 205, 205, 49, 49, 48, 48, 49, 49, - 48, 48, 49, 49, 
48, 48, 181, 182, 183, 116, - 117, 56, 53, 53, 53, 53, 53, 62, 63, 64, - 65, 66, 68, 68, 68, 82, 53, 53, 104, 108, - 108, 115, 122, 123, 125, 128, 129, 133, 139, 140, - 141, 142, 144, 145, 101, 154, 139, 157, 154, 161, - 162, 68, 164, 165, 172, 177, 154, 184, 128, 188, - 154, 190, 128, 154, 199, 154, 128, 68, 165, 206, - 165, 68, 68, 215, 128, 68 ] + 98, 98, 99, 99, 87, 53, 53, 52, 178, 110, + 110, 97, 53, 53, 184, 178, 110, 110, 53, 181, + 184, 162, 110, 6, 163, 181, 181, 53, 53, 52, + 52, 181, 79, 79, 53, 53, 52, 52, 43, 79, + 79, 53, 4, 52, 5, 110, 88, 94, 182, 125, + 126, 163, 100, 100, 180, 193, 194, 195, 137, 185, + 188, 180, 4, 44, 5, 185, 188, 94, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 46, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 47, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 47, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 12, 13, 50, + 57, 14, 15, 16, 17, 18, 19, 20, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 57, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 12, 13, 57, + 60, 14, 15, 16, 17, 18, 19, 20, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 57, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 53, 53, 52, + 52, 110, 105, 53, 53, 52, 52, 110, 105, 53, + 53, 52, 52, 110, 105, 53, 53, 52, 52, 110, + 105, 53, 53, 52, 52, 110, 110, 53, 53, 52, + 209, 110, 110, 53, 53, 209, 52, 110, 110, 53, + 53, 209, 52, 110, 193, 194, 195, 137, 216, 222, + 229, 217, 217, 217, 53, 53, 52, 52, 193, 194, + 195, 57, 57, 57, 57, 66, 67, 68, 69, 70, + 72, 72, 72, 86, 89, 47, 57, 57, 113, 117, + 117, 79, 123, 124, 131, 47, 133, 137, 139, 143, + 149, 150, 151, 152, 133, 155, 156, 157, 110, 166, + 149, 169, 172, 173, 72, 175, 176, 183, 189, 166, + 196, 137, 200, 202, 137, 166, 211, 166, 137, 72, + 176, 218, 176, 72, 72, 227, 137, 72 ] racc_action_check = [ - 47, 153, 47, 153, 159, 153, 159, 178, 159, 178, - 189, 178, 189, 1, 189, 39, 35, 36, 35, 36, - 5, 35, 36, 37, 38, 37, 38, 6, 37, 38, - 59, 74, 59, 74, 59, 59, 74, 60, 45, 60, - 138, 60, 60, 138, 9, 156, 153, 153, 156, 159, - 159, 47, 178, 178, 39, 189, 189, 45, 45, 45, 
- 45, 45, 45, 45, 45, 45, 83, 45, 45, 45, - 45, 45, 45, 45, 45, 61, 0, 61, 0, 61, - 61, 166, 166, 166, 166, 83, 83, 83, 83, 83, - 83, 83, 83, 83, 11, 83, 83, 83, 83, 83, - 83, 83, 83, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 13, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 14, 3, 3, 3, 3, 3, 3, 3, - 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 15, 8, 8, 8, 8, 8, 8, 8, 8, 97, - 2, 97, 2, 97, 97, 71, 108, 71, 108, 71, - 108, 109, 169, 109, 169, 109, 169, 176, 184, 176, - 184, 176, 184, 190, 205, 190, 205, 190, 205, 206, - 12, 206, 12, 206, 174, 174, 174, 174, 196, 201, - 214, 196, 201, 214, 69, 76, 69, 76, 104, 105, - 104, 105, 111, 113, 111, 113, 198, 198, 198, 81, - 81, 16, 17, 20, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 40, 51, 56, 67, 70, - 72, 80, 84, 85, 86, 87, 93, 107, 115, 116, - 117, 118, 127, 128, 134, 140, 141, 143, 144, 145, - 146, 150, 151, 152, 158, 163, 165, 167, 168, 171, - 172, 173, 175, 177, 187, 188, 192, 193, 195, 197, - 200, 202, 204, 209, 210, 216 ] + 51, 97, 51, 97, 41, 75, 165, 75, 165, 75, + 165, 51, 171, 190, 171, 190, 171, 190, 201, 165, + 201, 148, 201, 1, 148, 171, 190, 36, 37, 36, + 37, 201, 36, 37, 38, 39, 38, 39, 5, 38, + 39, 117, 0, 117, 0, 117, 41, 46, 168, 88, + 88, 168, 51, 97, 165, 177, 177, 177, 177, 171, + 171, 190, 2, 6, 2, 201, 201, 90, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 9, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 10, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 11, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 3, 3, 12, + 14, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 15, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 8, 8, 16, + 17, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 18, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 63, 13, 63, + 13, 63, 63, 64, 73, 64, 73, 64, 64, 65, + 78, 65, 78, 65, 65, 106, 79, 106, 79, 106, + 106, 118, 180, 118, 180, 118, 180, 188, 196, 188, + 196, 188, 196, 202, 217, 202, 217, 202, 217, 218, + 113, 218, 113, 218, 186, 186, 186, 186, 208, 213, + 226, 208, 213, 226, 114, 123, 114, 123, 210, 210, + 210, 24, 25, 26, 27, 28, 29, 30, 
31, 32, + 33, 34, 35, 40, 42, 47, 55, 60, 71, 74, + 76, 80, 81, 87, 91, 92, 93, 94, 102, 116, + 124, 125, 126, 127, 133, 136, 137, 138, 144, 150, + 151, 153, 156, 158, 162, 163, 164, 170, 174, 176, + 178, 179, 182, 184, 187, 189, 199, 200, 204, 205, + 207, 209, 212, 214, 216, 221, 222, 228 ] racc_action_pointer = [ - 66, 13, 150, 90, nil, 13, 27, nil, 118, 35, - nil, 88, 187, 63, 73, 101, 216, 173, nil, nil, - 174, nil, nil, nil, 175, 176, 177, 222, 223, 224, - 225, 226, 224, 225, 226, 13, 14, 20, 21, 10, - 233, nil, nil, nil, nil, 34, nil, -5, nil, nil, - nil, 187, nil, nil, nil, nil, 188, nil, nil, 27, - 34, 72, nil, nil, nil, nil, nil, 230, nil, 201, - 231, 162, 232, nil, 28, nil, 202, nil, nil, nil, - 200, 215, nil, 62, 233, 221, 222, 191, nil, nil, - nil, nil, nil, 244, nil, nil, nil, 156, nil, nil, - nil, nil, nil, nil, 205, 206, nil, 241, 163, 168, - nil, 209, nil, 210, nil, 243, 206, 209, 240, nil, - nil, nil, nil, nil, nil, nil, nil, 209, 248, nil, - nil, nil, nil, nil, 247, nil, nil, nil, -2, nil, - 208, 251, nil, 255, 211, 204, 210, nil, nil, nil, - 253, 257, 217, -2, nil, nil, 3, nil, 218, 1, - nil, nil, nil, 222, nil, 219, 30, 226, 214, 169, - nil, 226, 223, 230, 143, 218, 174, 226, 4, nil, - nil, nil, nil, nil, 175, nil, nil, 272, 228, 7, - 180, nil, 222, 269, nil, 232, 156, 238, 165, nil, - 234, 157, 273, nil, 274, 181, 186, nil, nil, 233, - 230, nil, nil, nil, 158, nil, 277, nil, nil ] + 32, 23, 52, 93, nil, 31, 63, nil, 123, 68, + 74, 84, 103, 165, 94, 111, 123, 135, 141, nil, + nil, nil, nil, nil, 215, 216, 217, 218, 230, 231, + 232, 233, 234, 232, 233, 234, 24, 25, 31, 32, + 238, -1, 242, nil, nil, nil, 43, 232, nil, nil, + nil, -5, nil, nil, nil, 230, nil, nil, nil, nil, + 231, nil, nil, 164, 170, 176, nil, nil, nil, nil, + nil, 240, nil, 171, 241, 2, 242, nil, 177, 183, + 243, 244, nil, nil, nil, nil, nil, 209, 45, nil, + 63, 245, 242, 243, 202, nil, nil, -4, nil, nil, + nil, nil, 256, nil, nil, nil, 182, nil, nil, nil, + nil, nil, nil, 207, 
221, nil, 253, 38, 188, nil, + nil, nil, nil, 222, 255, 215, 218, 252, nil, nil, + nil, nil, nil, 251, nil, nil, 219, 261, 250, nil, + nil, nil, nil, nil, 261, nil, nil, nil, -24, nil, + 219, 265, nil, 269, nil, nil, 216, nil, 256, nil, + nil, nil, 266, 270, 227, 3, nil, nil, 3, nil, + 228, 9, nil, nil, 232, nil, 229, 3, 236, 226, + 189, nil, 236, nil, 239, nil, 162, 229, 194, 235, + 10, nil, nil, nil, nil, nil, 195, nil, nil, 284, + 237, 15, 200, nil, 233, 281, nil, 241, 173, 247, + 176, nil, 243, 174, 285, nil, 286, 201, 206, nil, + nil, 278, 241, nil, nil, nil, 175, nil, 289, nil, + nil ] racc_action_default = [ - -1, -128, -1, -3, -10, -128, -128, -2, -3, -128, - -16, -128, -128, -128, -128, -128, -128, -128, -24, -25, - -128, -32, -33, -34, -128, -128, -128, -128, -128, -128, - -128, -128, -50, -50, -50, -128, -128, -128, -128, -128, - -128, -13, 219, -4, -26, -128, -17, -123, -93, -94, - -122, -14, -19, -85, -20, -21, -128, -23, -31, -128, - -128, -128, -38, -39, -40, -41, -42, -43, -51, -128, - -44, -128, -45, -46, -88, -90, -128, -47, -48, -49, - -128, -128, -11, -5, -7, -95, -128, -68, -18, -124, - -125, -126, -15, -128, -22, -27, -28, -29, -35, -83, - -84, -127, -36, -37, -128, -52, -54, -56, -128, -79, - -81, -88, -89, -128, -91, -128, -128, -128, -128, -6, - -8, -9, -120, -96, -97, -98, -69, -128, -128, -86, - -30, -55, -53, -57, -76, -82, -80, -92, -128, -62, - -66, -128, -12, -128, -66, -128, -128, -58, -77, -78, - -50, -128, -60, -64, -67, -70, -128, -121, -99, -100, - -102, -119, -87, -128, -63, -66, -68, -93, -68, -128, - -116, -128, -66, -93, -68, -68, -128, -66, -65, -71, - -72, -108, -109, -110, -128, -74, -75, -128, -66, -101, - -128, -103, -68, -50, -107, -59, -128, -93, -111, -117, - -61, -128, -50, -106, -50, -128, -128, -112, -113, -128, - -68, -104, -73, -114, -128, -118, -50, -115, -105 ] + -1, -136, -1, -3, -10, -136, -136, -2, -3, -136, + -14, -14, -136, -136, -136, -136, -136, -136, -136, -28, + -29, -34, -35, -36, -136, -136, -136, 
-136, -136, -136, + -136, -136, -136, -54, -54, -54, -136, -136, -136, -136, + -136, -136, -136, -13, 231, -4, -136, -14, -16, -17, + -20, -131, -100, -101, -130, -18, -23, -89, -24, -25, + -136, -27, -37, -136, -136, -136, -41, -42, -43, -44, + -45, -46, -55, -136, -47, -136, -48, -49, -92, -136, + -95, -97, -98, -50, -51, -52, -53, -136, -136, -11, + -5, -7, -14, -136, -72, -15, -21, -131, -132, -133, + -134, -19, -136, -26, -30, -31, -32, -38, -87, -88, + -135, -39, -40, -136, -56, -58, -60, -136, -83, -85, + -93, -94, -96, -136, -136, -136, -136, -136, -6, -8, + -9, -128, -104, -102, -105, -73, -136, -136, -136, -90, + -33, -59, -57, -61, -80, -86, -84, -99, -136, -66, + -70, -136, -12, -136, -103, -109, -136, -22, -136, -62, + -81, -82, -54, -136, -64, -68, -71, -74, -136, -129, + -106, -107, -127, -91, -136, -67, -70, -72, -100, -72, + -136, -124, -136, -109, -100, -110, -72, -72, -136, -70, + -69, -75, -76, -116, -117, -118, -136, -78, -79, -136, + -70, -108, -136, -111, -72, -54, -115, -63, -136, -100, + -119, -125, -65, -136, -54, -114, -54, -136, -136, -120, + -121, -136, -72, -112, -77, -122, -136, -126, -54, -123, + -113 ] racc_goto_table = [ - 69, 109, 50, 152, 57, 127, 84, 58, 112, 160, - 114, 59, 60, 61, 86, 52, 54, 55, 98, 102, - 103, 159, 106, 110, 175, 74, 74, 74, 74, 138, - 9, 1, 3, 180, 7, 43, 120, 160, 109, 109, - 195, 192, 121, 94, 119, 112, 40, 137, 118, 189, - 47, 200, 86, 92, 175, 156, 130, 131, 132, 107, - 135, 136, 88, 196, 111, 207, 111, 70, 72, 201, - 73, 77, 78, 79, 67, 147, 134, 178, 148, 149, - 93, 146, 124, 166, 179, 214, 185, 158, 208, 174, - 187, 209, 191, 193, 107, 107, 143, nil, nil, 186, - nil, 111, nil, 111, nil, nil, 194, nil, 166, nil, - 202, nil, nil, nil, 198, nil, nil, nil, 163, 174, - 198, nil, nil, nil, nil, nil, nil, nil, 216, nil, - nil, nil, nil, nil, nil, 213, 198, nil, nil, nil, + 73, 118, 136, 54, 48, 49, 164, 96, 91, 120, + 121, 93, 187, 148, 107, 111, 112, 119, 134, 171, + 56, 58, 59, 3, 61, 7, 78, 78, 78, 78, + 
62, 63, 64, 65, 115, 74, 76, 192, 1, 129, + 168, 95, 187, 118, 118, 207, 204, 201, 77, 83, + 84, 85, 128, 138, 147, 93, 212, 140, 154, 145, + 146, 101, 130, 116, 42, 127, 103, 208, 78, 78, + 219, 9, 51, 213, 141, 142, 45, 71, 159, 144, + 190, 160, 161, 102, 158, 191, 132, 197, 122, 226, + 170, 177, 220, 199, 203, 205, 221, 186, 153, nil, + nil, nil, nil, 116, 116, nil, 198, nil, nil, nil, + nil, nil, 214, 78, 206, nil, 177, nil, nil, nil, + nil, nil, 210, nil, nil, nil, nil, 186, 210, 174, + 228, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, 225, 210, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 203, nil, nil, nil, nil, nil, nil, nil, nil, - 211, nil, 212, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, 218 ] + nil, nil, 215, nil, nil, nil, nil, nil, nil, nil, + nil, 223, nil, 224, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 230 ] racc_goto_check = [ - 27, 20, 29, 33, 15, 40, 8, 15, 46, 39, - 46, 15, 15, 15, 12, 16, 16, 16, 22, 22, - 22, 50, 28, 43, 38, 29, 29, 29, 29, 32, - 7, 1, 6, 36, 6, 7, 5, 39, 20, 20, - 33, 36, 9, 15, 8, 46, 10, 46, 11, 50, - 13, 33, 12, 16, 38, 32, 22, 28, 28, 29, - 43, 43, 14, 37, 29, 36, 29, 24, 24, 37, - 25, 25, 25, 25, 23, 30, 31, 34, 41, 42, - 44, 45, 48, 20, 40, 37, 40, 49, 51, 20, - 52, 53, 40, 40, 29, 29, 54, nil, nil, 20, - nil, 29, nil, 29, nil, nil, 20, nil, 20, nil, - 40, nil, nil, nil, 20, nil, nil, nil, 27, 20, - 20, nil, nil, nil, nil, nil, nil, nil, 40, nil, - nil, nil, nil, nil, nil, 20, 20, nil, nil, nil, + 29, 22, 42, 31, 14, 14, 35, 16, 8, 48, + 48, 13, 40, 34, 24, 24, 24, 45, 52, 54, + 18, 18, 18, 6, 17, 6, 31, 31, 31, 31, + 17, 17, 17, 17, 30, 26, 26, 38, 1, 5, + 34, 14, 40, 22, 22, 35, 38, 54, 27, 27, + 27, 27, 8, 16, 48, 13, 35, 24, 52, 45, + 45, 18, 9, 31, 10, 11, 17, 39, 31, 31, + 38, 7, 15, 39, 30, 30, 7, 25, 32, 33, + 36, 43, 44, 46, 47, 42, 14, 42, 50, 39, + 53, 22, 55, 56, 42, 42, 57, 22, 
58, nil, + nil, nil, nil, 31, 31, nil, 22, nil, nil, nil, + nil, nil, 42, 31, 22, nil, 22, nil, nil, nil, + nil, nil, 22, nil, nil, nil, nil, 22, 22, 29, + 42, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, 22, 22, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 27, nil, nil, nil, nil, nil, nil, nil, nil, - 27, nil, 27, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, 27 ] + nil, nil, 29, nil, nil, nil, nil, nil, nil, nil, + nil, 29, nil, 29, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 29 ] racc_goto_pointer = [ - nil, 31, nil, nil, nil, -48, 32, 27, -39, -42, - 42, -34, -31, 38, 15, -13, 2, nil, nil, nil, - -70, nil, -41, 42, 34, 35, nil, -32, -47, -10, - -59, -31, -86, -137, -88, nil, -133, -121, -135, -135, - -82, -56, -55, -48, 27, -48, -66, nil, -3, -57, - -123, -110, -80, -108, -26 ] + nil, 38, nil, nil, nil, -52, 23, 68, -38, -29, + 60, -24, nil, -35, -6, 59, -44, 6, 6, nil, + nil, nil, -74, nil, -49, 44, 1, 12, nil, -33, + -39, -10, -66, -37, -111, -144, -96, nil, -140, -129, + -159, nil, -92, -63, -62, -58, 26, -55, -69, nil, + 8, nil, -75, -65, -136, -118, -88, -115, -33 ] racc_goto_default = [ - nil, nil, 2, 8, 83, nil, nil, nil, nil, nil, - nil, nil, 10, nil, nil, 51, nil, 21, 22, 23, - 95, 97, nil, nil, nil, nil, 105, 71, nil, 99, - nil, nil, nil, nil, 153, 126, nil, nil, 168, 155, - nil, 100, nil, nil, nil, nil, 75, 85, nil, nil, - nil, nil, nil, nil, nil ] + nil, nil, 2, 8, 90, nil, nil, nil, nil, nil, + nil, nil, 10, 11, nil, nil, nil, 55, nil, 21, + 22, 23, 104, 106, nil, nil, nil, nil, 114, 75, + nil, 108, nil, nil, nil, nil, 165, 135, nil, nil, + 179, 167, nil, 109, nil, nil, nil, nil, 81, 80, + 82, 92, nil, nil, nil, nil, nil, nil, nil ] racc_reduce_table = [ 0, 0, :racc_error, - 0, 63, :_reduce_1, - 2, 63, :_reduce_2, - 0, 64, :_reduce_3, - 2, 64, :_reduce_4, - 1, 65, :_reduce_5, - 2, 65, :_reduce_6, - 0, 66, :_reduce_none, - 
1, 66, :_reduce_none, - 5, 58, :_reduce_none, - 0, 67, :_reduce_10, - 0, 68, :_reduce_11, - 5, 59, :_reduce_12, - 2, 59, :_reduce_none, - 1, 73, :_reduce_14, - 2, 73, :_reduce_15, - 1, 60, :_reduce_none, - 2, 60, :_reduce_17, - 3, 60, :_reduce_18, - 2, 60, :_reduce_none, - 2, 60, :_reduce_20, - 2, 60, :_reduce_21, - 3, 60, :_reduce_22, - 2, 60, :_reduce_23, - 1, 60, :_reduce_24, - 1, 60, :_reduce_25, - 2, 60, :_reduce_none, - 1, 78, :_reduce_27, - 1, 78, :_reduce_28, - 1, 79, :_reduce_29, - 2, 79, :_reduce_30, - 2, 69, :_reduce_31, - 1, 69, :_reduce_none, - 1, 69, :_reduce_none, - 1, 69, :_reduce_none, - 3, 69, :_reduce_35, - 3, 69, :_reduce_36, - 3, 69, :_reduce_37, - 2, 69, :_reduce_38, - 2, 69, :_reduce_39, - 2, 69, :_reduce_40, - 2, 69, :_reduce_41, - 2, 69, :_reduce_42, - 2, 74, :_reduce_none, - 2, 74, :_reduce_44, - 2, 74, :_reduce_45, - 2, 74, :_reduce_46, - 2, 74, :_reduce_47, - 2, 74, :_reduce_48, - 2, 74, :_reduce_49, - 0, 84, :_reduce_none, - 1, 84, :_reduce_none, - 1, 85, :_reduce_52, - 2, 85, :_reduce_53, - 2, 80, :_reduce_54, - 3, 80, :_reduce_55, - 0, 88, :_reduce_none, - 1, 88, :_reduce_none, - 3, 83, :_reduce_58, - 8, 75, :_reduce_59, - 5, 76, :_reduce_60, - 8, 76, :_reduce_61, - 1, 89, :_reduce_62, - 3, 89, :_reduce_63, - 1, 90, :_reduce_64, - 3, 90, :_reduce_65, - 0, 96, :_reduce_none, - 1, 96, :_reduce_none, - 0, 97, :_reduce_none, - 1, 97, :_reduce_none, - 1, 91, :_reduce_70, - 3, 91, :_reduce_71, - 3, 91, :_reduce_72, - 6, 91, :_reduce_73, - 3, 91, :_reduce_74, - 3, 91, :_reduce_75, - 0, 99, :_reduce_none, - 1, 99, :_reduce_none, - 1, 87, :_reduce_78, - 1, 100, :_reduce_79, - 2, 100, :_reduce_80, - 2, 81, :_reduce_81, - 3, 81, :_reduce_82, - 1, 77, :_reduce_none, - 1, 77, :_reduce_none, - 0, 101, :_reduce_85, - 0, 102, :_reduce_86, - 5, 72, :_reduce_87, - 1, 103, :_reduce_88, - 2, 103, :_reduce_89, - 1, 82, :_reduce_90, - 2, 82, :_reduce_91, - 3, 82, :_reduce_92, - 1, 86, :_reduce_93, - 1, 86, :_reduce_94, - 0, 105, :_reduce_none, - 1, 105, 
:_reduce_none, + 0, 64, :_reduce_1, + 2, 64, :_reduce_2, + 0, 65, :_reduce_3, + 2, 65, :_reduce_4, + 1, 66, :_reduce_5, + 2, 66, :_reduce_6, + 0, 67, :_reduce_none, + 1, 67, :_reduce_none, + 5, 59, :_reduce_none, + 0, 68, :_reduce_10, + 0, 69, :_reduce_11, + 5, 60, :_reduce_12, + 2, 60, :_reduce_13, + 0, 72, :_reduce_14, + 2, 72, :_reduce_15, 2, 61, :_reduce_none, 2, 61, :_reduce_none, - 4, 104, :_reduce_99, - 1, 106, :_reduce_100, - 3, 106, :_reduce_101, - 1, 107, :_reduce_102, - 3, 107, :_reduce_103, - 5, 107, :_reduce_104, - 7, 107, :_reduce_105, - 4, 107, :_reduce_106, - 3, 107, :_reduce_107, - 1, 93, :_reduce_108, - 1, 93, :_reduce_109, - 1, 93, :_reduce_110, - 0, 108, :_reduce_none, - 1, 108, :_reduce_none, - 2, 94, :_reduce_113, - 3, 94, :_reduce_114, - 4, 94, :_reduce_115, - 0, 109, :_reduce_116, - 0, 110, :_reduce_117, - 5, 95, :_reduce_118, - 3, 92, :_reduce_119, - 0, 111, :_reduce_120, - 3, 62, :_reduce_121, - 1, 70, :_reduce_none, - 0, 71, :_reduce_none, + 1, 76, :_reduce_18, + 2, 76, :_reduce_19, + 2, 70, :_reduce_20, + 3, 70, :_reduce_21, + 5, 70, :_reduce_22, + 2, 70, :_reduce_none, + 2, 70, :_reduce_24, + 2, 70, :_reduce_25, + 3, 70, :_reduce_26, + 2, 70, :_reduce_27, + 1, 70, :_reduce_28, + 1, 70, :_reduce_29, + 1, 81, :_reduce_30, + 1, 81, :_reduce_31, + 1, 82, :_reduce_32, + 2, 82, :_reduce_33, 1, 71, :_reduce_none, 1, 71, :_reduce_none, 1, 71, :_reduce_none, - 1, 98, :_reduce_127 ] - -racc_reduce_n = 128 - -racc_shift_n = 219 + 2, 71, :_reduce_37, + 3, 71, :_reduce_38, + 3, 71, :_reduce_39, + 3, 71, :_reduce_40, + 2, 71, :_reduce_41, + 2, 71, :_reduce_42, + 2, 71, :_reduce_43, + 2, 71, :_reduce_44, + 2, 71, :_reduce_45, + 2, 77, :_reduce_none, + 2, 77, :_reduce_47, + 2, 77, :_reduce_48, + 2, 77, :_reduce_49, + 2, 77, :_reduce_50, + 2, 77, :_reduce_51, + 2, 77, :_reduce_52, + 2, 77, :_reduce_53, + 0, 87, :_reduce_none, + 1, 87, :_reduce_none, + 1, 88, :_reduce_56, + 2, 88, :_reduce_57, + 2, 83, :_reduce_58, + 3, 83, :_reduce_59, + 0, 91, 
:_reduce_none, + 1, 91, :_reduce_none, + 3, 86, :_reduce_62, + 8, 78, :_reduce_63, + 5, 79, :_reduce_64, + 8, 79, :_reduce_65, + 1, 92, :_reduce_66, + 3, 92, :_reduce_67, + 1, 93, :_reduce_68, + 3, 93, :_reduce_69, + 0, 99, :_reduce_none, + 1, 99, :_reduce_none, + 0, 100, :_reduce_none, + 1, 100, :_reduce_none, + 1, 94, :_reduce_74, + 3, 94, :_reduce_75, + 3, 94, :_reduce_76, + 6, 94, :_reduce_77, + 3, 94, :_reduce_78, + 3, 94, :_reduce_79, + 0, 102, :_reduce_none, + 1, 102, :_reduce_none, + 1, 90, :_reduce_82, + 1, 103, :_reduce_83, + 2, 103, :_reduce_84, + 2, 84, :_reduce_85, + 3, 84, :_reduce_86, + 1, 80, :_reduce_none, + 1, 80, :_reduce_none, + 0, 104, :_reduce_89, + 0, 105, :_reduce_90, + 5, 75, :_reduce_91, + 1, 106, :_reduce_92, + 2, 106, :_reduce_93, + 2, 107, :_reduce_94, + 1, 108, :_reduce_95, + 2, 108, :_reduce_96, + 1, 85, :_reduce_97, + 1, 85, :_reduce_98, + 3, 85, :_reduce_99, + 1, 89, :_reduce_none, + 1, 89, :_reduce_none, + 1, 110, :_reduce_102, + 2, 110, :_reduce_103, + 2, 62, :_reduce_none, + 2, 62, :_reduce_none, + 4, 109, :_reduce_106, + 1, 111, :_reduce_107, + 3, 111, :_reduce_108, + 0, 112, :_reduce_109, + 2, 112, :_reduce_110, + 3, 112, :_reduce_111, + 5, 112, :_reduce_112, + 7, 112, :_reduce_113, + 4, 112, :_reduce_114, + 3, 112, :_reduce_115, + 1, 96, :_reduce_116, + 1, 96, :_reduce_117, + 1, 96, :_reduce_118, + 0, 113, :_reduce_none, + 1, 113, :_reduce_none, + 2, 97, :_reduce_121, + 3, 97, :_reduce_122, + 4, 97, :_reduce_123, + 0, 114, :_reduce_124, + 0, 115, :_reduce_125, + 5, 98, :_reduce_126, + 3, 95, :_reduce_127, + 0, 116, :_reduce_128, + 3, 63, :_reduce_129, + 1, 73, :_reduce_none, + 0, 74, :_reduce_none, + 1, 74, :_reduce_none, + 1, 74, :_reduce_none, + 1, 74, :_reduce_none, + 1, 101, :_reduce_135 ] + +racc_reduce_n = 136 + +racc_shift_n = 231 racc_token_table = { false => 0, @@ -1044,52 +1076,53 @@ def raise_parse_error(error_message, location) "%{" => 10, "%}" => 11, "%require" => 12, - "%expect" => 13, - "%define" => 14, - 
"%param" => 15, - "%lex-param" => 16, - "%parse-param" => 17, - "%code" => 18, - "%initial-action" => 19, - "%no-stdlib" => 20, - "%locations" => 21, - ";" => 22, - "%union" => 23, - "%destructor" => 24, - "%printer" => 25, - "%error-token" => 26, - "%after-shift" => 27, - "%before-reduce" => 28, - "%after-reduce" => 29, - "%after-shift-error-token" => 30, - "%after-pop-stack" => 31, - "-temp-group" => 32, - "%token" => 33, - "%type" => 34, - "%nterm" => 35, - "%left" => 36, - "%right" => 37, - "%precedence" => 38, - "%nonassoc" => 39, - "%rule" => 40, - "(" => 41, - ")" => 42, - ":" => 43, - "%inline" => 44, - "," => 45, - "|" => 46, - "%empty" => 47, - "%prec" => 48, - "{" => 49, - "}" => 50, - "?" => 51, - "+" => 52, - "*" => 53, - "[" => 54, - "]" => 55, - "{...}" => 56 } - -racc_nt_base = 57 + ";" => 13, + "%expect" => 14, + "%define" => 15, + "{" => 16, + "}" => 17, + "%param" => 18, + "%lex-param" => 19, + "%parse-param" => 20, + "%code" => 21, + "%initial-action" => 22, + "%no-stdlib" => 23, + "%locations" => 24, + "%union" => 25, + "%destructor" => 26, + "%printer" => 27, + "%error-token" => 28, + "%after-shift" => 29, + "%before-reduce" => 30, + "%after-reduce" => 31, + "%after-shift-error-token" => 32, + "%after-pop-stack" => 33, + "-temp-group" => 34, + "%token" => 35, + "%type" => 36, + "%nterm" => 37, + "%left" => 38, + "%right" => 39, + "%precedence" => 40, + "%nonassoc" => 41, + "%start" => 42, + "%rule" => 43, + "(" => 44, + ")" => 45, + ":" => 46, + "%inline" => 47, + "," => 48, + "|" => 49, + "%empty" => 50, + "%prec" => 51, + "?" 
=> 52, + "+" => 53, + "*" => 54, + "[" => 55, + "]" => 56, + "{...}" => 57 } + +racc_nt_base = 58 racc_use_result_var = true @@ -1124,8 +1157,11 @@ def raise_parse_error(error_message, location) "\"%{\"", "\"%}\"", "\"%require\"", + "\";\"", "\"%expect\"", "\"%define\"", + "\"{\"", + "\"}\"", "\"%param\"", "\"%lex-param\"", "\"%parse-param\"", @@ -1133,7 +1169,6 @@ def raise_parse_error(error_message, location) "\"%initial-action\"", "\"%no-stdlib\"", "\"%locations\"", - "\";\"", "\"%union\"", "\"%destructor\"", "\"%printer\"", @@ -1151,6 +1186,7 @@ def raise_parse_error(error_message, location) "\"%right\"", "\"%precedence\"", "\"%nonassoc\"", + "\"%start\"", "\"%rule\"", "\"(\"", "\")\"", @@ -1160,8 +1196,6 @@ def raise_parse_error(error_message, location) "\"|\"", "\"%empty\"", "\"%prec\"", - "\"{\"", - "\"}\"", "\"?\"", "\"+\"", "\"*\"", @@ -1180,7 +1214,9 @@ def raise_parse_error(error_message, location) "\"-option@epilogue_declaration\"", "@1", "@2", + "parser_option", "grammar_declaration", + "\"-many@;\"", "variable", "value", "param", @@ -1204,9 +1240,9 @@ def raise_parse_error(error_message, location) "rule_rhs_list", "rule_rhs", "named_ref", - "parameterizing_suffix", - "parameterizing_args", - "midrule_action", + "parameterized_suffix", + "parameterized_args", + "action", "\"-option@%empty\"", "\"-option@named_ref\"", "string_as_id", @@ -1215,11 +1251,13 @@ def raise_parse_error(error_message, location) "@3", "@4", "\"-many1@id\"", + "\"-group@TAG-\\\"-many1@id\\\"\"", + "\"-many1@-group@TAG-\\\"-many1@id\\\"\"", "rules", - "\"-option@;\"", + "\"-many1@;\"", "rhs_list", "rhs", - "\"-option@parameterizing_suffix\"", + "\"-option@parameterized_suffix\"", "@5", "@6", "@7" ] @@ -1279,10 +1317,9 @@ def _reduce_6(val, _values, result) # reduce 9 omitted -module_eval(<<'.,.,', 'parser.y', 12) +module_eval(<<'.,.,', 'parser.y', 13) def _reduce_10(val, _values, result) - begin_c_declaration("%}") - @grammar.prologue_first_lineno = @lexer.line + 
begin_c_declaration("%}") result end @@ -1290,7 +1327,7 @@ def _reduce_10(val, _values, result) module_eval(<<'.,.,', 'parser.y', 17) def _reduce_11(val, _values, result) - end_c_declaration + end_c_declaration result end @@ -1298,22 +1335,29 @@ def _reduce_11(val, _values, result) module_eval(<<'.,.,', 'parser.y', 21) def _reduce_12(val, _values, result) - @grammar.prologue = val[2].s_value + @grammar.prologue_first_lineno = val[0].first_line + @grammar.prologue = val[2].s_value result end .,., -# reduce 13 omitted +module_eval(<<'.,.,', 'parser.y', 26) + def _reduce_13(val, _values, result) + @grammar.required = true -module_eval(<<'.,.,', 'parser.y', 54) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 34) def _reduce_14(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 54) +module_eval(<<'.,.,', 'parser.y', 34) def _reduce_15(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result @@ -1322,150 +1366,140 @@ def _reduce_15(val, _values, result) # reduce 16 omitted -module_eval(<<'.,.,', 'parser.y', 26) - def _reduce_17(val, _values, result) - @grammar.expect = val[1] +# reduce 17 omitted + +module_eval(<<'.,.,', 'parser.y', 77) + def _reduce_18(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 27) - def _reduce_18(val, _values, result) - @grammar.define[val[1].s_value] = val[2]&.s_value +module_eval(<<'.,.,', 'parser.y', 77) + def _reduce_19(val, _values, result) + result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -# reduce 19 omitted - -module_eval(<<'.,.,', 'parser.y', 31) +module_eval(<<'.,.,', 'parser.y', 36) def _reduce_20(val, _values, result) - val[1].each {|token| - @grammar.lex_param = Grammar::Code::NoReferenceCode.new(type: :lex_param, token_code: token).token_code.s_value - } + @grammar.expect = val[1].s_value result end .,., -module_eval(<<'.,.,', 'parser.y', 37) +module_eval(<<'.,.,', 'parser.y', 40) def _reduce_21(val, _values, result) - val[1].each {|token| - @grammar.parse_param = Grammar::Code::NoReferenceCode.new(type: :parse_param, token_code: token).token_code.s_value - } + @grammar.define[val[1].s_value] = val[2]&.s_value result end .,., -module_eval(<<'.,.,', 'parser.y', 43) +module_eval(<<'.,.,', 'parser.y', 44) def _reduce_22(val, _values, result) - @grammar.add_percent_code(id: val[1], code: val[2]) + @grammar.define[val[1].s_value] = val[3]&.s_value result end .,., -module_eval(<<'.,.,', 'parser.y', 47) - def _reduce_23(val, _values, result) - @grammar.initial_action = Grammar::Code::InitialActionCode.new(type: :initial_action, token_code: val[1]) - - result - end -.,., +# reduce 23 omitted module_eval(<<'.,.,', 'parser.y', 49) def _reduce_24(val, _values, result) - @grammar.no_stdlib = true + val[1].each {|token| + @grammar.lex_param = Grammar::Code::NoReferenceCode.new(type: :lex_param, token_code: token).token_code.s_value + } + result end .,., -module_eval(<<'.,.,', 'parser.y', 50) +module_eval(<<'.,.,', 'parser.y', 55) def _reduce_25(val, _values, result) - @grammar.locations = true + val[1].each {|token| + @grammar.parse_param = Grammar::Code::NoReferenceCode.new(type: :parse_param, token_code: token).token_code.s_value + } + result end .,., -# reduce 26 omitted +module_eval(<<'.,.,', 'parser.y', 61) + def _reduce_26(val, _values, result) + @grammar.add_percent_code(id: val[1], code: val[2]) + + result + end +.,., -module_eval(<<'.,.,', 'parser.y', 109) +module_eval(<<'.,.,', 'parser.y', 65) def 
_reduce_27(val, _values, result) - result = val + @grammar.initial_action = Grammar::Code::InitialActionCode.new(type: :initial_action, token_code: val[1]) + result end .,., -module_eval(<<'.,.,', 'parser.y', 109) +module_eval(<<'.,.,', 'parser.y', 69) def _reduce_28(val, _values, result) - result = val + @grammar.no_stdlib = true + result end .,., -module_eval(<<'.,.,', 'parser.y', 109) +module_eval(<<'.,.,', 'parser.y', 73) def _reduce_29(val, _values, result) - result = val[1] ? val[1].unshift(val[0]) : val + @grammar.locations = true + result end .,., -module_eval(<<'.,.,', 'parser.y', 109) +module_eval(<<'.,.,', 'parser.y', 133) def _reduce_30(val, _values, result) - result = val[1] ? val[1].unshift(val[0]) : val + result = val result end .,., -module_eval(<<'.,.,', 'parser.y', 55) +module_eval(<<'.,.,', 'parser.y', 133) def _reduce_31(val, _values, result) - @grammar.set_union( - Grammar::Code::NoReferenceCode.new(type: :union, token_code: val[1]), - val[1].line - ) - + result = val result end .,., -# reduce 32 omitted - -# reduce 33 omitted - -# reduce 34 omitted - -module_eval(<<'.,.,', 'parser.y', 65) - def _reduce_35(val, _values, result) - @grammar.add_destructor( - ident_or_tags: val[2].flatten, - token_code: val[1], - lineno: val[1].line - ) - +module_eval(<<'.,.,', 'parser.y', 133) + def _reduce_32(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 73) - def _reduce_36(val, _values, result) - @grammar.add_printer( - ident_or_tags: val[2].flatten, - token_code: val[1], - lineno: val[1].line - ) - +module_eval(<<'.,.,', 'parser.y', 133) + def _reduce_33(val, _values, result) + result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 81) +# reduce 34 omitted + +# reduce 35 omitted + +# reduce 36 omitted + +module_eval(<<'.,.,', 'parser.y', 82) def _reduce_37(val, _values, result) - @grammar.add_error_token( - ident_or_tags: val[2].flatten, - token_code: val[1], - lineno: val[1].line - ) + @grammar.set_union( + Grammar::Code::NoReferenceCode.new(type: :union, token_code: val[1]), + val[1].line + ) result end @@ -1473,665 +1507,762 @@ def _reduce_37(val, _values, result) module_eval(<<'.,.,', 'parser.y', 89) def _reduce_38(val, _values, result) - @grammar.after_shift = val[1] + @grammar.add_destructor( + ident_or_tags: val[2].flatten, + token_code: val[1], + lineno: val[1].line + ) result end .,., -module_eval(<<'.,.,', 'parser.y', 93) +module_eval(<<'.,.,', 'parser.y', 97) def _reduce_39(val, _values, result) - @grammar.before_reduce = val[1] + @grammar.add_printer( + ident_or_tags: val[2].flatten, + token_code: val[1], + lineno: val[1].line + ) result end .,., -module_eval(<<'.,.,', 'parser.y', 97) +module_eval(<<'.,.,', 'parser.y', 105) def _reduce_40(val, _values, result) - @grammar.after_reduce = val[1] + @grammar.add_error_token( + ident_or_tags: val[2].flatten, + token_code: val[1], + lineno: val[1].line + ) result end .,., -module_eval(<<'.,.,', 'parser.y', 101) +module_eval(<<'.,.,', 'parser.y', 113) def _reduce_41(val, _values, result) - @grammar.after_shift_error_token = val[1] + @grammar.after_shift = val[1] result end .,., -module_eval(<<'.,.,', 'parser.y', 105) +module_eval(<<'.,.,', 'parser.y', 117) def _reduce_42(val, _values, result) - @grammar.after_pop_stack = val[1] + @grammar.before_reduce = val[1] result end .,., -# reduce 43 omitted - -module_eval(<<'.,.,', 'parser.y', 111) - def _reduce_44(val, _values, result) - val[1].each {|hash| - hash[:tokens].each {|id| - @grammar.add_type(id: id, tag: hash[:tag]) - } - } +module_eval(<<'.,.,', 'parser.y', 121) + def _reduce_43(val, _values, result) + 
@grammar.after_reduce = val[1] result end .,., -module_eval(<<'.,.,', 'parser.y', 119) - def _reduce_45(val, _values, result) - val[1].each {|hash| - hash[:tokens].each {|id| - if @grammar.find_term_by_s_value(id.s_value) - on_action_error("symbol #{id.s_value} redeclared as a nonterminal", id) - else - @grammar.add_type(id: id, tag: hash[:tag]) - end - } - } +module_eval(<<'.,.,', 'parser.y', 125) + def _reduce_44(val, _values, result) + @grammar.after_shift_error_token = val[1] result end .,., -module_eval(<<'.,.,', 'parser.y', 131) - def _reduce_46(val, _values, result) - val[1].each {|hash| - hash[:tokens].each {|id| - sym = @grammar.add_term(id: id) - @grammar.add_left(sym, @precedence_number) - } - } - @precedence_number += 1 +module_eval(<<'.,.,', 'parser.y', 129) + def _reduce_45(val, _values, result) + @grammar.after_pop_stack = val[1] result end .,., -module_eval(<<'.,.,', 'parser.y', 141) +# reduce 46 omitted + +module_eval(<<'.,.,', 'parser.y', 136) def _reduce_47(val, _values, result) - val[1].each {|hash| - hash[:tokens].each {|id| - sym = @grammar.add_term(id: id) - @grammar.add_right(sym, @precedence_number) - } - } - @precedence_number += 1 + val[1].each {|hash| + hash[:tokens].each {|id| + @grammar.add_type(id: id, tag: hash[:tag]) + } + } result end .,., -module_eval(<<'.,.,', 'parser.y', 151) +module_eval(<<'.,.,', 'parser.y', 144) def _reduce_48(val, _values, result) - val[1].each {|hash| - hash[:tokens].each {|id| - sym = @grammar.add_term(id: id) - @grammar.add_precedence(sym, @precedence_number) - } - } - @precedence_number += 1 + val[1].each {|hash| + hash[:tokens].each {|id| + if @grammar.find_term_by_s_value(id.s_value) + on_action_error("symbol #{id.s_value} redeclared as a nonterminal", id) + else + @grammar.add_type(id: id, tag: hash[:tag]) + end + } + } result end .,., -module_eval(<<'.,.,', 'parser.y', 161) +module_eval(<<'.,.,', 'parser.y', 156) def _reduce_49(val, _values, result) - val[1].each {|hash| - hash[:tokens].each {|id| - 
sym = @grammar.add_term(id: id) - @grammar.add_nonassoc(sym, @precedence_number) - } - } - @precedence_number += 1 + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id, tag: hash[:tag]) + @grammar.add_left(sym, @precedence_number, id.s_value, id.first_line) + } + } + @precedence_number += 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 166) + def _reduce_50(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id, tag: hash[:tag]) + @grammar.add_right(sym, @precedence_number, id.s_value, id.first_line) + } + } + @precedence_number += 1 result end .,., -# reduce 50 omitted +module_eval(<<'.,.,', 'parser.y', 176) + def _reduce_51(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id, tag: hash[:tag]) + @grammar.add_precedence(sym, @precedence_number, id.s_value, id.first_line) + } + } + @precedence_number += 1 -# reduce 51 omitted + result + end +.,., -module_eval(<<'.,.,', 'parser.y', 184) +module_eval(<<'.,.,', 'parser.y', 186) def _reduce_52(val, _values, result) - result = val[1] ? val[1].unshift(val[0]) : val + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id, tag: hash[:tag]) + @grammar.add_nonassoc(sym, @precedence_number, id.s_value, id.first_line) + } + } + @precedence_number += 1 + result end .,., -module_eval(<<'.,.,', 'parser.y', 184) +module_eval(<<'.,.,', 'parser.y', 196) def _reduce_53(val, _values, result) + @grammar.set_start_nterm(val[1]) + + result + end +.,., + +# reduce 54 omitted + +# reduce 55 omitted + +module_eval(<<'.,.,', 'parser.y', 214) + def _reduce_56(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 214) + def _reduce_57(val, _values, result) result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 172) - def _reduce_54(val, _values, result) - val[1].each {|token_declaration| - @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: val[0], replace: true) - } +module_eval(<<'.,.,', 'parser.y', 202) + def _reduce_58(val, _values, result) + val[1].each {|token_declaration| + @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1]&.s_value, tag: val[0], replace: true) + } result end .,., -module_eval(<<'.,.,', 'parser.y', 178) - def _reduce_55(val, _values, result) - val[2].each {|token_declaration| - @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: val[1], replace: true) - } +module_eval(<<'.,.,', 'parser.y', 208) + def _reduce_59(val, _values, result) + val[2].each {|token_declaration| + @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1]&.s_value, tag: val[1], replace: true) + } result end .,., -# reduce 56 omitted +# reduce 60 omitted -# reduce 57 omitted +# reduce 61 omitted -module_eval(<<'.,.,', 'parser.y', 183) - def _reduce_58(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 213) + def _reduce_62(val, _values, result) result = val result end .,., -module_eval(<<'.,.,', 'parser.y', 187) - def _reduce_59(val, _values, result) - rule = Grammar::ParameterizingRule::Rule.new(val[1].s_value, val[3], val[7], tag: val[5]) - @grammar.add_parameterizing_rule(rule) +module_eval(<<'.,.,', 'parser.y', 218) + def _reduce_63(val, _values, result) + rule = Grammar::Parameterized::Rule.new(val[1].s_value, val[3], val[7], tag: val[5]) + @grammar.add_parameterized_rule(rule) result end .,., -module_eval(<<'.,.,', 'parser.y', 193) - def _reduce_60(val, _values, result) - rule = Grammar::ParameterizingRule::Rule.new(val[2].s_value, [], val[4], is_inline: 
true) - @grammar.add_parameterizing_rule(rule) +module_eval(<<'.,.,', 'parser.y', 225) + def _reduce_64(val, _values, result) + rule = Grammar::Parameterized::Rule.new(val[2].s_value, [], val[4], is_inline: true) + @grammar.add_parameterized_rule(rule) result end .,., -module_eval(<<'.,.,', 'parser.y', 198) - def _reduce_61(val, _values, result) - rule = Grammar::ParameterizingRule::Rule.new(val[2].s_value, val[4], val[7], is_inline: true) - @grammar.add_parameterizing_rule(rule) +module_eval(<<'.,.,', 'parser.y', 230) + def _reduce_65(val, _values, result) + rule = Grammar::Parameterized::Rule.new(val[2].s_value, val[4], val[7], is_inline: true) + @grammar.add_parameterized_rule(rule) result end .,., -module_eval(<<'.,.,', 'parser.y', 202) - def _reduce_62(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 235) + def _reduce_66(val, _values, result) result = [val[0]] result end .,., -module_eval(<<'.,.,', 'parser.y', 203) - def _reduce_63(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 236) + def _reduce_67(val, _values, result) result = val[0].append(val[2]) result end .,., -module_eval(<<'.,.,', 'parser.y', 207) - def _reduce_64(val, _values, result) - builder = val[0] - result = [builder] +module_eval(<<'.,.,', 'parser.y', 241) + def _reduce_68(val, _values, result) + builder = val[0] + result = [builder] result end .,., -module_eval(<<'.,.,', 'parser.y', 212) - def _reduce_65(val, _values, result) - builder = val[2] - result = val[0].append(builder) +module_eval(<<'.,.,', 'parser.y', 246) + def _reduce_69(val, _values, result) + builder = val[2] + result = val[0].append(builder) result end .,., -# reduce 66 omitted +# reduce 70 omitted -# reduce 67 omitted +# reduce 71 omitted -# reduce 68 omitted +# reduce 72 omitted -# reduce 69 omitted +# reduce 73 omitted -module_eval(<<'.,.,', 'parser.y', 218) - def _reduce_70(val, _values, result) - reset_precs - result = Grammar::ParameterizingRule::Rhs.new +module_eval(<<'.,.,', 'parser.y', 253) + def 
_reduce_74(val, _values, result) + reset_precs + result = Grammar::Parameterized::Rhs.new result end .,., -module_eval(<<'.,.,', 'parser.y', 223) - def _reduce_71(val, _values, result) - token = val[1] - token.alias_name = val[2] - builder = val[0] - builder.symbols << token - result = builder +module_eval(<<'.,.,', 'parser.y', 258) + def _reduce_75(val, _values, result) + on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen + token = val[1] + token.alias_name = val[2] + builder = val[0] + builder.symbols << token + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 231) - def _reduce_72(val, _values, result) - builder = val[0] - builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]]) - result = builder +module_eval(<<'.,.,', 'parser.y', 267) + def _reduce_76(val, _values, result) + on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen + builder = val[0] + builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]]) + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 237) - def _reduce_73(val, _values, result) - builder = val[0] - builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5]) - result = builder +module_eval(<<'.,.,', 'parser.y', 274) + def _reduce_77(val, _values, result) + on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen + builder = val[0] + builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5]) + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 243) - def _reduce_74(val, _values, result) - user_code = val[1] - user_code.alias_name = val[2] - builder = val[0] - builder.user_code = user_code - result = builder 
+module_eval(<<'.,.,', 'parser.y', 281) + def _reduce_78(val, _values, result) + user_code = val[1] + user_code.alias_name = val[2] + builder = val[0] + builder.user_code = user_code + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 251) - def _reduce_75(val, _values, result) - sym = @grammar.find_symbol_by_id!(val[2]) - @prec_seen = true - builder = val[0] - builder.precedence_sym = sym - result = builder +module_eval(<<'.,.,', 'parser.y', 289) + def _reduce_79(val, _values, result) + on_action_error("multiple %prec in a rule", val[0]) if prec_seen? + sym = @grammar.find_symbol_by_id!(val[2]) + if val[0].rhs.empty? + @opening_prec_seen = true + else + @trailing_prec_seen = true + end + builder = val[0] + builder.precedence_sym = sym + result = builder result end .,., -# reduce 76 omitted +# reduce 80 omitted -# reduce 77 omitted +# reduce 81 omitted -module_eval(<<'.,.,', 'parser.y', 258) - def _reduce_78(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 301) + def _reduce_82(val, _values, result) result = val[0].s_value if val[0] result end .,., -module_eval(<<'.,.,', 'parser.y', 271) - def _reduce_79(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 315) + def _reduce_83(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 271) - def _reduce_80(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 315) + def _reduce_84(val, _values, result) result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 262) - def _reduce_81(val, _values, result) - result = if val[0] - [{tag: val[0], tokens: val[1]}] - else - [{tag: nil, tokens: val[1]}] - end +module_eval(<<'.,.,', 'parser.y', 306) + def _reduce_85(val, _values, result) + result = if val[0] + [{tag: val[0], tokens: val[1]}] + else + [{tag: nil, tokens: val[1]}] + end result end .,., -module_eval(<<'.,.,', 'parser.y', 268) - def _reduce_82(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 312) + def _reduce_86(val, _values, result) result = val[0].append({tag: val[1], tokens: val[2]}) result end .,., -# reduce 83 omitted +# reduce 87 omitted -# reduce 84 omitted +# reduce 88 omitted -module_eval(<<'.,.,', 'parser.y', 274) - def _reduce_85(val, _values, result) - begin_c_declaration("}") +module_eval(<<'.,.,', 'parser.y', 321) + def _reduce_89(val, _values, result) + begin_c_declaration("}") result end .,., -module_eval(<<'.,.,', 'parser.y', 278) - def _reduce_86(val, _values, result) - end_c_declaration +module_eval(<<'.,.,', 'parser.y', 325) + def _reduce_90(val, _values, result) + end_c_declaration result end .,., -module_eval(<<'.,.,', 'parser.y', 282) - def _reduce_87(val, _values, result) - result = val[2] +module_eval(<<'.,.,', 'parser.y', 329) + def _reduce_91(val, _values, result) + result = val[2] result end .,., -module_eval(<<'.,.,', 'parser.y', 290) - def _reduce_88(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 338) + def _reduce_92(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 290) - def _reduce_89(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 338) + def _reduce_93(val, _values, result) result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 285) - def _reduce_90(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 338) + def _reduce_94(val, _values, result) + result = val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 338) + def _reduce_95(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 338) + def _reduce_96(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 333) + def _reduce_97(val, _values, result) result = [{tag: nil, tokens: val[0]}] result end .,., -module_eval(<<'.,.,', 'parser.y', 286) - def _reduce_91(val, _values, result) - result = [{tag: val[0], tokens: val[1]}] +module_eval(<<'.,.,', 'parser.y', 334) + def _reduce_98(val, _values, result) + result = val[0].map {|tag, ids| {tag: tag, tokens: ids} } result end .,., -module_eval(<<'.,.,', 'parser.y', 287) - def _reduce_92(val, _values, result) - result = val[0].append({tag: val[1], tokens: val[2]}) +module_eval(<<'.,.,', 'parser.y', 335) + def _reduce_99(val, _values, result) + result = [{tag: nil, tokens: val[0]}, {tag: val[1], tokens: val[2]}] result end .,., -module_eval(<<'.,.,', 'parser.y', 289) - def _reduce_93(val, _values, result) - on_action_error("ident after %prec", val[0]) if @prec_seen +# reduce 100 omitted + +# reduce 101 omitted + +module_eval(<<'.,.,', 'parser.y', 346) + def _reduce_102(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 290) - def _reduce_94(val, _values, result) - on_action_error("char after %prec", val[0]) if @prec_seen +module_eval(<<'.,.,', 'parser.y', 346) + def _reduce_103(val, _values, result) + result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -# reduce 95 omitted +# reduce 104 omitted -# reduce 96 omitted +# reduce 105 omitted -# reduce 97 omitted +module_eval(<<'.,.,', 'parser.y', 348) + def _reduce_106(val, _values, result) + lhs = val[0] + lhs.alias_name = val[1] + val[3].each do |builder| + builder.lhs = lhs + builder.complete_input + @grammar.add_rule_builder(builder) + end -# reduce 98 omitted + result + end +.,., -module_eval(<<'.,.,', 'parser.y', 298) - def _reduce_99(val, _values, result) - lhs = val[0] - lhs.alias_name = val[1] - val[3].each do |builder| - builder.lhs = lhs - builder.complete_input - @grammar.add_rule_builder(builder) - end +module_eval(<<'.,.,', 'parser.y', 360) + def _reduce_107(val, _values, result) + if val[0].rhs.count > 1 + empties = val[0].rhs.select { |sym| sym.is_a?(Lrama::Lexer::Token::Empty) } + empties.each do |empty| + on_action_error("%empty on non-empty rule", empty) + end + end + builder = val[0] + if !builder.line + builder.line = @lexer.line - 1 + end + result = [builder] result end .,., -module_eval(<<'.,.,', 'parser.y', 309) - def _reduce_100(val, _values, result) - builder = val[0] - if !builder.line - builder.line = @lexer.line - 1 - end - result = [builder] +module_eval(<<'.,.,', 'parser.y', 374) + def _reduce_108(val, _values, result) + builder = val[2] + if !builder.line + builder.line = @lexer.line - 1 + end + result = val[0].append(builder) result end .,., -module_eval(<<'.,.,', 'parser.y', 317) - def _reduce_101(val, _values, result) - builder = val[2] - if !builder.line - builder.line = @lexer.line - 1 - end - result = val[0].append(builder) +module_eval(<<'.,.,', 'parser.y', 384) + def _reduce_109(val, _values, result) + reset_precs + result = @grammar.create_rule_builder(@rule_counter, @midrule_action_counter) result end .,., -module_eval(<<'.,.,', 'parser.y', 326) - def _reduce_102(val, _values, result) - reset_precs - result = @grammar.create_rule_builder(@rule_counter, @midrule_action_counter) 
+module_eval(<<'.,.,', 'parser.y', 389) + def _reduce_110(val, _values, result) + builder = val[0] + builder.add_rhs(Lrama::Lexer::Token::Empty.new(location: @lexer.location)) + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 331) - def _reduce_103(val, _values, result) - token = val[1] - token.alias_name = val[2] - builder = val[0] - builder.add_rhs(token) - result = builder +module_eval(<<'.,.,', 'parser.y', 395) + def _reduce_111(val, _values, result) + on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen + token = val[1] + token.alias_name = val[2] + builder = val[0] + builder.add_rhs(token) + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 339) - def _reduce_104(val, _values, result) - token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], alias_name: val[3], location: @lexer.location, args: [val[1]], lhs_tag: val[4]) - builder = val[0] - builder.add_rhs(token) - builder.line = val[1].first_line - result = builder +module_eval(<<'.,.,', 'parser.y', 404) + def _reduce_112(val, _values, result) + on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen + token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], alias_name: val[3], location: @lexer.location, args: [val[1]], lhs_tag: val[4]) + builder = val[0] + builder.add_rhs(token) + builder.line = val[1].first_line + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 347) - def _reduce_105(val, _values, result) - token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, alias_name: val[5], location: @lexer.location, args: val[3], lhs_tag: val[6]) - builder = val[0] - builder.add_rhs(token) - builder.line = val[1].first_line - result = builder +module_eval(<<'.,.,', 'parser.y', 413) + def _reduce_113(val, _values, result) + on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen + token = Lrama::Lexer::Token::InstantiateRule.new(s_value: 
val[1].s_value, alias_name: val[5], location: @lexer.location, args: val[3], lhs_tag: val[6]) + builder = val[0] + builder.add_rhs(token) + builder.line = val[1].first_line + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 355) - def _reduce_106(val, _values, result) - user_code = val[1] - user_code.alias_name = val[2] - user_code.tag = val[3] - builder = val[0] - builder.user_code = user_code - result = builder +module_eval(<<'.,.,', 'parser.y', 422) + def _reduce_114(val, _values, result) + user_code = val[1] + user_code.alias_name = val[2] + user_code.tag = val[3] + builder = val[0] + builder.user_code = user_code + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 364) - def _reduce_107(val, _values, result) - sym = @grammar.find_symbol_by_id!(val[2]) - @prec_seen = true - builder = val[0] - builder.precedence_sym = sym - result = builder +module_eval(<<'.,.,', 'parser.y', 431) + def _reduce_115(val, _values, result) + on_action_error("multiple %prec in a rule", val[0]) if prec_seen? + sym = @grammar.find_symbol_by_id!(val[2]) + if val[0].rhs.empty? 
+ @opening_prec_seen = true + else + @trailing_prec_seen = true + end + builder = val[0] + builder.precedence_sym = sym + result = builder result end .,., -module_eval(<<'.,.,', 'parser.y', 371) - def _reduce_108(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 444) + def _reduce_116(val, _values, result) result = "option" result end .,., -module_eval(<<'.,.,', 'parser.y', 372) - def _reduce_109(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 445) + def _reduce_117(val, _values, result) result = "nonempty_list" result end .,., -module_eval(<<'.,.,', 'parser.y', 373) - def _reduce_110(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 446) + def _reduce_118(val, _values, result) result = "list" result end .,., -# reduce 111 omitted +# reduce 119 omitted -# reduce 112 omitted +# reduce 120 omitted -module_eval(<<'.,.,', 'parser.y', 377) - def _reduce_113(val, _values, result) - result = if val[1] - [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] - else - [val[0]] - end +module_eval(<<'.,.,', 'parser.y', 451) + def _reduce_121(val, _values, result) + result = if val[1] + [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] + else + [val[0]] + end result end .,., -module_eval(<<'.,.,', 'parser.y', 383) - def _reduce_114(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 457) + def _reduce_122(val, _values, result) result = val[0].append(val[2]) result end .,., -module_eval(<<'.,.,', 'parser.y', 384) - def _reduce_115(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 458) + def _reduce_123(val, _values, result) result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] result end .,., -module_eval(<<'.,.,', 'parser.y', 388) - def _reduce_116(val, _values, result) - if @prec_seen - on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec - 
@code_after_prec = true - end - begin_c_declaration("}") +module_eval(<<'.,.,', 'parser.y', 463) + def _reduce_124(val, _values, result) + if prec_seen? + on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec + @code_after_prec = true + end + begin_c_declaration("}") result end .,., -module_eval(<<'.,.,', 'parser.y', 396) - def _reduce_117(val, _values, result) - end_c_declaration +module_eval(<<'.,.,', 'parser.y', 471) + def _reduce_125(val, _values, result) + end_c_declaration result end .,., -module_eval(<<'.,.,', 'parser.y', 400) - def _reduce_118(val, _values, result) - result = val[2] +module_eval(<<'.,.,', 'parser.y', 475) + def _reduce_126(val, _values, result) + result = val[2] result end .,., -module_eval(<<'.,.,', 'parser.y', 403) - def _reduce_119(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 478) + def _reduce_127(val, _values, result) result = val[1].s_value result end .,., -module_eval(<<'.,.,', 'parser.y', 407) - def _reduce_120(val, _values, result) - begin_c_declaration('\Z') - @grammar.epilogue_first_lineno = @lexer.line + 1 +module_eval(<<'.,.,', 'parser.y', 483) + def _reduce_128(val, _values, result) + begin_c_declaration('\Z') result end .,., -module_eval(<<'.,.,', 'parser.y', 412) - def _reduce_121(val, _values, result) - end_c_declaration - @grammar.epilogue = val[2].s_value +module_eval(<<'.,.,', 'parser.y', 487) + def _reduce_129(val, _values, result) + end_c_declaration + @grammar.epilogue_first_lineno = val[0].first_line + 1 + @grammar.epilogue = val[2].s_value result end .,., -# reduce 122 omitted +# reduce 130 omitted -# reduce 123 omitted +# reduce 131 omitted -# reduce 124 omitted +# reduce 132 omitted -# reduce 125 omitted +# reduce 133 omitted -# reduce 126 omitted +# reduce 134 omitted -module_eval(<<'.,.,', 'parser.y', 423) - def _reduce_127(val, _values, result) - result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) +module_eval(<<'.,.,', 'parser.y', 499) + def _reduce_135(val, _values, 
result) + result = Lrama::Lexer::Token::Ident.new(s_value: val[0].s_value) result end .,., diff --git a/tool/lrama/lib/lrama/report.rb b/tool/lrama/lib/lrama/report.rb deleted file mode 100644 index 890e5f1e8c5cc4..00000000000000 --- a/tool/lrama/lib/lrama/report.rb +++ /dev/null @@ -1,4 +0,0 @@ -# frozen_string_literal: true - -require_relative 'report/duration' -require_relative 'report/profile' diff --git a/tool/lrama/lib/lrama/report/duration.rb b/tool/lrama/lib/lrama/report/duration.rb deleted file mode 100644 index fe09a0d028f501..00000000000000 --- a/tool/lrama/lib/lrama/report/duration.rb +++ /dev/null @@ -1,27 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class Report - module Duration - def self.enable - @_report_duration_enabled = true - end - - def self.enabled? - !!@_report_duration_enabled - end - - def report_duration(method_name) - time1 = Time.now.to_f - result = yield - time2 = Time.now.to_f - - if Duration.enabled? - puts sprintf("%s %10.5f s", method_name, time2 - time1) - end - - return result - end - end - end -end diff --git a/tool/lrama/lib/lrama/report/profile.rb b/tool/lrama/lib/lrama/report/profile.rb deleted file mode 100644 index 10488cf9130bd1..00000000000000 --- a/tool/lrama/lib/lrama/report/profile.rb +++ /dev/null @@ -1,16 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class Report - module Profile - # See "Profiling Lrama" in README.md for how to use. 
- def self.report_profile - require "stackprof" - - StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do - yield - end - end - end - end -end diff --git a/tool/lrama/lib/lrama/reporter.rb b/tool/lrama/lib/lrama/reporter.rb new file mode 100644 index 00000000000000..ed25cc7f8fcaf2 --- /dev/null +++ b/tool/lrama/lib/lrama/reporter.rb @@ -0,0 +1,39 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +require_relative 'reporter/conflicts' +require_relative 'reporter/grammar' +require_relative 'reporter/precedences' +require_relative 'reporter/profile' +require_relative 'reporter/rules' +require_relative 'reporter/states' +require_relative 'reporter/terms' + +module Lrama + class Reporter + include Lrama::Tracer::Duration + + # @rbs (**bool options) -> void + def initialize(**options) + @options = options + @rules = Rules.new(**options) + @terms = Terms.new(**options) + @conflicts = Conflicts.new + @precedences = Precedences.new + @grammar = Grammar.new(**options) + @states = States.new(**options) + end + + # @rbs (File io, Lrama::States states) -> void + def report(io, states) + report_duration(:report) do + report_duration(:report_rules) { @rules.report(io, states) } + report_duration(:report_terms) { @terms.report(io, states) } + report_duration(:report_conflicts) { @conflicts.report(io, states) } + report_duration(:report_precedences) { @precedences.report(io, states) } + report_duration(:report_grammar) { @grammar.report(io, states) } + report_duration(:report_states) { @states.report(io, states, ielr: states.ielr_defined?) 
} + end + end + end +end diff --git a/tool/lrama/lib/lrama/reporter/conflicts.rb b/tool/lrama/lib/lrama/reporter/conflicts.rb new file mode 100644 index 00000000000000..f4d8c604c9efa5 --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/conflicts.rb @@ -0,0 +1,44 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Reporter + class Conflicts + # @rbs (IO io, Lrama::States states) -> void + def report(io, states) + report_conflicts(io, states) + end + + private + + # @rbs (IO io, Lrama::States states) -> void + def report_conflicts(io, states) + has_conflict = false + + states.states.each do |state| + messages = format_conflict_messages(state.conflicts) + + unless messages.empty? + has_conflict = true + io << "State #{state.id} conflicts: #{messages.join(', ')}\n" + end + end + + io << "\n\n" if has_conflict + end + + # @rbs (Array[(Lrama::State::ShiftReduceConflict | Lrama::State::ReduceReduceConflict)] conflicts) -> Array[String] + def format_conflict_messages(conflicts) + conflict_types = { + shift_reduce: "shift/reduce", + reduce_reduce: "reduce/reduce" + } + + conflict_types.keys.map do |type| + type_conflicts = conflicts.select { |c| c.type == type } + "#{type_conflicts.count} #{conflict_types[type]}" unless type_conflicts.empty? + end.compact + end + end + end +end diff --git a/tool/lrama/lib/lrama/reporter/grammar.rb b/tool/lrama/lib/lrama/reporter/grammar.rb new file mode 100644 index 00000000000000..dc3f3f6bfd9a22 --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/grammar.rb @@ -0,0 +1,39 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Reporter + class Grammar + # @rbs (?grammar: bool, **bool _) -> void + def initialize(grammar: false, **_) + @grammar = grammar + end + + # @rbs (IO io, Lrama::States states) -> void + def report(io, states) + return unless @grammar + + io << "Grammar\n" + last_lhs = nil + + states.rules.each do |rule| + if rule.empty_rule? 
+ r = "ε" + else + r = rule.rhs.map(&:display_name).join(" ") + end + + if rule.lhs == last_lhs + io << sprintf("%5d %s| %s", rule.id, " " * rule.lhs.display_name.length, r) << "\n" + else + io << "\n" + io << sprintf("%5d %s: %s", rule.id, rule.lhs.display_name, r) << "\n" + end + + last_lhs = rule.lhs + end + io << "\n\n" + end + end + end +end diff --git a/tool/lrama/lib/lrama/reporter/precedences.rb b/tool/lrama/lib/lrama/reporter/precedences.rb new file mode 100644 index 00000000000000..73c0888700c7db --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/precedences.rb @@ -0,0 +1,54 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Reporter + class Precedences + # @rbs (IO io, Lrama::States states) -> void + def report(io, states) + report_precedences(io, states) + end + + private + + # @rbs (IO io, Lrama::States states) -> void + def report_precedences(io, states) + used_precedences = states.precedences.select(&:used_by?) + + return if used_precedences.empty? + + io << "Precedences\n\n" + + used_precedences.each do |precedence| + io << " precedence on #{precedence.symbol.display_name} is used to resolve conflict on\n" + + if precedence.used_by_lalr? + io << " LALR\n" + + precedence.used_by_lalr.uniq.sort_by do |resolved_conflict| + resolved_conflict.state.id + end.each do |resolved_conflict| + io << " state #{resolved_conflict.state.id}. #{resolved_conflict.report_precedences_message}\n" + end + + io << "\n" + end + + if precedence.used_by_ielr? + io << " IELR\n" + + precedence.used_by_ielr.uniq.sort_by do |resolved_conflict| + resolved_conflict.state.id + end.each do |resolved_conflict| + io << " state #{resolved_conflict.state.id}. 
#{resolved_conflict.report_precedences_message}\n" + end + + io << "\n" + end + end + + io << "\n" + end + end + end +end diff --git a/tool/lrama/lib/lrama/reporter/profile.rb b/tool/lrama/lib/lrama/reporter/profile.rb new file mode 100644 index 00000000000000..b569b94d4f3549 --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/profile.rb @@ -0,0 +1,4 @@ +# frozen_string_literal: true + +require_relative 'profile/call_stack' +require_relative 'profile/memory' diff --git a/tool/lrama/lib/lrama/reporter/profile/call_stack.rb b/tool/lrama/lib/lrama/reporter/profile/call_stack.rb new file mode 100644 index 00000000000000..8a4d44b61ca437 --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/profile/call_stack.rb @@ -0,0 +1,45 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Reporter + module Profile + module CallStack + # See "Call-stack Profiling Lrama" in README.md for how to use. + # + # @rbs enabled: bool + # @rbs &: -> void + # @rbs return: StackProf::result | void + def self.report(enabled) + if enabled && require_stackprof + ex = nil #: Exception? + path = 'tmp/stackprof-cpu-myapp.dump' + + StackProf.run(mode: :cpu, raw: true, out: path) do + yield + rescue Exception => e + ex = e + end + + STDERR.puts("Call-stack Profiling result is generated on #{path}") + + if ex + raise ex + end + else + yield + end + end + + # @rbs return: bool + def self.require_stackprof + require "stackprof" + true + rescue LoadError + warn "stackprof is not installed. Please run `bundle install`." + false + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/reporter/profile/memory.rb b/tool/lrama/lib/lrama/reporter/profile/memory.rb new file mode 100644 index 00000000000000..a019581fdfe514 --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/profile/memory.rb @@ -0,0 +1,44 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Reporter + module Profile + module Memory + # See "Memory Profiling Lrama" in README.md for how to use. 
+ # + # @rbs enabled: bool + # @rbs &: -> void + # @rbs return: StackProf::result | void + def self.report(enabled) + if enabled && require_memory_profiler + ex = nil #: Exception? + + report = MemoryProfiler.report do # steep:ignore UnknownConstant + yield + rescue Exception => e + ex = e + end + + report.pretty_print(to_file: "tmp/memory_profiler.txt") + + if ex + raise ex + end + else + yield + end + end + + # @rbs return: bool + def self.require_memory_profiler + require "memory_profiler" + true + rescue LoadError + warn "memory_profiler is not installed. Please run `bundle install`." + false + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/reporter/rules.rb b/tool/lrama/lib/lrama/reporter/rules.rb new file mode 100644 index 00000000000000..3e8bf19a0a62eb --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/rules.rb @@ -0,0 +1,43 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Reporter + class Rules + # @rbs (?rules: bool, **bool _) -> void + def initialize(rules: false, **_) + @rules = rules + end + + # @rbs (IO io, Lrama::States states) -> void + def report(io, states) + return unless @rules + + used_rules = states.rules.flat_map(&:rhs) + + unless used_rules.empty? + io << "Rule Usage Frequency\n\n" + frequency_counts = used_rules.each_with_object(Hash.new(0)) { |rule, counts| counts[rule] += 1 } + + frequency_counts + .select { |rule,| !rule.midrule? } + .sort_by { |rule, count| [-count, rule.name] } + .each_with_index { |(rule, count), i| io << sprintf("%5d %s (%d times)", i, rule.name, count) << "\n" } + io << "\n\n" + end + + unused_rules = states.rules.map(&:lhs).select do |rule| + !used_rules.include?(rule) && rule.token_id != 0 + end + + unless unused_rules.empty? 
+ io << "#{unused_rules.count} Unused Rules\n\n" + unused_rules.each_with_index do |rule, index| + io << sprintf("%5d %s", index, rule.display_name) << "\n" + end + io << "\n\n" + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/reporter/states.rb b/tool/lrama/lib/lrama/reporter/states.rb new file mode 100644 index 00000000000000..d152d0511a46cf --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/states.rb @@ -0,0 +1,387 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Reporter + class States + # @rbs (?itemsets: bool, ?lookaheads: bool, ?solved: bool, ?counterexamples: bool, ?verbose: bool, **bool _) -> void + def initialize(itemsets: false, lookaheads: false, solved: false, counterexamples: false, verbose: false, **_) + @itemsets = itemsets + @lookaheads = lookaheads + @solved = solved + @counterexamples = counterexamples + @verbose = verbose + end + + # @rbs (IO io, Lrama::States states, ielr: bool) -> void + def report(io, states, ielr: false) + cex = Counterexamples.new(states) if @counterexamples + + states.compute_la_sources_for_conflicted_states + report_split_states(io, states.states) if ielr + + states.states.each do |state| + report_state_header(io, state) + report_items(io, state) + report_conflicts(io, state) + report_shifts(io, state) + report_nonassoc_errors(io, state) + report_reduces(io, state) + report_nterm_transitions(io, state) + report_conflict_resolutions(io, state) if @solved + report_counterexamples(io, state, cex) if @counterexamples && state.has_conflicts? # @type var cex: Lrama::Counterexamples + report_verbose_info(io, state, states) if @verbose + # End of Report State + io << "\n" + end + end + + private + + # @rbs (IO io, Array[Lrama::State] states) -> void + def report_split_states(io, states) + ss = states.select(&:split_state?) + + return if ss.empty? 
+ + io << "Split States\n\n" + + ss.each do |state| + io << " State #{state.id} is split from state #{state.lalr_isocore.id}\n" + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state) -> void + def report_state_header(io, state) + io << "State #{state.id}\n\n" + end + + # @rbs (IO io, Lrama::State state) -> void + def report_items(io, state) + last_lhs = nil + list = @itemsets ? state.items : state.kernels + + list.sort_by {|i| [i.rule_id, i.position] }.each do |item| + r = item.empty_rule? ? "ε •" : item.rhs.map(&:display_name).insert(item.position, "•").join(" ") + + l = if item.lhs == last_lhs + " " * item.lhs.id.s_value.length + "|" + else + item.lhs.id.s_value + ":" + end + + la = "" + if @lookaheads && item.end_of_rule? + reduce = state.find_reduce_by_item!(item) + look_ahead = reduce.selected_look_ahead + unless look_ahead.empty? + la = " [#{look_ahead.compact.map(&:display_name).join(", ")}]" + end + end + + last_lhs = item.lhs + io << sprintf("%5i %s %s%s", item.rule_id, l, r, la) << "\n" + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state) -> void + def report_conflicts(io, state) + return if state.conflicts.empty? + + state.conflicts.each do |conflict| + syms = conflict.symbols.map { |sym| sym.display_name } + io << " Conflict on #{syms.join(", ")}. 
" + + case conflict.type + when :shift_reduce + # @type var conflict: Lrama::State::ShiftReduceConflict + io << "shift/reduce(#{conflict.reduce.item.rule.lhs.display_name})\n" + + conflict.symbols.each do |token| + conflict.reduce.look_ahead_sources[token].each do |goto| # steep:ignore NoMethod + io << " #{token.display_name} comes from state #{goto.from_state.id} goto by #{goto.next_sym.display_name}\n" + end + end + when :reduce_reduce + # @type var conflict: Lrama::State::ReduceReduceConflict + io << "reduce(#{conflict.reduce1.item.rule.lhs.display_name})/reduce(#{conflict.reduce2.item.rule.lhs.display_name})\n" + + conflict.symbols.each do |token| + conflict.reduce1.look_ahead_sources[token].each do |goto| # steep:ignore NoMethod + io << " #{token.display_name} comes from state #{goto.from_state.id} goto by #{goto.next_sym.display_name}\n" + end + + conflict.reduce2.look_ahead_sources[token].each do |goto| # steep:ignore NoMethod + io << " #{token.display_name} comes from state #{goto.from_state.id} goto by #{goto.next_sym.display_name}\n" + end + end + else + raise "Unknown conflict type #{conflict.type}" + end + + io << "\n" + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state) -> void + def report_shifts(io, state) + shifts = state.term_transitions.reject(&:not_selected) + + return if shifts.empty? + + next_syms = shifts.map(&:next_sym) + max_len = next_syms.map(&:display_name).map(&:length).max + shifts.each do |shift| + io << " #{shift.next_sym.display_name.ljust(max_len)} shift, and go to state #{shift.to_state.id}\n" + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state) -> void + def report_nonassoc_errors(io, state) + error_symbols = state.resolved_conflicts.select { |resolved| resolved.which == :error }.map { |error| error.symbol.display_name } + + return if error_symbols.empty? 
+ + max_len = error_symbols.map(&:length).max + error_symbols.each do |name| + io << " #{name.ljust(max_len)} error (nonassociative)\n" + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state) -> void + def report_reduces(io, state) + reduce_pairs = [] #: Array[[Lrama::Grammar::Symbol, Lrama::State::Action::Reduce]] + + state.non_default_reduces.each do |reduce| + reduce.look_ahead&.each do |term| + reduce_pairs << [term, reduce] + end + end + + return if reduce_pairs.empty? && !state.default_reduction_rule + + max_len = [ + reduce_pairs.map(&:first).map(&:display_name).map(&:length).max || 0, + state.default_reduction_rule ? "$default".length : 0 + ].max + + reduce_pairs.sort_by { |term, _| term.number }.each do |term, reduce| + rule = reduce.item.rule + io << " #{term.display_name.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n" + end + + if (r = state.default_reduction_rule) + s = "$default".ljust(max_len) + + if r.initial_rule? + io << " #{s} accept\n" + else + io << " #{s} reduce using rule #{r.id} (#{r.lhs.display_name})\n" + end + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state) -> void + def report_nterm_transitions(io, state) + return if state.nterm_transitions.empty? + + goto_transitions = state.nterm_transitions.sort_by do |goto| + goto.next_sym.number + end + + max_len = goto_transitions.map(&:next_sym).map do |nterm| + nterm.id.s_value.length + end.max + goto_transitions.each do |goto| + io << " #{goto.next_sym.id.s_value.ljust(max_len)} go to state #{goto.to_state.id}\n" + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state) -> void + def report_conflict_resolutions(io, state) + return if state.resolved_conflicts.empty? 
+ + state.resolved_conflicts.each do |resolved| + io << " #{resolved.report_message}\n" + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state, Lrama::Counterexamples cex) -> void + def report_counterexamples(io, state, cex) + examples = cex.compute(state) + + examples.each do |example| + is_shift_reduce = example.type == :shift_reduce + label0 = is_shift_reduce ? "shift/reduce" : "reduce/reduce" + label1 = is_shift_reduce ? "Shift derivation" : "First Reduce derivation" + label2 = is_shift_reduce ? "Reduce derivation" : "Second Reduce derivation" + + io << " #{label0} conflict on token #{example.conflict_symbol.id.s_value}:\n" + io << " #{example.path1_item}\n" + io << " #{example.path2_item}\n" + io << " #{label1}\n" + + example.derivations1.render_strings_for_report.each do |str| + io << " #{str}\n" + end + + io << " #{label2}\n" + + example.derivations2.render_strings_for_report.each do |str| + io << " #{str}\n" + end + end + end + + # @rbs (IO io, Lrama::State state, Lrama::States states) -> void + def report_verbose_info(io, state, states) + report_direct_read_sets(io, state, states) + report_reads_relation(io, state, states) + report_read_sets(io, state, states) + report_includes_relation(io, state, states) + report_lookback_relation(io, state, states) + report_follow_sets(io, state, states) + report_look_ahead_sets(io, state, states) + end + + # @rbs (IO io, Lrama::State state, Lrama::States states) -> void + def report_direct_read_sets(io, state, states) + io << " [Direct Read sets]\n" + direct_read_sets = states.direct_read_sets + + state.nterm_transitions.each do |goto| + terms = direct_read_sets[goto] + next unless terms && !terms.empty? 
+ + str = terms.map { |sym| sym.id.s_value }.join(", ") + io << " read #{goto.next_sym.id.s_value} shift #{str}\n" + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state, Lrama::States states) -> void + def report_reads_relation(io, state, states) + io << " [Reads Relation]\n" + + state.nterm_transitions.each do |goto| + goto2 = states.reads_relation[goto] + next unless goto2 + + goto2.each do |goto2| + io << " (State #{goto2.from_state.id}, #{goto2.next_sym.id.s_value})\n" + end + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state, Lrama::States states) -> void + def report_read_sets(io, state, states) + io << " [Read sets]\n" + read_sets = states.read_sets + + state.nterm_transitions.each do |goto| + terms = read_sets[goto] + next unless terms && !terms.empty? + + terms.each do |sym| + io << " #{sym.id.s_value}\n" + end + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state, Lrama::States states) -> void + def report_includes_relation(io, state, states) + io << " [Includes Relation]\n" + + state.nterm_transitions.each do |goto| + gotos = states.includes_relation[goto] + next unless gotos + + gotos.each do |goto2| + io << " (State #{state.id}, #{goto.next_sym.id.s_value}) -> (State #{goto2.from_state.id}, #{goto2.next_sym.id.s_value})\n" + end + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state, Lrama::States states) -> void + def report_lookback_relation(io, state, states) + io << " [Lookback Relation]\n" + + states.rules.each do |rule| + gotos = states.lookback_relation.dig(state.id, rule.id) + next unless gotos + + gotos.each do |goto2| + io << " (Rule: #{rule.display_name}) -> (State #{goto2.from_state.id}, #{goto2.next_sym.id.s_value})\n" + end + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state, Lrama::States states) -> void + def report_follow_sets(io, state, states) + io << " [Follow sets]\n" + follow_sets = states.follow_sets + + state.nterm_transitions.each do |goto| + terms = follow_sets[goto] + 
next unless terms + + terms.each do |sym| + io << " #{goto.next_sym.id.s_value} -> #{sym.id.s_value}\n" + end + end + + io << "\n" + end + + # @rbs (IO io, Lrama::State state, Lrama::States states) -> void + def report_look_ahead_sets(io, state, states) + io << " [Look-Ahead Sets]\n" + look_ahead_rules = [] #: Array[[Lrama::Grammar::Rule, Array[Lrama::Grammar::Symbol]]] + + states.rules.each do |rule| + syms = states.la.dig(state.id, rule.id) + next unless syms + + look_ahead_rules << [rule, syms] + end + + return if look_ahead_rules.empty? + + max_len = look_ahead_rules.flat_map { |_, syms| syms.map { |s| s.id.s_value.length } }.max + + look_ahead_rules.each do |rule, syms| + syms.each do |sym| + io << " #{sym.id.s_value.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n" + end + end + + io << "\n" + end + end + end +end diff --git a/tool/lrama/lib/lrama/reporter/terms.rb b/tool/lrama/lib/lrama/reporter/terms.rb new file mode 100644 index 00000000000000..f72d8b1a1a8137 --- /dev/null +++ b/tool/lrama/lib/lrama/reporter/terms.rb @@ -0,0 +1,44 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Reporter + class Terms + # @rbs (?terms: bool, **bool _) -> void + def initialize(terms: false, **_) + @terms = terms + end + + # @rbs (IO io, Lrama::States states) -> void + def report(io, states) + return unless @terms + + look_aheads = states.states.each do |state| + state.reduces.flat_map do |reduce| + reduce.look_ahead unless reduce.look_ahead.nil? + end + end + + next_terms = states.states.flat_map do |state| + state.term_transitions.map {|shift| shift.next_sym } + end + + unused_symbols = states.terms.reject do |term| + (look_aheads + next_terms).include?(term) + end + + io << states.terms.count << " Terms\n\n" + + io << states.nterms.count << " Non-Terminals\n\n" + + unless unused_symbols.empty? 
+ io << "#{unused_symbols.count} Unused Terms\n\n" + unused_symbols.each_with_index do |term, index| + io << sprintf("%5d %s", index, term.id.s_value) << "\n" + end + io << "\n\n" + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/state.rb b/tool/lrama/lib/lrama/state.rb index 3008786ced0271..50912e094e427c 100644 --- a/tool/lrama/lib/lrama/state.rb +++ b/tool/lrama/lib/lrama/state.rb @@ -1,17 +1,62 @@ +# rbs_inline: enabled # frozen_string_literal: true -require_relative "state/reduce" +require_relative "state/action" +require_relative "state/inadequacy_annotation" +require_relative "state/item" require_relative "state/reduce_reduce_conflict" require_relative "state/resolved_conflict" -require_relative "state/shift" require_relative "state/shift_reduce_conflict" module Lrama class State - attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, - :default_reduction_rule, :closure, :items - attr_accessor :shifts, :reduces, :ielr_isocores, :lalr_isocore - + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # type conflict = State::ShiftReduceConflict | State::ReduceReduceConflict + # type transition = Action::Shift | Action::Goto + # type lookahead_set = Hash[Item, Array[Grammar::Symbol]] + # + # @id: Integer + # @accessing_symbol: Grammar::Symbol + # @kernels: Array[Item] + # @items: Array[Item] + # @items_to_state: Hash[Array[Item], State] + # @conflicts: Array[conflict] + # @resolved_conflicts: Array[ResolvedConflict] + # @default_reduction_rule: Grammar::Rule? 
+ # @closure: Array[Item] + # @nterm_transitions: Array[Action::Goto] + # @term_transitions: Array[Action::Shift] + # @transitions: Array[transition] + # @internal_dependencies: Hash[Action::Goto, Array[Action::Goto]] + # @successor_dependencies: Hash[Action::Goto, Array[Action::Goto]] + + attr_reader :id #: Integer + attr_reader :accessing_symbol #: Grammar::Symbol + attr_reader :kernels #: Array[Item] + attr_reader :conflicts #: Array[conflict] + attr_reader :resolved_conflicts #: Array[ResolvedConflict] + attr_reader :default_reduction_rule #: Grammar::Rule? + attr_reader :closure #: Array[Item] + attr_reader :items #: Array[Item] + attr_reader :annotation_list #: Array[InadequacyAnnotation] + attr_reader :predecessors #: Array[State] + attr_reader :items_to_state #: Hash[Array[Item], State] + attr_reader :lane_items #: Hash[State, Array[[Item, Item]]] + + attr_accessor :_transitions #: Array[[Grammar::Symbol, Array[Item]]] + attr_accessor :reduces #: Array[Action::Reduce] + attr_accessor :ielr_isocores #: Array[State] + attr_accessor :lalr_isocore #: State + attr_accessor :lookaheads_recomputed #: bool + attr_accessor :follow_kernel_items #: Hash[Action::Goto, Hash[Item, bool]] + attr_accessor :always_follows #: Hash[Action::Goto, Array[Grammar::Symbol]] + attr_accessor :goto_follows #: Hash[Action::Goto, Array[Grammar::Symbol]] + + # @rbs (Integer id, Grammar::Symbol accessing_symbol, Array[Item] kernels) -> void def initialize(id, accessing_symbol, kernels) @id = id @accessing_symbol = accessing_symbol @@ -28,48 +73,72 @@ def initialize(id, accessing_symbol, kernels) @ielr_isocores = [self] @internal_dependencies = {} @successor_dependencies = {} + @annotation_list = [] + @lookaheads_recomputed = false + @follow_kernel_items = {} @always_follows = {} + @goto_follows = {} + @lhs_contributions = {} + @lane_items = {} + end + + # @rbs (State other) -> bool + def ==(other) + self.id == other.id end + # @rbs (Array[Item] closure) -> void def closure=(closure) 
@closure = closure @items = @kernels + @closure end + # @rbs () -> Array[Action::Reduce] def non_default_reduces reduces.reject do |reduce| reduce.rule == @default_reduction_rule end end - def compute_shifts_reduces - _shifts = {} + # @rbs () -> void + def compute_transitions_and_reduces + _transitions = {} + @_lane_items ||= {} reduces = [] items.each do |item| # TODO: Consider what should be pushed if item.end_of_rule? - reduces << Reduce.new(item) + reduces << Action::Reduce.new(item) else key = item.next_sym - _shifts[key] ||= [] - _shifts[key] << item.new_by_next_position + _transitions[key] ||= [] + @_lane_items[key] ||= [] + next_item = item.new_by_next_position + _transitions[key] << next_item + @_lane_items[key] << [item, next_item] end end # It seems Bison 3.8.2 iterates transitions order by symbol number - shifts = _shifts.sort_by do |next_sym, new_items| + transitions = _transitions.sort_by do |next_sym, to_items| next_sym.number - end.map do |next_sym, new_items| - Shift.new(next_sym, new_items.flatten) end - self.shifts = shifts.freeze + + self._transitions = transitions.freeze self.reduces = reduces.freeze end + # @rbs (Grammar::Symbol next_sym, State next_state) -> void + def set_lane_items(next_sym, next_state) + @lane_items[next_state] = @_lane_items[next_sym] + end + + # @rbs (Array[Item] items, State next_state) -> void def set_items_to_state(items, next_state) @items_to_state[items] = next_state end + # @rbs (Grammar::Rule rule, Array[Grammar::Symbol] look_ahead) -> void def set_look_ahead(rule, look_ahead) reduce = reduces.find do |r| r.rule == rule @@ -78,50 +147,78 @@ def set_look_ahead(rule, look_ahead) reduce.look_ahead = look_ahead end - def nterm_transitions - @nterm_transitions ||= transitions.select {|shift, _| shift.next_sym.nterm? 
} + # @rbs (Grammar::Rule rule, Hash[Grammar::Symbol, Array[Action::Goto]] sources) -> void + def set_look_ahead_sources(rule, sources) + reduce = reduces.find do |r| + r.rule == rule + end + + reduce.look_ahead_sources = sources + end + + # @rbs () -> Array[Action::Goto] + def nterm_transitions # steep:ignore + @nterm_transitions ||= transitions.select {|transition| transition.is_a?(Action::Goto) } end - def term_transitions - @term_transitions ||= transitions.select {|shift, _| shift.next_sym.term? } + # @rbs () -> Array[Action::Shift] + def term_transitions # steep:ignore + @term_transitions ||= transitions.select {|transition| transition.is_a?(Action::Shift) } end + # @rbs () -> Array[transition] def transitions - @transitions ||= shifts.map {|shift| [shift, @items_to_state[shift.next_items]] } + @transitions ||= _transitions.map do |next_sym, to_items| + if next_sym.term? + Action::Shift.new(self, next_sym, to_items.flatten, @items_to_state[to_items]) + else + Action::Goto.new(self, next_sym, to_items.flatten, @items_to_state[to_items]) + end + end end - def update_transition(shift, next_state) - set_items_to_state(shift.next_items, next_state) + # @rbs (transition transition, State next_state) -> void + def update_transition(transition, next_state) + set_items_to_state(transition.to_items, next_state) next_state.append_predecessor(self) - clear_transitions_cache + update_transitions_caches(transition) end - def clear_transitions_cache + # @rbs () -> void + def update_transitions_caches(transition) + new_transition = + if transition.next_sym.term? 
+ Action::Shift.new(self, transition.next_sym, transition.to_items, @items_to_state[transition.to_items]) + else + Action::Goto.new(self, transition.next_sym, transition.to_items, @items_to_state[transition.to_items]) + end + + @transitions.delete(transition) + @transitions << new_transition @nterm_transitions = nil @term_transitions = nil - @transitions = nil + + @follow_kernel_items[new_transition] = @follow_kernel_items.delete(transition) + @always_follows[new_transition] = @always_follows.delete(transition) end + # @rbs () -> Array[Action::Shift] def selected_term_transitions - term_transitions.reject do |shift, next_state| + term_transitions.reject do |shift| shift.not_selected end end # Move to next state by sym + # + # @rbs (Grammar::Symbol sym) -> State def transition(sym) result = nil if sym.term? - term_transitions.each do |shift, next_state| - term = shift.next_sym - result = next_state if term == sym - end + result = term_transitions.find {|shift| shift.next_sym == sym }.to_state else - nterm_transitions.each do |shift, next_state| - nterm = shift.next_sym - result = next_state if nterm == sym - end + result = nterm_transitions.find {|goto| goto.next_sym == sym }.to_state end raise "Can not transit by #{sym} #{self}" if result.nil? @@ -129,12 +226,14 @@ def transition(sym) result end + # @rbs (Item item) -> Action::Reduce def find_reduce_by_item!(item) reduces.find do |r| r.item == item end || (raise "reduce is not found. #{item}") end + # @rbs (Grammar::Rule default_reduction_rule) -> void def default_reduction_rule=(default_reduction_rule) @default_reduction_rule = default_reduction_rule @@ -145,200 +244,219 @@ def default_reduction_rule=(default_reduction_rule) end end + # @rbs () -> bool def has_conflicts? !@conflicts.empty? 
end + # @rbs () -> Array[conflict] def sr_conflicts @conflicts.select do |conflict| conflict.type == :shift_reduce end end + # @rbs () -> Array[conflict] def rr_conflicts @conflicts.select do |conflict| conflict.type == :reduce_reduce end end + # Clear information related to conflicts. + # IELR computation re-calculates conflicts and default reduction of states + # after LALR computation. + # Call this method before IELR computation to avoid duplicated conflicts information + # is stored. + # + # @rbs () -> void + def clear_conflicts + @conflicts = [] + @resolved_conflicts = [] + @default_reduction_rule = nil + + term_transitions.each(&:clear_conflicts) + reduces.each(&:clear_conflicts) + end + + # @rbs () -> bool + def split_state? + @lalr_isocore != self + end + + # Definition 3.40 (propagate_lookaheads) + # + # @rbs (State next_state) -> lookahead_set def propagate_lookaheads(next_state) - next_state.kernels.map {|item| + next_state.kernels.map {|next_kernel| lookahead_sets = - if item.position == 1 - goto_follow_set(item.lhs) - else - kernel = kernels.find {|k| k.predecessor_item_of?(item) } + if next_kernel.position > 1 + kernel = kernels.find {|k| k.predecessor_item_of?(next_kernel) } item_lookahead_set[kernel] + else + goto_follow_set(next_kernel.lhs) end - [item, lookahead_sets & next_state.lookahead_set_filters[item]] + [next_kernel, lookahead_sets & next_state.lookahead_set_filters[next_kernel]] }.to_h end - def lookaheads_recomputed - !@item_lookahead_set.nil? - end - - def compatible_lookahead?(filtered_lookahead) + # Definition 3.43 (is_compatible) + # + # @rbs (lookahead_set filtered_lookahead) -> bool + def is_compatible?(filtered_lookahead) !lookaheads_recomputed || - @lalr_isocore.annotation_list.all? {|token, actions| - a = dominant_contribution(token, actions, item_lookahead_set) - b = dominant_contribution(token, actions, filtered_lookahead) + @lalr_isocore.annotation_list.all? 
{|annotation| + a = annotation.dominant_contribution(item_lookahead_set) + b = annotation.dominant_contribution(filtered_lookahead) a.nil? || b.nil? || a == b } end + # Definition 3.38 (lookahead_set_filters) + # + # @rbs () -> lookahead_set def lookahead_set_filters - kernels.map {|kernel| - [kernel, - @lalr_isocore.annotation_list.select {|token, actions| - token.term? && actions.any? {|action, contributions| - !contributions.nil? && contributions.key?(kernel) && contributions[kernel] - } - }.map {|token, _| token } - ] + @lookahead_set_filters ||= kernels.map {|kernel| + [kernel, @lalr_isocore.annotation_list.select {|annotation| annotation.contributed?(kernel) }.map(&:token)] }.to_h end - def dominant_contribution(token, actions, lookaheads) - a = actions.select {|action, contributions| - contributions.nil? || contributions.any? {|item, contributed| contributed && lookaheads[item].include?(token) } - }.map {|action, _| action } - return nil if a.empty? - a.reject {|action| - if action.is_a?(State::Shift) - action.not_selected - elsif action.is_a?(State::Reduce) - action.not_selected_symbols.include?(token) - end - } - end - + # Definition 3.27 (inadequacy_lists) + # + # @rbs () -> Hash[Grammar::Symbol, Array[Action::Shift | Action::Reduce]] def inadequacy_list return @inadequacy_list if @inadequacy_list - shift_contributions = shifts.map {|shift| - [shift.next_sym, [shift]] - }.to_h - reduce_contributions = reduces.map {|reduce| - (reduce.look_ahead || []).map {|sym| - [sym, [reduce]] - }.to_h - }.reduce(Hash.new([])) {|hash, cont| - hash.merge(cont) {|_, a, b| a | b } - } + inadequacy_list = {} - list = shift_contributions.merge(reduce_contributions) {|_, a, b| a | b } - @inadequacy_list = list.select {|token, actions| token.term? 
&& actions.size > 1 } - end - - def annotation_list - return @annotation_list if @annotation_list - - @annotation_list = annotate_manifestation - @annotation_list = @items_to_state.values.map {|next_state| next_state.annotate_predecessor(self) } - .reduce(@annotation_list) {|result, annotations| - result.merge(annotations) {|_, actions_a, actions_b| - if actions_a.nil? || actions_b.nil? - actions_a || actions_b - else - actions_a.merge(actions_b) {|_, contributions_a, contributions_b| - if contributions_a.nil? || contributions_b.nil? - next contributions_a || contributions_b - end - - contributions_a.merge(contributions_b) {|_, contributed_a, contributed_b| - contributed_a || contributed_b - } - } - end - } - } + term_transitions.each do |shift| + inadequacy_list[shift.next_sym] ||= [] + inadequacy_list[shift.next_sym] << shift.dup + end + reduces.each do |reduce| + next if reduce.look_ahead.nil? + + reduce.look_ahead.each do |token| + inadequacy_list[token] ||= [] + inadequacy_list[token] << reduce.dup + end + end + + @inadequacy_list = inadequacy_list.select {|token, actions| actions.size > 1 } end + # Definition 3.30 (annotate_manifestation) + # + # @rbs () -> void def annotate_manifestation - inadequacy_list.transform_values {|actions| - actions.map {|action| - if action.is_a?(Shift) + inadequacy_list.each {|token, actions| + contribution_matrix = actions.map {|action| + if action.is_a?(Action::Shift) [action, nil] - elsif action.is_a?(Reduce) - if action.rule.empty_rule? - [action, lhs_contributions(action.rule.lhs, inadequacy_list.key(actions))] - else - contributions = kernels.map {|kernel| [kernel, kernel.rule == action.rule && kernel.end_of_rule?] }.to_h - [action, contributions] - end + else + [action, action.rule.empty_rule? ? lhs_contributions(action.rule.lhs, token) : kernels.map {|k| [k, k.rule == action.item.rule && k.end_of_rule?] 
}.to_h] end }.to_h + @annotation_list << InadequacyAnnotation.new(self, token, actions, contribution_matrix) } end + # Definition 3.32 (annotate_predecessor) + # + # @rbs (State predecessor) -> void def annotate_predecessor(predecessor) - annotation_list.transform_values {|actions| - token = annotation_list.key(actions) - actions.transform_values {|inadequacy| - next nil if inadequacy.nil? - lhs_adequacy = kernels.any? {|kernel| - inadequacy[kernel] && kernel.position == 1 && predecessor.lhs_contributions(kernel.lhs, token).nil? - } - if lhs_adequacy - next nil + propagating_list = annotation_list.map {|annotation| + contribution_matrix = annotation.contribution_matrix.map {|action, contributions| + if contributions.nil? + [action, nil] + elsif first_kernels.any? {|kernel| contributions[kernel] && predecessor.lhs_contributions(kernel.lhs, annotation.token).empty? } + [action, nil] else - predecessor.kernels.map {|pred_k| - [pred_k, kernels.any? {|k| - inadequacy[k] && ( - pred_k.predecessor_item_of?(k) && predecessor.item_lookahead_set[pred_k].include?(token) || - k.position == 1 && predecessor.lhs_contributions(k.lhs, token)[pred_k] - ) - }] + cs = predecessor.lane_items[self].map {|pred_kernel, kernel| + c = contributions[kernel] && ( + (kernel.position > 1 && predecessor.item_lookahead_set[pred_kernel].include?(annotation.token)) || + (kernel.position == 1 && predecessor.lhs_contributions(kernel.lhs, annotation.token)[pred_kernel]) + ) + [pred_kernel, c] }.to_h + [action, cs] end - } - } + }.to_h + + # Observation 3.33 (Simple Split-Stable Dominance) + # + # If all of contributions in the contribution_matrix are + # always contribution or never contribution, we can stop annotate propagations + # to the predecessor state. + next nil if contribution_matrix.all? {|_, contributions| contributions.nil? || contributions.all? 
{|_, contributed| !contributed } } + + InadequacyAnnotation.new(annotation.state, annotation.token, annotation.actions, contribution_matrix) + }.compact + predecessor.append_annotation_list(propagating_list) end - def lhs_contributions(sym, token) - shift, next_state = nterm_transitions.find {|sh, _| sh.next_sym == sym } - if always_follows(shift, next_state).include?(token) - nil - else - kernels.map {|kernel| [kernel, follow_kernel_items(shift, next_state, kernel) && item_lookahead_set[kernel].include?(token)] }.to_h - end + # @rbs () -> Array[Item] + def first_kernels + @first_kernels ||= kernels.select {|kernel| kernel.position == 1 } end - def follow_kernel_items(shift, next_state, kernel) - queue = [[self, shift, next_state]] - until queue.empty? - st, sh, next_st = queue.pop - return true if kernel.next_sym == sh.next_sym && kernel.symbols_after_transition.all?(&:nullable) - st.internal_dependencies(sh, next_st).each {|v| queue << v } + # @rbs (Array[InadequacyAnnotation] propagating_list) -> void + def append_annotation_list(propagating_list) + annotation_list.each do |annotation| + merging_list = propagating_list.select {|a| a.state == annotation.state && a.token == annotation.token && a.actions == annotation.actions } + annotation.merge_matrix(merging_list.map(&:contribution_matrix)) + propagating_list -= merging_list end - false + + @annotation_list += propagating_list end + # Definition 3.31 (compute_lhs_contributions) + # + # @rbs (Grammar::Symbol sym, Grammar::Symbol token) -> (nil | Hash[Item, bool]) + def lhs_contributions(sym, token) + return @lhs_contributions[sym][token] unless @lhs_contributions.dig(sym, token).nil? 
+ + transition = nterm_transitions.find {|goto| goto.next_sym == sym } + @lhs_contributions[sym] ||= {} + @lhs_contributions[sym][token] = + if always_follows[transition].include?(token) + {} + else + kernels.map {|kernel| [kernel, follow_kernel_items[transition][kernel] && item_lookahead_set[kernel].include?(token)] }.to_h + end + end + + # Definition 3.26 (item_lookahead_sets) + # + # @rbs () -> lookahead_set def item_lookahead_set return @item_lookahead_set if @item_lookahead_set - kernels.map {|item| + @item_lookahead_set = kernels.map {|k| [k, []] }.to_h + @item_lookahead_set = kernels.map {|kernel| value = - if item.lhs.accept_symbol? + if kernel.lhs.accept_symbol? [] - elsif item.position > 1 - prev_items = predecessors_with_item(item) + elsif kernel.position > 1 + prev_items = predecessors_with_item(kernel) prev_items.map {|st, i| st.item_lookahead_set[i] }.reduce([]) {|acc, syms| acc |= syms } - elsif item.position == 1 - prev_state = @predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } - shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } - prev_state.goto_follows(shift, next_state) + elsif kernel.position == 1 + prev_state = @predecessors.find {|p| p.transitions.any? {|transition| transition.next_sym == kernel.lhs } } + goto = prev_state.nterm_transitions.find {|goto| goto.next_sym == kernel.lhs } + prev_state.goto_follows[goto] end - [item, value] + [kernel, value] }.to_h end + # @rbs (lookahead_set k) -> void def item_lookahead_set=(k) @item_lookahead_set = k end + # @rbs (Item item) -> Array[[State, Item]] def predecessors_with_item(item) result = [] @predecessors.each do |pre| @@ -349,69 +467,53 @@ def predecessors_with_item(item) result end + # @rbs (State prev_state) -> void def append_predecessor(prev_state) @predecessors << prev_state @predecessors.uniq! 
end + # Definition 3.39 (compute_goto_follow_set) + # + # @rbs (Grammar::Symbol nterm_token) -> Array[Grammar::Symbol] def goto_follow_set(nterm_token) return [] if nterm_token.accept_symbol? - shift, next_state = @lalr_isocore.nterm_transitions.find {|sh, _| sh.next_sym == nterm_token } + goto = @lalr_isocore.nterm_transitions.find {|g| g.next_sym == nterm_token } @kernels - .select {|kernel| follow_kernel_items(shift, next_state, kernel) } + .select {|kernel| @lalr_isocore.follow_kernel_items[goto][kernel] } .map {|kernel| item_lookahead_set[kernel] } - .reduce(always_follows(shift, next_state)) {|result, terms| result |= terms } - end - - def goto_follows(shift, next_state) - queue = internal_dependencies(shift, next_state) + predecessor_dependencies(shift, next_state) - terms = always_follows(shift, next_state) - until queue.empty? - st, sh, next_st = queue.pop - terms |= st.always_follows(sh, next_st) - st.internal_dependencies(sh, next_st).each {|v| queue << v } - st.predecessor_dependencies(sh, next_st).each {|v| queue << v } - end - terms - end - - def always_follows(shift, next_state) - return @always_follows[[shift, next_state]] if @always_follows[[shift, next_state]] - - queue = internal_dependencies(shift, next_state) + successor_dependencies(shift, next_state) - terms = [] - until queue.empty? 
- st, sh, next_st = queue.pop - terms |= next_st.term_transitions.map {|sh, _| sh.next_sym } - st.internal_dependencies(sh, next_st).each {|v| queue << v } - st.successor_dependencies(sh, next_st).each {|v| queue << v } - end - @always_follows[[shift, next_state]] = terms + .reduce(@lalr_isocore.always_follows[goto]) {|result, terms| result |= terms } end - def internal_dependencies(shift, next_state) - return @internal_dependencies[[shift, next_state]] if @internal_dependencies[[shift, next_state]] + # Definition 3.8 (Goto Follows Internal Relation) + # + # @rbs (Action::Goto goto) -> Array[Action::Goto] + def internal_dependencies(goto) + return @internal_dependencies[goto] if @internal_dependencies[goto] syms = @items.select {|i| - i.next_sym == shift.next_sym && i.symbols_after_transition.all?(&:nullable) && i.position == 0 + i.next_sym == goto.next_sym && i.symbols_after_transition.all?(&:nullable) && i.position == 0 }.map(&:lhs).uniq - @internal_dependencies[[shift, next_state]] = nterm_transitions.select {|sh, _| syms.include?(sh.next_sym) }.map {|goto| [self, *goto] } + @internal_dependencies[goto] = nterm_transitions.select {|goto2| syms.include?(goto2.next_sym) } end - def successor_dependencies(shift, next_state) - return @successor_dependencies[[shift, next_state]] if @successor_dependencies[[shift, next_state]] + # Definition 3.5 (Goto Follows Successor Relation) + # + # @rbs (Action::Goto goto) -> Array[Action::Goto] + def successor_dependencies(goto) + return @successor_dependencies[goto] if @successor_dependencies[goto] - @successor_dependencies[[shift, next_state]] = - next_state.nterm_transitions - .select {|next_shift, _| next_shift.next_sym.nullable } - .map {|transition| [next_state, *transition] } + @successor_dependencies[goto] = goto.to_state.nterm_transitions.select {|next_goto| next_goto.next_sym.nullable } end - def predecessor_dependencies(shift, next_state) + # Definition 3.9 (Goto Follows Predecessor Relation) + # + # @rbs 
(Action::Goto goto) -> Array[Action::Goto] + def predecessor_dependencies(goto) state_items = [] @kernels.select {|kernel| - kernel.next_sym == shift.next_sym && kernel.symbols_after_transition.all?(&:nullable) + kernel.next_sym == goto.next_sym && kernel.symbols_after_transition.all?(&:nullable) }.each do |item| queue = predecessors_with_item(item) until queue.empty? @@ -425,8 +527,7 @@ def predecessor_dependencies(shift, next_state) end state_items.map {|state, item| - sh, next_st = state.nterm_transitions.find {|shi, _| shi.next_sym == item.lhs } - [state, sh, next_st] + state.nterm_transitions.find {|goto2| goto2.next_sym == item.lhs } } end end diff --git a/tool/lrama/lib/lrama/state/action.rb b/tool/lrama/lib/lrama/state/action.rb new file mode 100644 index 00000000000000..791685fc23681a --- /dev/null +++ b/tool/lrama/lib/lrama/state/action.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +require_relative "action/goto" +require_relative "action/reduce" +require_relative "action/shift" diff --git a/tool/lrama/lib/lrama/state/action/goto.rb b/tool/lrama/lib/lrama/state/action/goto.rb new file mode 100644 index 00000000000000..4c2c82afdc9dcc --- /dev/null +++ b/tool/lrama/lib/lrama/state/action/goto.rb @@ -0,0 +1,33 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class State + class Action + class Goto + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! 
+ # @from_state: State + # @next_sym: Grammar::Symbol + # @to_items: Array[Item] + # @to_state: State + + attr_reader :from_state #: State + attr_reader :next_sym #: Grammar::Symbol + attr_reader :to_items #: Array[Item] + attr_reader :to_state #: State + + # @rbs (State from_state, Grammar::Symbol next_sym, Array[Item] to_items, State to_state) -> void + def initialize(from_state, next_sym, to_items, to_state) + @from_state = from_state + @next_sym = next_sym + @to_items = to_items + @to_state = to_state + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/action/reduce.rb b/tool/lrama/lib/lrama/state/action/reduce.rb new file mode 100644 index 00000000000000..9678ab0a98cfad --- /dev/null +++ b/tool/lrama/lib/lrama/state/action/reduce.rb @@ -0,0 +1,71 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class State + class Action + class Reduce + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # @item: Item + # @look_ahead: Array[Grammar::Symbol]? + # @look_ahead_sources: Hash[Grammar::Symbol, Array[Action::Goto]]? + # @not_selected_symbols: Array[Grammar::Symbol] + + attr_reader :item #: Item + attr_reader :look_ahead #: Array[Grammar::Symbol]? + attr_reader :look_ahead_sources #: Hash[Grammar::Symbol, Array[Action::Goto]]? 
+ attr_reader :not_selected_symbols #: Array[Grammar::Symbol] + + # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html + attr_accessor :default_reduction #: bool + + # @rbs (Item item) -> void + def initialize(item) + @item = item + @look_ahead = nil + @look_ahead_sources = nil + @not_selected_symbols = [] + end + + # @rbs () -> Grammar::Rule + def rule + @item.rule + end + + # @rbs (Array[Grammar::Symbol] look_ahead) -> Array[Grammar::Symbol] + def look_ahead=(look_ahead) + @look_ahead = look_ahead.freeze + end + + # @rbs (Hash[Grammar::Symbol, Array[Action::Goto]] sources) -> Hash[Grammar::Symbol, Array[Action::Goto]] + def look_ahead_sources=(sources) + @look_ahead_sources = sources.freeze + end + + # @rbs (Grammar::Symbol sym) -> Array[Grammar::Symbol] + def add_not_selected_symbol(sym) + @not_selected_symbols << sym + end + + # @rbs () -> (::Array[Grammar::Symbol?]) + def selected_look_ahead + if look_ahead + look_ahead - @not_selected_symbols + else + [] + end + end + + # @rbs () -> void + def clear_conflicts + @not_selected_symbols = [] + @default_reduction = nil + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/action/shift.rb b/tool/lrama/lib/lrama/state/action/shift.rb new file mode 100644 index 00000000000000..52d9f8c4f09dc6 --- /dev/null +++ b/tool/lrama/lib/lrama/state/action/shift.rb @@ -0,0 +1,39 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class State + class Action + class Shift + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! 
+ # @from_state: State + # @next_sym: Grammar::Symbol + # @to_items: Array[Item] + # @to_state: State + + attr_reader :from_state #: State + attr_reader :next_sym #: Grammar::Symbol + attr_reader :to_items #: Array[Item] + attr_reader :to_state #: State + attr_accessor :not_selected #: bool + + # @rbs (State from_state, Grammar::Symbol next_sym, Array[Item] to_items, State to_state) -> void + def initialize(from_state, next_sym, to_items, to_state) + @from_state = from_state + @next_sym = next_sym + @to_items = to_items + @to_state = to_state + end + + # @rbs () -> void + def clear_conflicts + @not_selected = nil + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/inadequacy_annotation.rb b/tool/lrama/lib/lrama/state/inadequacy_annotation.rb new file mode 100644 index 00000000000000..3654fa460727d7 --- /dev/null +++ b/tool/lrama/lib/lrama/state/inadequacy_annotation.rb @@ -0,0 +1,140 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class State + class InadequacyAnnotation + # @rbs! + # type action = Action::Shift | Action::Reduce + + attr_accessor :state #: State + attr_accessor :token #: Grammar::Symbol + attr_accessor :actions #: Array[action] + attr_accessor :contribution_matrix #: Hash[action, Hash[Item, bool]] + + # @rbs (State state, Grammar::Symbol token, Array[action] actions, Hash[action, Hash[Item, bool]] contribution_matrix) -> void + def initialize(state, token, actions, contribution_matrix) + @state = state + @token = token + @actions = actions + @contribution_matrix = contribution_matrix + end + + # @rbs (Item item) -> bool + def contributed?(item) + @contribution_matrix.any? {|action, contributions| !contributions.nil? 
&& contributions[item] } + end + + # @rbs (Array[Hash[action, Hash[Item, bool]]] another_matrixes) -> void + def merge_matrix(another_matrixes) + another_matrixes.each do |another_matrix| + @contribution_matrix.merge!(another_matrix) {|action, contributions, another_contributions| + next contributions if another_contributions.nil? + next another_contributions if contributions.nil? + + contributions.merge!(another_contributions) {|_, contributed, another_contributed| contributed || another_contributed } + } + end + end + + # Definition 3.42 (dominant_contribution) + # + # @rbs (State::lookahead_set lookaheads) -> Array[action]? + def dominant_contribution(lookaheads) + actions = @actions.select {|action| + contribution_matrix[action].nil? || contribution_matrix[action].any? {|item, contributed| contributed && lookaheads[item].include?(@token) } + } + return nil if actions.empty? + + resolve_conflict(actions) + end + + # @rbs (Array[action] actions) -> Array[action] + def resolve_conflict(actions) + # @type var shifts: Array[Action::Shift] + # @type var reduces: Array[Action::Reduce] + shifts = actions.select {|action| action.is_a?(Action::Shift)} + reduces = actions.select {|action| action.is_a?(Action::Reduce) } + + shifts.each do |shift| + reduces.each do |reduce| + sym = shift.next_sym + + shift_prec = sym.precedence + reduce_prec = reduce.item.rule.precedence + + # Can resolve only when both have prec + unless shift_prec && reduce_prec + next + end + + case + when shift_prec < reduce_prec + # Reduce is selected + actions.delete(shift) + next + when shift_prec > reduce_prec + # Shift is selected + actions.delete(reduce) + next + end + + # shift_prec == reduce_prec, then check associativity + case sym.precedence&.type + when :precedence + # %precedence only specifies precedence and not specify associativity + # then a conflict is unresolved if precedence is same. 
+ next + when :right + # Shift is selected + actions.delete(reduce) + next + when :left + # Reduce is selected + actions.delete(shift) + next + when :nonassoc + # Can not resolve + # + # nonassoc creates "run-time" error, precedence creates "compile-time" error. + # Then omit both the shift and reduce. + # + # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html + actions.delete(shift) + actions.delete(reduce) + else + raise "Unknown precedence type. #{sym}" + end + end + end + + actions + end + + # @rbs () -> String + def to_s + "State: #{@state.id}, Token: #{@token.id.s_value}, Actions: #{actions_to_s}, Contributions: #{contribution_matrix_to_s}" + end + + private + + # @rbs () -> String + def actions_to_s + '[' + @actions.map {|action| + if action.is_a?(Action::Shift) || action.is_a?(Action::Goto) + action.class.name + elsif action.is_a?(Action::Reduce) + "#{action.class.name}: (#{action.item})" + end + }.join(', ') + ']' + end + + # @rbs () -> String + def contribution_matrix_to_s + '[' + @contribution_matrix.map {|action, contributions| + "#{(action.is_a?(Action::Shift) || action.is_a?(Action::Goto)) ? action.class.name : "#{action.class.name}: (#{action.item})"}: " + contributions&.transform_keys(&:to_s).to_s + }.join(', ') + ']' + end + end + end +end diff --git a/tool/lrama/lib/lrama/states/item.rb b/tool/lrama/lib/lrama/state/item.rb similarity index 61% rename from tool/lrama/lib/lrama/states/item.rb rename to tool/lrama/lib/lrama/state/item.rb index e89cb9695b6c7c..3ecdd70b76262a 100644 --- a/tool/lrama/lib/lrama/states/item.rb +++ b/tool/lrama/lib/lrama/state/item.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true # TODO: Validate position is not over rule rhs @@ -5,84 +6,112 @@ require "forwardable" module Lrama - class States + class State class Item < Struct.new(:rule, :position, keyword_init: true) + # @rbs! 
+ # include Grammar::Rule::_DelegatedMethods + # + # attr_accessor rule: Grammar::Rule + # attr_accessor position: Integer + # + # def initialize: (?rule: Grammar::Rule, ?position: Integer) -> void + extend Forwardable def_delegators "rule", :lhs, :rhs # Optimization for States#setup_state + # + # @rbs () -> Integer def hash [rule_id, position].hash end + # @rbs () -> Integer def rule_id rule.id end + # @rbs () -> bool def empty_rule? rule.empty_rule? end + # @rbs () -> Integer def number_of_rest_symbols - rhs.count - position + @number_of_rest_symbols ||= rhs.count - position end + # @rbs () -> Grammar::Symbol def next_sym rhs[position] end + # @rbs () -> Grammar::Symbol def next_next_sym - rhs[position + 1] + @next_next_sym ||= rhs[position + 1] end + # @rbs () -> Grammar::Symbol def previous_sym rhs[position - 1] end + # @rbs () -> bool def end_of_rule? rhs.count == position end + # @rbs () -> bool def beginning_of_rule? position == 0 end + # @rbs () -> bool def start_item? rule.initial_rule? && beginning_of_rule? end + # @rbs () -> State::Item def new_by_next_position Item.new(rule: rule, position: position + 1) end + # @rbs () -> Array[Grammar::Symbol] def symbols_before_dot # steep:ignore rhs[0...position] end + # @rbs () -> Array[Grammar::Symbol] def symbols_after_dot # steep:ignore rhs[position..-1] end - def symbols_after_transition + # @rbs () -> Array[Grammar::Symbol] + def symbols_after_transition # steep:ignore rhs[position+1..-1] end + # @rbs () -> ::String def to_s "#{lhs.id.s_value}: #{display_name}" end + # @rbs () -> ::String def display_name r = rhs.map(&:display_name).insert(position, "•").join(" ") "#{r} (rule #{rule_id})" end # Right after position + # + # @rbs () -> ::String def display_rest r = symbols_after_dot.map(&:display_name).join(" ") ". 
#{r} (rule #{rule_id})" end + # @rbs (State::Item other_item) -> bool def predecessor_item_of?(other_item) rule == other_item.rule && position == other_item.position - 1 end diff --git a/tool/lrama/lib/lrama/state/reduce.rb b/tool/lrama/lib/lrama/state/reduce.rb deleted file mode 100644 index 54ab87b468c48d..00000000000000 --- a/tool/lrama/lib/lrama/state/reduce.rb +++ /dev/null @@ -1,37 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class State - class Reduce - # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html - attr_reader :item, :look_ahead, :not_selected_symbols - attr_accessor :default_reduction - - def initialize(item) - @item = item - @look_ahead = nil - @not_selected_symbols = [] - end - - def rule - @item.rule - end - - def look_ahead=(look_ahead) - @look_ahead = look_ahead.freeze - end - - def add_not_selected_symbol(sym) - @not_selected_symbols << sym - end - - def selected_look_ahead - if look_ahead - look_ahead - @not_selected_symbols - else - [] - end - end - end - end -end diff --git a/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb b/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb index 736d08376a73c7..55ecad40bdd2c3 100644 --- a/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb +++ b/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb @@ -1,8 +1,21 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class State - class ReduceReduceConflict < Struct.new(:symbols, :reduce1, :reduce2, keyword_init: true) + class ReduceReduceConflict + attr_reader :symbols #: Array[Grammar::Symbol] + attr_reader :reduce1 #: State::Action::Reduce + attr_reader :reduce2 #: State::Action::Reduce + + # @rbs (symbols: Array[Grammar::Symbol], reduce1: State::Action::Reduce, reduce2: State::Action::Reduce) -> void + def initialize(symbols:, reduce1:, reduce2:) + @symbols = symbols + @reduce1 = reduce1 + @reduce2 = reduce2 + end + + # @rbs () -> :reduce_reduce def type :reduce_reduce end diff --git 
a/tool/lrama/lib/lrama/state/resolved_conflict.rb b/tool/lrama/lib/lrama/state/resolved_conflict.rb index 3bb3d1446e7214..014533c23315bb 100644 --- a/tool/lrama/lib/lrama/state/resolved_conflict.rb +++ b/tool/lrama/lib/lrama/state/resolved_conflict.rb @@ -1,20 +1,54 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class State + # * state: A state on which the conflict is resolved # * symbol: A symbol under discussion # * reduce: A reduce under discussion # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative) - class ResolvedConflict < Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) + # * resolved_by_precedence: If the conflict is resolved by precedence definition or not + class ResolvedConflict + # @rbs! + # type which_enum = :reduce | :shift | :error + + attr_reader :state #: State + attr_reader :symbol #: Grammar::Symbol + attr_reader :reduce #: State::Action::Reduce + attr_reader :which #: which_enum + attr_reader :resolved_by_precedence #: bool + + # @rbs (state: State, symbol: Grammar::Symbol, reduce: State::Action::Reduce, which: which_enum, resolved_by_precedence: bool) -> void + def initialize(state:, symbol:, reduce:, which:, resolved_by_precedence:) + @state = state + @symbol = symbol + @reduce = reduce + @which = which + @resolved_by_precedence = resolved_by_precedence + end + + # @rbs () -> (::String | bot) def report_message + "Conflict between rule #{reduce.rule.id} and token #{symbol.display_name} #{how_resolved}." + end + + # @rbs () -> (::String | bot) + def report_precedences_message + "Conflict between reduce by \"#{reduce.rule.display_name}\" and shift #{symbol.display_name} #{how_resolved}."
+ end + + private + + # @rbs () -> (::String | bot) + def how_resolved s = symbol.display_name r = reduce.rule.precedence_sym&.display_name case - when which == :shift && same_prec + when which == :shift && resolved_by_precedence msg = "resolved as #{which} (%right #{s})" when which == :shift msg = "resolved as #{which} (#{r} < #{s})" - when which == :reduce && same_prec + when which == :reduce && resolved_by_precedence msg = "resolved as #{which} (%left #{s})" when which == :reduce msg = "resolved as #{which} (#{s} < #{r})" @@ -24,7 +58,7 @@ def report_message raise "Unknown direction. #{self}" end - "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}." + msg end end end diff --git a/tool/lrama/lib/lrama/state/shift.rb b/tool/lrama/lib/lrama/state/shift.rb deleted file mode 100644 index 81ef013a17c009..00000000000000 --- a/tool/lrama/lib/lrama/state/shift.rb +++ /dev/null @@ -1,15 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class State - class Shift - attr_reader :next_sym, :next_items - attr_accessor :not_selected - - def initialize(next_sym, next_items) - @next_sym = next_sym - @next_items = next_items - end - end - end -end diff --git a/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb b/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb index fd66834539e924..548f2de614a8b1 100644 --- a/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb +++ b/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb @@ -1,8 +1,21 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class State - class ShiftReduceConflict < Struct.new(:symbols, :shift, :reduce, keyword_init: true) + class ShiftReduceConflict + attr_reader :symbols #: Array[Grammar::Symbol] + attr_reader :shift #: State::Action::Shift + attr_reader :reduce #: State::Action::Reduce + + # @rbs (symbols: Array[Grammar::Symbol], shift: State::Action::Shift, reduce: State::Action::Reduce) -> void + def initialize(symbols:, shift:, reduce:) + @symbols = symbols + @shift = shift + 
@reduce = reduce + end + + # @rbs () -> :shift_reduce def type :shift_reduce end diff --git a/tool/lrama/lib/lrama/states.rb b/tool/lrama/lib/lrama/states.rb index fd8ded905f0699..ddce627df400a5 100644 --- a/tool/lrama/lib/lrama/states.rb +++ b/tool/lrama/lib/lrama/states.rb @@ -1,8 +1,9 @@ +# rbs_inline: enabled # frozen_string_literal: true require "forwardable" -require_relative "report/duration" -require_relative "states/item" +require_relative "tracer/duration" +require_relative "state/item" module Lrama # States is passed to a template file @@ -10,17 +11,42 @@ module Lrama # "Efficient Computation of LALR(1) Look-Ahead Sets" # https://dl.acm.org/doi/pdf/10.1145/69622.357187 class States + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # type state_id = Integer + # type rule_id = Integer + # + # include Grammar::_DelegatedMethods + # + # @grammar: Grammar + # @tracer: Tracer + # @states: Array[State] + # @direct_read_sets: Hash[State::Action::Goto, Bitmap::bitmap] + # @reads_relation: Hash[State::Action::Goto, Array[State::Action::Goto]] + # @read_sets: Hash[State::Action::Goto, Bitmap::bitmap] + # @includes_relation: Hash[State::Action::Goto, Array[State::Action::Goto]] + # @lookback_relation: Hash[state_id, Hash[rule_id, Array[State::Action::Goto]]] + # @follow_sets: Hash[State::Action::Goto, Bitmap::bitmap] + # @la: Hash[state_id, Hash[rule_id, Bitmap::bitmap]] + extend Forwardable - include Lrama::Report::Duration + include Lrama::Tracer::Duration - def_delegators "@grammar", :symbols, :terms, :nterms, :rules, - :accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value! + def_delegators "@grammar", :symbols, :terms, :nterms, :rules, :precedences, + :accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value!, :ielr_defined? 
- attr_reader :states, :reads_relation, :includes_relation, :lookback_relation + attr_reader :states #: Array[State] + attr_reader :reads_relation #: Hash[State::Action::Goto, Array[State::Action::Goto]] + attr_reader :includes_relation #: Hash[State::Action::Goto, Array[State::Action::Goto]] + attr_reader :lookback_relation #: Hash[state_id, Hash[rule_id, Array[State::Action::Goto]]] - def initialize(grammar, trace_state: false) + # @rbs (Grammar grammar, Tracer tracer) -> void + def initialize(grammar, tracer) @grammar = grammar - @trace_state = trace_state + @tracer = tracer @states = [] @@ -28,7 +54,7 @@ def initialize(grammar, trace_state: false) # where p is state, A is nterm, t is term. # # `@direct_read_sets` is a hash whose - # key is [state.id, nterm.token_id], + # key is goto, # value is bitmap of term. @direct_read_sets = {} @@ -37,14 +63,14 @@ def initialize(grammar, trace_state: false) # where p, r are state, A, C are nterm. # # `@reads_relation` is a hash whose - # key is [state.id, nterm.token_id], - # value is array of [state.id, nterm.token_id]. + # key is goto, + # value is array of goto. @reads_relation = {} # `Read(p, A) =s DR(p, A) ∪ ∪{Read(r, C) | (p, A) reads (r, C)}` # # `@read_sets` is a hash whose - # key is [state.id, nterm.token_id], + # key is goto, # value is bitmap of term. @read_sets = {} @@ -52,112 +78,163 @@ def initialize(grammar, trace_state: false) # where p, p' are state, A, B are nterm, β, γ is sequence of symbol. # # `@includes_relation` is a hash whose - # key is [state.id, nterm.token_id], - # value is array of [state.id, nterm.token_id]. + # key is goto, + # value is array of goto. @includes_relation = {} # `(q, A -> ω) lookback (p, A) iff p -(ω)-> q` # where p, q are state, A -> ω is rule, A is nterm, ω is sequence of symbol. # - # `@lookback_relation` is a hash whose - # key is [state.id, rule.id], - # value is array of [state.id, nterm.token_id]. 
+ # `@lookback_relation` is a two-stage hash whose + # first key is state_id, + # second key is rule_id, + # value is array of goto. @lookback_relation = {} # `Follow(p, A) =s Read(p, A) ∪ ∪{Follow(p', B) | (p, A) includes (p', B)}` # # `@follow_sets` is a hash whose - # key is [state.id, rule.id], + # key is goto, # value is bitmap of term. @follow_sets = {} # `LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)` # - # `@la` is a hash whose - # key is [state.id, rule.id], + # `@la` is a two-stage hash whose + # first key is state_id, + # second key is rule_id, # value is bitmap of term. @la = {} end + # @rbs () -> void def compute - # Look Ahead Sets report_duration(:compute_lr0_states) { compute_lr0_states } - report_duration(:compute_direct_read_sets) { compute_direct_read_sets } - report_duration(:compute_reads_relation) { compute_reads_relation } - report_duration(:compute_read_sets) { compute_read_sets } - report_duration(:compute_includes_relation) { compute_includes_relation } - report_duration(:compute_lookback_relation) { compute_lookback_relation } - report_duration(:compute_follow_sets) { compute_follow_sets } + + # Look Ahead Sets report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } # Conflicts - report_duration(:compute_conflicts) { compute_conflicts } + report_duration(:compute_conflicts) { compute_conflicts(:lalr) } report_duration(:compute_default_reduction) { compute_default_reduction } end + # @rbs () -> void def compute_ielr + # Preparation + report_duration(:clear_conflicts) { clear_conflicts } + # Phase 1 + report_duration(:compute_predecessors) { compute_predecessors } + report_duration(:compute_follow_kernel_items) { compute_follow_kernel_items } + report_duration(:compute_always_follows) { compute_always_follows } + report_duration(:compute_goto_follows) { compute_goto_follows } + # Phase 2 + report_duration(:compute_inadequacy_annotations) { compute_inadequacy_annotations } + # Phase 3 
report_duration(:split_states) { split_states } - report_duration(:compute_direct_read_sets) { compute_direct_read_sets } - report_duration(:compute_reads_relation) { compute_reads_relation } - report_duration(:compute_read_sets) { compute_read_sets } - report_duration(:compute_includes_relation) { compute_includes_relation } - report_duration(:compute_lookback_relation) { compute_lookback_relation } - report_duration(:compute_follow_sets) { compute_follow_sets } + # Phase 4 + report_duration(:clear_look_ahead_sets) { clear_look_ahead_sets } report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } - report_duration(:compute_conflicts) { compute_conflicts } - + # Phase 5 + report_duration(:compute_conflicts) { compute_conflicts(:ielr) } report_duration(:compute_default_reduction) { compute_default_reduction } end - def reporter - StatesReporter.new(self) - end - + # @rbs () -> Integer def states_count @states.count end + # @rbs () -> Hash[State::Action::Goto, Array[Grammar::Symbol]] def direct_read_sets - @direct_read_sets.transform_values do |v| + @_direct_read_sets ||= @direct_read_sets.transform_values do |v| bitmap_to_terms(v) end end + # @rbs () -> Hash[State::Action::Goto, Array[Grammar::Symbol]] def read_sets - @read_sets.transform_values do |v| + @_read_sets ||= @read_sets.transform_values do |v| bitmap_to_terms(v) end end + # @rbs () -> Hash[State::Action::Goto, Array[Grammar::Symbol]] def follow_sets - @follow_sets.transform_values do |v| + @_follow_sets ||= @follow_sets.transform_values do |v| bitmap_to_terms(v) end end + # @rbs () -> Hash[state_id, Hash[rule_id, Array[Grammar::Symbol]]] def la - @la.transform_values do |v| - bitmap_to_terms(v) + @_la ||= @la.transform_values do |second_hash| + second_hash.transform_values do |v| + bitmap_to_terms(v) + end end end + # @rbs () -> Integer def sr_conflicts_count @sr_conflicts_count ||= @states.flat_map(&:sr_conflicts).count end + # @rbs () -> Integer def rr_conflicts_count @rr_conflicts_count 
||= @states.flat_map(&:rr_conflicts).count end - private + # @rbs (Logger logger) -> void + def validate!(logger) + validate_conflicts_within_threshold!(logger) + end - def trace_state - if @trace_state - yield STDERR + def compute_la_sources_for_conflicted_states + reflexive = {} + @states.each do |state| + state.nterm_transitions.each do |goto| + reflexive[goto] = [goto] + end + end + + # compute_read_sets + read_sets = Digraph.new(nterm_transitions, @reads_relation, reflexive).compute + # compute_follow_sets + follow_sets = Digraph.new(nterm_transitions, @includes_relation, read_sets).compute + + @states.select(&:has_conflicts?).each do |state| + lookback_relation_on_state = @lookback_relation[state.id] + next unless lookback_relation_on_state + rules.each do |rule| + ary = lookback_relation_on_state[rule.id] + next unless ary + + sources = {} + + ary.each do |goto| + source = follow_sets[goto] + + next unless source + + source.each do |goto2| + tokens = direct_read_sets[goto2] + tokens.each do |token| + sources[token] ||= [] + sources[token] |= [goto2] + end + end + end + + state.set_look_ahead_sources(rule, sources) + end end end + private + + # @rbs (Grammar::Symbol accessing_symbol, Array[State::Item] kernels, Hash[Array[State::Item], State] states_created) -> [State, bool] def create_state(accessing_symbol, kernels, states_created) # A item can appear in some states, # so need to use `kernels` (not `kernels.first`) as a key. @@ -204,27 +281,25 @@ def create_state(accessing_symbol, kernels, states_created) return [state, true] end + # @rbs (State state) -> void def setup_state(state) # closure closure = [] - visited = {} queued = {} items = state.kernels.dup items.each do |item| - queued[item] = true + queued[item.rule_id] = true if item.position == 0 end while (item = items.shift) do - visited[item] = true - if (sym = item.next_sym) && sym.nterm? 
@grammar.find_rules_by_symbol!(sym).each do |rule| - i = Item.new(rule: rule, position: 0) - next if queued[i] + next if queued[rule.id] + i = State::Item.new(rule: rule, position: 0) closure << i items << i - queued[i] = true + queued[i.rule_id] = true end end end @@ -232,119 +307,107 @@ def setup_state(state) state.closure = closure.sort_by {|i| i.rule.id } # Trace - trace_state do |out| - out << "Closure: input\n" - state.kernels.each do |item| - out << " #{item.display_rest}\n" - end - out << "\n\n" - out << "Closure: output\n" - state.items.each do |item| - out << " #{item.display_rest}\n" - end - out << "\n\n" - end + @tracer.trace_closure(state) # shift & reduce - state.compute_shifts_reduces + state.compute_transitions_and_reduces end + # @rbs (Array[State] states, State state) -> void def enqueue_state(states, state) # Trace - previous = state.kernels.first.previous_sym - trace_state do |out| - out << sprintf("state_list_append (state = %d, symbol = %d (%s))\n", - @states.count, previous.number, previous.display_name) - end + @tracer.trace_state_list_append(@states.count, state) states << state end + # @rbs () -> void def compute_lr0_states # State queue states = [] states_created = {} - state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_created) + state, _ = create_state(symbols.first, [State::Item.new(rule: @grammar.rules.first, position: 0)], states_created) enqueue_state(states, state) while (state = states.shift) do # Trace - # - # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but - # I think it is not correct... 
- previous = state.kernels.first.previous_sym - trace_state do |out| - out << "Processing state #{state.id} (reached by #{previous.display_name})\n" - end + @tracer.trace_state(state) setup_state(state) - state.shifts.each do |shift| - new_state, created = create_state(shift.next_sym, shift.next_items, states_created) - state.set_items_to_state(shift.next_items, new_state) - if created - enqueue_state(states, new_state) - new_state.append_predecessor(state) - end + # `State#transitions` can not be used here + # because `items_to_state` of the `state` is not set yet. + state._transitions.each do |next_sym, to_items| + new_state, created = create_state(next_sym, to_items, states_created) + state.set_items_to_state(to_items, new_state) + state.set_lane_items(next_sym, new_state) + enqueue_state(states, new_state) if created end end end + # @rbs () -> Array[State::Action::Goto] def nterm_transitions a = [] @states.each do |state| - state.nterm_transitions.each do |shift, next_state| - nterm = shift.next_sym - a << [state, nterm, next_state] + state.nterm_transitions.each do |goto| + a << goto end end a end + # @rbs () -> void + def compute_look_ahead_sets + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_la) { compute_la } + end + + # @rbs () -> void def compute_direct_read_sets @states.each do |state| - state.nterm_transitions.each do |shift, next_state| - nterm = shift.next_sym - - ary = next_state.term_transitions.map do |shift, _| + state.nterm_transitions.each do |goto| + ary = goto.to_state.term_transitions.map do |shift| shift.next_sym.number end - key = [state.id, 
nterm.token_id] - @direct_read_sets[key] = Bitmap.from_array(ary) + @direct_read_sets[goto] = Bitmap.from_array(ary) end end end + # @rbs () -> void def compute_reads_relation @states.each do |state| - state.nterm_transitions.each do |shift, next_state| - nterm = shift.next_sym - next_state.nterm_transitions.each do |shift2, _next_state2| - nterm2 = shift2.next_sym + state.nterm_transitions.each do |goto| + goto.to_state.nterm_transitions.each do |goto2| + nterm2 = goto2.next_sym if nterm2.nullable - key = [state.id, nterm.token_id] - @reads_relation[key] ||= [] - @reads_relation[key] << [next_state.id, nterm2.token_id] + @reads_relation[goto] ||= [] + @reads_relation[goto] << goto2 end end end end end + # @rbs () -> void def compute_read_sets - sets = nterm_transitions.map do |state, nterm, next_state| - [state.id, nterm.token_id] - end - - @read_sets = Digraph.new(sets, @reads_relation, @direct_read_sets).compute + @read_sets = Digraph.new(nterm_transitions, @reads_relation, @direct_read_sets).compute end # Execute transition of state by symbols # then return final state. + # + # @rbs (State state, Array[Grammar::Symbol] symbols) -> State def transition(state, symbols) symbols.each do |sym| state = state.transition(sym) @@ -353,10 +416,11 @@ def transition(state, symbols) state end + # @rbs () -> void def compute_includes_relation @states.each do |state| - state.nterm_transitions.each do |shift, next_state| - nterm = shift.next_sym + state.nterm_transitions.each do |goto| + nterm = goto.next_sym @grammar.find_rules_by_symbol!(nterm).each do |rule| i = rule.rhs.count - 1 @@ -366,10 +430,12 @@ def compute_includes_relation break if sym.term? state2 = transition(state, rule.rhs[0...i]) # p' = state, B = nterm, p = state2, A = sym - key = [state2.id, sym.token_id] + key = state2.nterm_transitions.find do |goto2| + goto2.next_sym.token_id == sym.token_id + end || (raise "Goto by #{sym.name} on state #{state2.id} is not found") # TODO: need to omit if state == state2 ? 
@includes_relation[key] ||= [] - @includes_relation[key] << [state.id, nterm.token_id] + @includes_relation[key] << goto break unless sym.nullable i -= 1 end @@ -378,45 +444,46 @@ def compute_includes_relation end end + # @rbs () -> void def compute_lookback_relation @states.each do |state| - state.nterm_transitions.each do |shift, next_state| - nterm = shift.next_sym + state.nterm_transitions.each do |goto| + nterm = goto.next_sym @grammar.find_rules_by_symbol!(nterm).each do |rule| state2 = transition(state, rule.rhs) # p = state, A = nterm, q = state2, A -> ω = rule - key = [state2.id, rule.id] - @lookback_relation[key] ||= [] - @lookback_relation[key] << [state.id, nterm.token_id] + @lookback_relation[state2.id] ||= {} + @lookback_relation[state2.id][rule.id] ||= [] + @lookback_relation[state2.id][rule.id] << goto end end end end + # @rbs () -> void def compute_follow_sets - sets = nterm_transitions.map do |state, nterm, next_state| - [state.id, nterm.token_id] - end - - @follow_sets = Digraph.new(sets, @includes_relation, @read_sets).compute + @follow_sets = Digraph.new(nterm_transitions, @includes_relation, @read_sets).compute end - def compute_look_ahead_sets + # @rbs () -> void + def compute_la @states.each do |state| + lookback_relation_on_state = @lookback_relation[state.id] + next unless lookback_relation_on_state rules.each do |rule| - ary = @lookback_relation[[state.id, rule.id]] + ary = lookback_relation_on_state[rule.id] next unless ary - ary.each do |state2_id, nterm_token_id| + ary.each do |goto| # q = state, A -> ω = rule, p = state2, A = nterm - follows = @follow_sets[[state2_id, nterm_token_id]] + follows = @follow_sets[goto] next if follows == 0 - key = [state.id, rule.id] - @la[key] ||= 0 - look_ahead = @la[key] | follows - @la[key] |= look_ahead + @la[state.id] ||= {} + @la[state.id][rule.id] ||= 0 + look_ahead = @la[state.id][rule.id] | follows + @la[state.id][rule.id] |= look_ahead # No risk of conflict when # * the state only has single 
reduce @@ -429,6 +496,7 @@ def compute_look_ahead_sets end end + # @rbs (Bitmap::bitmap bit) -> Array[Grammar::Symbol] def bitmap_to_terms(bit) ary = Bitmap.to_array(bit) ary.map do |i| @@ -436,14 +504,16 @@ def bitmap_to_terms(bit) end end - def compute_conflicts - compute_shift_reduce_conflicts + # @rbs () -> void + def compute_conflicts(lr_type) + compute_shift_reduce_conflicts(lr_type) compute_reduce_reduce_conflicts end - def compute_shift_reduce_conflicts + # @rbs () -> void + def compute_shift_reduce_conflicts(lr_type) states.each do |state| - state.shifts.each do |shift| + state.term_transitions.each do |shift| state.reduces.each do |reduce| sym = shift.next_sym @@ -463,43 +533,57 @@ def compute_shift_reduce_conflicts case when shift_prec < reduce_prec # Reduce is selected - state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce) + resolved_conflict = State::ResolvedConflict.new(state: state, symbol: sym, reduce: reduce, which: :reduce, resolved_by_precedence: false) + state.resolved_conflicts << resolved_conflict shift.not_selected = true + mark_precedences_used(lr_type, shift_prec, reduce_prec, resolved_conflict) next when shift_prec > reduce_prec # Shift is selected - state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift) + resolved_conflict = State::ResolvedConflict.new(state: state, symbol: sym, reduce: reduce, which: :shift, resolved_by_precedence: false) + state.resolved_conflicts << resolved_conflict reduce.add_not_selected_symbol(sym) + mark_precedences_used(lr_type, shift_prec, reduce_prec, resolved_conflict) next end # shift_prec == reduce_prec, then check associativity case sym.precedence.type when :precedence + # Can not resolve the conflict + # # %precedence only specifies precedence and not specify associativity # then a conflict is unresolved if precedence is same. 
state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce) next when :right # Shift is selected - state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift, same_prec: true) + resolved_conflict = State::ResolvedConflict.new(state: state, symbol: sym, reduce: reduce, which: :shift, resolved_by_precedence: true) + state.resolved_conflicts << resolved_conflict reduce.add_not_selected_symbol(sym) + mark_precedences_used(lr_type, shift_prec, reduce_prec, resolved_conflict) next when :left # Reduce is selected - state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce, same_prec: true) + resolved_conflict = State::ResolvedConflict.new(state: state, symbol: sym, reduce: reduce, which: :reduce, resolved_by_precedence: true) + state.resolved_conflicts << resolved_conflict shift.not_selected = true + mark_precedences_used(lr_type, shift_prec, reduce_prec, resolved_conflict) next when :nonassoc - # Can not resolve + # The conflict is resolved # - # nonassoc creates "run-time" error, precedence creates "compile-time" error. - # Then omit both the shift and reduce. + # %nonassoc creates "run-time" error by removing both shift and reduce from + # the state. This makes the state to get syntax error if the conflicted token appears. + # On the other hand, %precedence creates "compile-time" error by keeping both + # shift and reduce on the state. This makes the state to be conflicted on the token. 
# # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html - state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :error) + resolved_conflict = State::ResolvedConflict.new(state: state, symbol: sym, reduce: reduce, which: :error, resolved_by_precedence: false) + state.resolved_conflicts << resolved_conflict shift.not_selected = true reduce.add_not_selected_symbol(sym) + mark_precedences_used(lr_type, shift_prec, reduce_prec, resolved_conflict) else raise "Unknown precedence type. #{sym}" end @@ -508,35 +592,41 @@ def compute_shift_reduce_conflicts end end + # @rbs (Grammar::Precedence shift_prec, Grammar::Precedence reduce_prec, State::ResolvedConflict resolved_conflict) -> void + def mark_precedences_used(lr_type, shift_prec, reduce_prec, resolved_conflict) + case lr_type + when :lalr + shift_prec.mark_used_by_lalr(resolved_conflict) + reduce_prec.mark_used_by_lalr(resolved_conflict) + when :ielr + shift_prec.mark_used_by_ielr(resolved_conflict) + reduce_prec.mark_used_by_ielr(resolved_conflict) + end + end + + # @rbs () -> void def compute_reduce_reduce_conflicts states.each do |state| - count = state.reduces.count - - (0...count).each do |i| - reduce1 = state.reduces[i] - next if reduce1.look_ahead.nil? + state.reduces.combination(2) do |reduce1, reduce2| + next if reduce1.look_ahead.nil? || reduce2.look_ahead.nil? - ((i+1)...count).each do |j| - reduce2 = state.reduces[j] - next if reduce2.look_ahead.nil? + intersection = reduce1.look_ahead & reduce2.look_ahead - intersection = reduce1.look_ahead & reduce2.look_ahead - - unless intersection.empty? - state.conflicts << State::ReduceReduceConflict.new(symbols: intersection, reduce1: reduce1, reduce2: reduce2) - end + unless intersection.empty? 
+ state.conflicts << State::ReduceReduceConflict.new(symbols: intersection, reduce1: reduce1, reduce2: reduce2) end end end end + # @rbs () -> void def compute_default_reduction states.each do |state| next if state.reduces.empty? # Do not set, if conflict exist next unless state.conflicts.empty? # Do not set, if shift with `error` exists. - next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol) + next if state.term_transitions.map {|shift| shift.next_sym }.include?(@grammar.error_symbol) state.default_reduction_rule = state.reduces.map do |r| [r.rule, r.rule.id, (r.look_ahead || []).count] @@ -546,35 +636,171 @@ def compute_default_reduction end end + # @rbs () -> void + def clear_conflicts + states.each(&:clear_conflicts) + end + + # Definition 3.15 (Predecessors) + # + # @rbs () -> void + def compute_predecessors + @states.each do |state| + state.transitions.each do |transition| + transition.to_state.append_predecessor(state) + end + end + end + + # Definition 3.16 (follow_kernel_items) + # + # @rbs () -> void + def compute_follow_kernel_items + set = nterm_transitions + relation = compute_goto_internal_relation + base_function = compute_goto_bitmaps + Digraph.new(set, relation, base_function).compute.each do |goto, follow_kernel_items| + state = goto.from_state + state.follow_kernel_items[goto] = state.kernels.map {|kernel| + [kernel, Bitmap.to_bool_array(follow_kernel_items, state.kernels.count)] + }.to_h + end + end + + # @rbs () -> Hash[State::Action::Goto, Array[State::Action::Goto]] + def compute_goto_internal_relation + relations = {} + + @states.each do |state| + state.nterm_transitions.each do |goto| + relations[goto] = state.internal_dependencies(goto) + end + end + + relations + end + + # @rbs () -> Hash[State::Action::Goto, Bitmap::bitmap] + def compute_goto_bitmaps + nterm_transitions.map {|goto| + bools = goto.from_state.kernels.map.with_index {|kernel, i| i if kernel.next_sym == goto.next_sym && 
kernel.symbols_after_transition.all?(&:nullable) }.compact + [goto, Bitmap.from_array(bools)] + }.to_h + end + + # Definition 3.20 (always_follows, one closure) + # + # @rbs () -> void + def compute_always_follows + set = nterm_transitions + relation = compute_goto_successor_or_internal_relation + base_function = compute_transition_bitmaps + Digraph.new(set, relation, base_function).compute.each do |goto, always_follows_bitmap| + goto.from_state.always_follows[goto] = bitmap_to_terms(always_follows_bitmap) + end + end + + # @rbs () -> Hash[State::Action::Goto, Array[State::Action::Goto]] + def compute_goto_successor_or_internal_relation + relations = {} + + @states.each do |state| + state.nterm_transitions.each do |goto| + relations[goto] = state.successor_dependencies(goto) + state.internal_dependencies(goto) + end + end + + relations + end + + # @rbs () -> Hash[State::Action::Goto, Bitmap::bitmap] + def compute_transition_bitmaps + nterm_transitions.map {|goto| + [goto, Bitmap.from_array(goto.to_state.term_transitions.map {|shift| shift.next_sym.number })] + }.to_h + end + + # Definition 3.24 (goto_follows, via always_follows) + # + # @rbs () -> void + def compute_goto_follows + set = nterm_transitions + relation = compute_goto_internal_or_predecessor_dependencies + base_function = compute_always_follows_bitmaps + Digraph.new(set, relation, base_function).compute.each do |goto, goto_follows_bitmap| + goto.from_state.goto_follows[goto] = bitmap_to_terms(goto_follows_bitmap) + end + end + + # @rbs () -> Hash[State::Action::Goto, Array[State::Action::Goto]] + def compute_goto_internal_or_predecessor_dependencies + relations = {} + + @states.each do |state| + state.nterm_transitions.each do |goto| + relations[goto] = state.internal_dependencies(goto) + state.predecessor_dependencies(goto) + end + end + + relations + end + + # @rbs () -> Hash[State::Action::Goto, Bitmap::bitmap] + def compute_always_follows_bitmaps + nterm_transitions.map {|goto| + [goto, 
Bitmap.from_array(goto.from_state.always_follows[goto].map(&:number))] + }.to_h + end + + # @rbs () -> void def split_states @states.each do |state| - state.transitions.each do |shift, next_state| - compute_state(state, shift, next_state) + state.transitions.each do |transition| + compute_state(state, transition, transition.to_state) end end end + # @rbs () -> void + def compute_inadequacy_annotations + @states.each do |state| + state.annotate_manifestation + end + + queue = @states.reject {|state| state.annotation_list.empty? } + + while (curr = queue.shift) do + curr.predecessors.each do |pred| + cache = pred.annotation_list.dup + curr.annotate_predecessor(pred) + queue << pred if cache != pred.annotation_list && !queue.include?(pred) + end + end + end + + # @rbs (State state, State::lookahead_set filtered_lookaheads) -> void def merge_lookaheads(state, filtered_lookaheads) return if state.kernels.all? {|item| (filtered_lookaheads[item] - state.item_lookahead_set[item]).empty? } state.item_lookahead_set = state.item_lookahead_set.merge {|_, v1, v2| v1 | v2 } - state.transitions.each do |shift, next_state| - next if next_state.lookaheads_recomputed - compute_state(state, shift, next_state) + state.transitions.each do |transition| + next if transition.to_state.lookaheads_recomputed + compute_state(state, transition, transition.to_state) end end - def compute_state(state, shift, next_state) - filtered_lookaheads = state.propagate_lookaheads(next_state) - s = next_state.ielr_isocores.find {|st| st.compatible_lookahead?(filtered_lookaheads) } + # @rbs (State state, State::Action::Shift | State::Action::Goto transition, State next_state) -> void + def compute_state(state, transition, next_state) + propagating_lookaheads = state.propagate_lookaheads(next_state) + s = next_state.ielr_isocores.find {|st| st.is_compatible?(propagating_lookaheads) } if s.nil? 
- s = next_state.ielr_isocores.last + s = next_state.lalr_isocore new_state = State.new(@states.count, s.accessing_symbol, s.kernels) new_state.closure = s.closure - new_state.compute_shifts_reduces - s.transitions.each do |sh, next_state| - new_state.set_items_to_state(sh.next_items, next_state) + new_state.compute_transitions_and_reduces + s.transitions.each do |transition| + new_state.set_items_to_state(transition.to_items, transition.to_state) end @states << new_state new_state.lalr_isocore = s @@ -582,14 +808,60 @@ def compute_state(state, shift, next_state) s.ielr_isocores.each do |st| st.ielr_isocores = s.ielr_isocores end - new_state.item_lookahead_set = filtered_lookaheads - state.update_transition(shift, new_state) + new_state.lookaheads_recomputed = true + new_state.item_lookahead_set = propagating_lookaheads + state.update_transition(transition, new_state) elsif(!s.lookaheads_recomputed) - s.item_lookahead_set = filtered_lookaheads + s.lookaheads_recomputed = true + s.item_lookahead_set = propagating_lookaheads else - state.update_transition(shift, s) - merge_lookaheads(s, filtered_lookaheads) + merge_lookaheads(s, propagating_lookaheads) + state.update_transition(transition, s) if state.items_to_state[transition.to_items].id != s.id end end + + # @rbs (Logger logger) -> void + def validate_conflicts_within_threshold!(logger) + exit false unless conflicts_within_threshold?(logger) + end + + # @rbs (Logger logger) -> bool + def conflicts_within_threshold?(logger) + return true unless @grammar.expect + + [sr_conflicts_within_threshold?(logger), rr_conflicts_within_threshold?(logger)].all? 
+ end + + # @rbs (Logger logger) -> bool + def sr_conflicts_within_threshold?(logger) + return true if @grammar.expect == sr_conflicts_count + + logger.error("shift/reduce conflicts: #{sr_conflicts_count} found, #{@grammar.expect} expected") + false + end + + # @rbs (Logger logger) -> bool + def rr_conflicts_within_threshold?(logger, expected: 0) + return true if expected == rr_conflicts_count + + logger.error("reduce/reduce conflicts: #{rr_conflicts_count} found, #{expected} expected") + false + end + + # @rbs () -> void + def clear_look_ahead_sets + @direct_read_sets.clear + @reads_relation.clear + @read_sets.clear + @includes_relation.clear + @lookback_relation.clear + @follow_sets.clear + @la.clear + + @_direct_read_sets = nil + @_read_sets = nil + @_follow_sets = nil + @_la = nil + end end end diff --git a/tool/lrama/lib/lrama/states_reporter.rb b/tool/lrama/lib/lrama/states_reporter.rb deleted file mode 100644 index 64ff4de1006c58..00000000000000 --- a/tool/lrama/lib/lrama/states_reporter.rb +++ /dev/null @@ -1,362 +0,0 @@ -# frozen_string_literal: true - -module Lrama - class StatesReporter - include Lrama::Report::Duration - - def initialize(states) - @states = states - end - - def report(io, **options) - report_duration(:report) do - _report(io, **options) - end - end - - private - - def _report(io, grammar: false, rules: false, terms: false, states: false, itemsets: false, lookaheads: false, solved: false, counterexamples: false, verbose: false) - report_unused_rules(io) if rules - report_unused_terms(io) if terms - report_conflicts(io) - report_grammar(io) if grammar - report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) - end - - def report_unused_terms(io) - look_aheads = @states.states.each do |state| - state.reduces.flat_map do |reduce| - reduce.look_ahead unless reduce.look_ahead.nil? - end - end - - next_terms = @states.states.flat_map do |state| - state.shifts.map(&:next_sym).select(&:term?) 
- end - - unused_symbols = @states.terms.select do |term| - !(look_aheads + next_terms).include?(term) - end - - unless unused_symbols.empty? - io << "#{unused_symbols.count} Unused Terms\n\n" - unused_symbols.each_with_index do |term, index| - io << sprintf("%5d %s\n", index, term.id.s_value) - end - io << "\n\n" - end - end - - def report_unused_rules(io) - used_rules = @states.rules.flat_map(&:rhs) - - unused_rules = @states.rules.map(&:lhs).select do |rule| - !used_rules.include?(rule) && rule.token_id != 0 - end - - unless unused_rules.empty? - io << "#{unused_rules.count} Unused Rules\n\n" - unused_rules.each_with_index do |rule, index| - io << sprintf("%5d %s\n", index, rule.display_name) - end - io << "\n\n" - end - end - - def report_conflicts(io) - has_conflict = false - - @states.states.each do |state| - messages = [] - cs = state.conflicts.group_by(&:type) - if cs[:shift_reduce] - messages << "#{cs[:shift_reduce].count} shift/reduce" - end - - if cs[:reduce_reduce] - messages << "#{cs[:reduce_reduce].count} reduce/reduce" - end - - unless messages.empty? - has_conflict = true - io << "State #{state.id} conflicts: #{messages.join(', ')}\n" - end - end - - if has_conflict - io << "\n\n" - end - end - - def report_grammar(io) - io << "Grammar\n" - last_lhs = nil - - @states.rules.each do |rule| - if rule.empty_rule? - r = "ε" - else - r = rule.rhs.map(&:display_name).join(" ") - end - - if rule.lhs == last_lhs - io << sprintf("%5d %s| %s\n", rule.id, " " * rule.lhs.display_name.length, r) - else - io << "\n" - io << sprintf("%5d %s: %s\n", rule.id, rule.lhs.display_name, r) - end - - last_lhs = rule.lhs - end - io << "\n\n" - end - - def report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) - if counterexamples - cex = Counterexamples.new(@states) - end - - @states.states.each do |state| - # Report State - io << "State #{state.id}\n\n" - - # Report item - last_lhs = nil - list = itemsets ? 
state.items : state.kernels - list.sort_by {|i| [i.rule_id, i.position] }.each do |item| - if item.empty_rule? - r = "ε •" - else - r = item.rhs.map(&:display_name).insert(item.position, "•").join(" ") - end - if item.lhs == last_lhs - l = " " * item.lhs.id.s_value.length + "|" - else - l = item.lhs.id.s_value + ":" - end - la = "" - if lookaheads && item.end_of_rule? - reduce = state.find_reduce_by_item!(item) - look_ahead = reduce.selected_look_ahead - unless look_ahead.empty? - la = " [#{look_ahead.map(&:display_name).join(", ")}]" - end - end - last_lhs = item.lhs - - io << sprintf("%5i %s %s%s\n", item.rule_id, l, r, la) - end - io << "\n" - - # Report shifts - tmp = state.term_transitions.reject do |shift, _| - shift.not_selected - end.map do |shift, next_state| - [shift.next_sym, next_state.id] - end - max_len = tmp.map(&:first).map(&:display_name).map(&:length).max - tmp.each do |term, state_id| - io << " #{term.display_name.ljust(max_len)} shift, and go to state #{state_id}\n" - end - io << "\n" unless tmp.empty? - - # Report error caused by %nonassoc - nl = false - tmp = state.resolved_conflicts.select do |resolved| - resolved.which == :error - end.map do |error| - error.symbol.display_name - end - max_len = tmp.map(&:length).max - tmp.each do |name| - nl = true - io << " #{name.ljust(max_len)} error (nonassociative)\n" - end - io << "\n" unless tmp.empty? 
- - # Report reduces - nl = false - max_len = state.non_default_reduces.flat_map(&:look_ahead).compact.map(&:display_name).map(&:length).max || 0 - max_len = [max_len, "$default".length].max if state.default_reduction_rule - ary = [] - - state.non_default_reduces.each do |reduce| - reduce.look_ahead.each do |term| - ary << [term, reduce] - end - end - - ary.sort_by do |term, reduce| - term.number - end.each do |term, reduce| - rule = reduce.item.rule - io << " #{term.display_name.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n" - nl = true - end - - if (r = state.default_reduction_rule) - nl = true - s = "$default".ljust(max_len) - - if r.initial_rule? - io << " #{s} accept\n" - else - io << " #{s} reduce using rule #{r.id} (#{r.lhs.display_name})\n" - end - end - io << "\n" if nl - - # Report nonterminal transitions - tmp = [] - max_len = 0 - state.nterm_transitions.each do |shift, next_state| - nterm = shift.next_sym - tmp << [nterm, next_state.id] - max_len = [max_len, nterm.id.s_value.length].max - end - tmp.uniq! - tmp.sort_by! do |nterm, state_id| - nterm.number - end - tmp.each do |nterm, state_id| - io << " #{nterm.id.s_value.ljust(max_len)} go to state #{state_id}\n" - end - io << "\n" unless tmp.empty? - - if solved - # Report conflict resolutions - state.resolved_conflicts.each do |resolved| - io << " #{resolved.report_message}\n" - end - io << "\n" unless state.resolved_conflicts.empty? - end - - if counterexamples && state.has_conflicts? - # Report counterexamples - examples = cex.compute(state) - examples.each do |example| - label0 = example.type == :shift_reduce ? "shift/reduce" : "reduce/reduce" - label1 = example.type == :shift_reduce ? "Shift derivation" : "First Reduce derivation" - label2 = example.type == :shift_reduce ? 
"Reduce derivation" : "Second Reduce derivation" - - io << " #{label0} conflict on token #{example.conflict_symbol.id.s_value}:\n" - io << " #{example.path1_item}\n" - io << " #{example.path2_item}\n" - io << " #{label1}\n" - example.derivations1.render_strings_for_report.each do |str| - io << " #{str}\n" - end - io << " #{label2}\n" - example.derivations2.render_strings_for_report.each do |str| - io << " #{str}\n" - end - end - end - - if verbose - # Report direct_read_sets - io << " [Direct Read sets]\n" - direct_read_sets = @states.direct_read_sets - @states.nterms.each do |nterm| - terms = direct_read_sets[[state.id, nterm.token_id]] - next unless terms - next if terms.empty? - - str = terms.map {|sym| sym.id.s_value }.join(", ") - io << " read #{nterm.id.s_value} shift #{str}\n" - end - io << "\n" - - # Report reads_relation - io << " [Reads Relation]\n" - @states.nterms.each do |nterm| - a = @states.reads_relation[[state.id, nterm.token_id]] - next unless a - - a.each do |state_id2, nterm_id2| - n = @states.nterms.find {|n| n.token_id == nterm_id2 } - io << " (State #{state_id2}, #{n.id.s_value})\n" - end - end - io << "\n" - - # Report read_sets - io << " [Read sets]\n" - read_sets = @states.read_sets - @states.nterms.each do |nterm| - terms = read_sets[[state.id, nterm.token_id]] - next unless terms - next if terms.empty? 
- - terms.each do |sym| - io << " #{sym.id.s_value}\n" - end - end - io << "\n" - - # Report includes_relation - io << " [Includes Relation]\n" - @states.nterms.each do |nterm| - a = @states.includes_relation[[state.id, nterm.token_id]] - next unless a - - a.each do |state_id2, nterm_id2| - n = @states.nterms.find {|n| n.token_id == nterm_id2 } - io << " (State #{state.id}, #{nterm.id.s_value}) -> (State #{state_id2}, #{n.id.s_value})\n" - end - end - io << "\n" - - # Report lookback_relation - io << " [Lookback Relation]\n" - @states.rules.each do |rule| - a = @states.lookback_relation[[state.id, rule.id]] - next unless a - - a.each do |state_id2, nterm_id2| - n = @states.nterms.find {|n| n.token_id == nterm_id2 } - io << " (Rule: #{rule.display_name}) -> (State #{state_id2}, #{n.id.s_value})\n" - end - end - io << "\n" - - # Report follow_sets - io << " [Follow sets]\n" - follow_sets = @states.follow_sets - @states.nterms.each do |nterm| - terms = follow_sets[[state.id, nterm.token_id]] - - next unless terms - - terms.each do |sym| - io << " #{nterm.id.s_value} -> #{sym.id.s_value}\n" - end - end - io << "\n" - - # Report LA - io << " [Look-Ahead Sets]\n" - tmp = [] - max_len = 0 - @states.rules.each do |rule| - syms = @states.la[[state.id, rule.id]] - next unless syms - - tmp << [rule, syms] - max_len = ([max_len] + syms.map {|s| s.id.s_value.length }).max - end - tmp.each do |rule, syms| - syms.each do |sym| - io << " #{sym.id.s_value.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n" - end - end - io << "\n" unless tmp.empty? 
- end - - # End of Report State - io << "\n" - end - end - end -end diff --git a/tool/lrama/lib/lrama/trace_reporter.rb b/tool/lrama/lib/lrama/trace_reporter.rb deleted file mode 100644 index bcf1ef1e50327e..00000000000000 --- a/tool/lrama/lib/lrama/trace_reporter.rb +++ /dev/null @@ -1,45 +0,0 @@ -# rbs_inline: enabled -# frozen_string_literal: true - -module Lrama - class TraceReporter - # @rbs (Lrama::Grammar grammar) -> void - def initialize(grammar) - @grammar = grammar - end - - # @rbs (**Hash[Symbol, bool] options) -> void - def report(**options) - _report(**options) - end - - private - - # @rbs rules: (bool rules, bool actions, bool only_explicit_rules, **untyped _) -> void - def _report(rules: false, actions: false, only_explicit_rules: false, **_) - report_rules if rules && !only_explicit_rules - report_only_explicit_rules if only_explicit_rules - report_actions if actions - end - - # @rbs () -> void - def report_rules - puts "Grammar rules:" - @grammar.rules.each { |rule| puts rule.display_name } - end - - # @rbs () -> void - def report_only_explicit_rules - puts "Grammar rules:" - @grammar.rules.each do |rule| - puts rule.display_name_without_action if rule.lhs.first_set.any? 
- end - end - - # @rbs () -> void - def report_actions - puts "Grammar rules with actions:" - @grammar.rules.each { |rule| puts rule.with_actions } - end - end -end diff --git a/tool/lrama/lib/lrama/tracer.rb b/tool/lrama/lib/lrama/tracer.rb new file mode 100644 index 00000000000000..fda699a6654a12 --- /dev/null +++ b/tool/lrama/lib/lrama/tracer.rb @@ -0,0 +1,51 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +require_relative "tracer/actions" +require_relative "tracer/closure" +require_relative "tracer/duration" +require_relative "tracer/only_explicit_rules" +require_relative "tracer/rules" +require_relative "tracer/state" + +module Lrama + class Tracer + # @rbs (IO io, **bool options) -> void + def initialize(io, **options) + @io = io + @options = options + @only_explicit_rules = OnlyExplicitRules.new(io, **options) + @rules = Rules.new(io, **options) + @actions = Actions.new(io, **options) + @closure = Closure.new(io, **options) + @state = State.new(io, **options) + end + + # @rbs (Lrama::Grammar grammar) -> void + def trace(grammar) + @only_explicit_rules.trace(grammar) + @rules.trace(grammar) + @actions.trace(grammar) + end + + # @rbs (Lrama::State state) -> void + def trace_closure(state) + @closure.trace(state) + end + + # @rbs (Lrama::State state) -> void + def trace_state(state) + @state.trace(state) + end + + # @rbs (Integer state_count, Lrama::State state) -> void + def trace_state_list_append(state_count, state) + @state.trace_list_append(state_count, state) + end + + # @rbs () -> void + def enable_duration + Duration.enable if @options[:time] + end + end +end diff --git a/tool/lrama/lib/lrama/tracer/actions.rb b/tool/lrama/lib/lrama/tracer/actions.rb new file mode 100644 index 00000000000000..7b9c9b9f530103 --- /dev/null +++ b/tool/lrama/lib/lrama/tracer/actions.rb @@ -0,0 +1,22 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Tracer + class Actions + # @rbs (IO io, ?actions: bool, **bool options) -> void + 
def initialize(io, actions: false, **options) + @io = io + @actions = actions + end + + # @rbs (Lrama::Grammar grammar) -> void + def trace(grammar) + return unless @actions + + @io << "Grammar rules with actions:" << "\n" + grammar.rules.each { |rule| @io << rule.with_actions << "\n" } + end + end + end +end diff --git a/tool/lrama/lib/lrama/tracer/closure.rb b/tool/lrama/lib/lrama/tracer/closure.rb new file mode 100644 index 00000000000000..5b2f0b27e65f9f --- /dev/null +++ b/tool/lrama/lib/lrama/tracer/closure.rb @@ -0,0 +1,30 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Tracer + class Closure + # @rbs (IO io, ?automaton: bool, ?closure: bool, **bool) -> void + def initialize(io, automaton: false, closure: false, **_) + @io = io + @closure = automaton || closure + end + + # @rbs (Lrama::State state) -> void + def trace(state) + return unless @closure + + @io << "Closure: input" << "\n" + state.kernels.each do |item| + @io << " #{item.display_rest}" << "\n" + end + @io << "\n\n" + @io << "Closure: output" << "\n" + state.items.each do |item| + @io << " #{item.display_rest}" << "\n" + end + @io << "\n\n" + end + end + end +end diff --git a/tool/lrama/lib/lrama/tracer/duration.rb b/tool/lrama/lib/lrama/tracer/duration.rb new file mode 100644 index 00000000000000..91c49625b2c087 --- /dev/null +++ b/tool/lrama/lib/lrama/tracer/duration.rb @@ -0,0 +1,38 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Tracer + module Duration + # TODO: rbs-inline 0.11.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # see: https://github.com/soutaro/rbs-inline/pull/149 + # + # @rbs! + # @_report_duration_enabled: bool + + # @rbs () -> void + def self.enable + @_report_duration_enabled = true + end + + # @rbs () -> bool + def self.enabled? 
+ !!@_report_duration_enabled + end + + # @rbs [T] (_ToS message) { -> T } -> T + def report_duration(message) + time1 = Time.now.to_f + result = yield + time2 = Time.now.to_f + + if Duration.enabled? + STDERR.puts sprintf("%s %10.5f s", message, time2 - time1) + end + + return result + end + end + end +end diff --git a/tool/lrama/lib/lrama/tracer/only_explicit_rules.rb b/tool/lrama/lib/lrama/tracer/only_explicit_rules.rb new file mode 100644 index 00000000000000..4f64e7d2f47a36 --- /dev/null +++ b/tool/lrama/lib/lrama/tracer/only_explicit_rules.rb @@ -0,0 +1,24 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Tracer + class OnlyExplicitRules + # @rbs (IO io, ?only_explicit: bool, **bool) -> void + def initialize(io, only_explicit: false, **_) + @io = io + @only_explicit = only_explicit + end + + # @rbs (Lrama::Grammar grammar) -> void + def trace(grammar) + return unless @only_explicit + + @io << "Grammar rules:" << "\n" + grammar.rules.each do |rule| + @io << rule.display_name_without_action << "\n" if rule.lhs.first_set.any? 
+ end + end + end + end +end diff --git a/tool/lrama/lib/lrama/tracer/rules.rb b/tool/lrama/lib/lrama/tracer/rules.rb new file mode 100644 index 00000000000000..d6e85b8432f300 --- /dev/null +++ b/tool/lrama/lib/lrama/tracer/rules.rb @@ -0,0 +1,23 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Tracer + class Rules + # @rbs (IO io, ?rules: bool, ?only_explicit: bool, **bool) -> void + def initialize(io, rules: false, only_explicit: false, **_) + @io = io + @rules = rules + @only_explicit = only_explicit + end + + # @rbs (Lrama::Grammar grammar) -> void + def trace(grammar) + return if !@rules || @only_explicit + + @io << "Grammar rules:" << "\n" + grammar.rules.each { |rule| @io << rule.display_name << "\n" } + end + end + end +end diff --git a/tool/lrama/lib/lrama/tracer/state.rb b/tool/lrama/lib/lrama/tracer/state.rb new file mode 100644 index 00000000000000..21c0047f8e8047 --- /dev/null +++ b/tool/lrama/lib/lrama/tracer/state.rb @@ -0,0 +1,33 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Tracer + class State + # @rbs (IO io, ?automaton: bool, ?closure: bool, **bool) -> void + def initialize(io, automaton: false, closure: false, **_) + @io = io + @state = automaton || closure + end + + # @rbs (Lrama::State state) -> void + def trace(state) + return unless @state + + # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but + # I think it is not correct... 
+ previous = state.kernels.first.previous_sym + @io << "Processing state #{state.id} (reached by #{previous.display_name})" << "\n" + end + + # @rbs (Integer state_count, Lrama::State state) -> void + def trace_list_append(state_count, state) + return unless @state + + previous = state.kernels.first.previous_sym + @io << sprintf("state_list_append (state = %d, symbol = %d (%s))", + state_count, previous.number, previous.display_name) << "\n" + end + end + end +end diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb index 12ece5a8f2b153..d649b749391910 100644 --- a/tool/lrama/lib/lrama/version.rb +++ b/tool/lrama/lib/lrama/version.rb @@ -1,5 +1,6 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama - VERSION = "0.7.0".freeze + VERSION = "0.7.1".freeze #: String end diff --git a/tool/lrama/lib/lrama/warnings.rb b/tool/lrama/lib/lrama/warnings.rb new file mode 100644 index 00000000000000..52f09144ef0313 --- /dev/null +++ b/tool/lrama/lib/lrama/warnings.rb @@ -0,0 +1,33 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +require_relative 'warnings/conflicts' +require_relative 'warnings/implicit_empty' +require_relative 'warnings/name_conflicts' +require_relative 'warnings/redefined_rules' +require_relative 'warnings/required' +require_relative 'warnings/useless_precedence' + +module Lrama + class Warnings + # @rbs (Logger logger, bool warnings) -> void + def initialize(logger, warnings) + @conflicts = Conflicts.new(logger, warnings) + @implicit_empty = ImplicitEmpty.new(logger, warnings) + @name_conflicts = NameConflicts.new(logger, warnings) + @redefined_rules = RedefinedRules.new(logger, warnings) + @required = Required.new(logger, warnings) + @useless_precedence = UselessPrecedence.new(logger, warnings) + end + + # @rbs (Lrama::Grammar grammar, Lrama::States states) -> void + def warn(grammar, states) + @conflicts.warn(states) + @implicit_empty.warn(grammar) + @name_conflicts.warn(grammar) + 
@redefined_rules.warn(grammar) + @required.warn(grammar) + @useless_precedence.warn(grammar, states) + end + end +end diff --git a/tool/lrama/lib/lrama/warnings/conflicts.rb b/tool/lrama/lib/lrama/warnings/conflicts.rb new file mode 100644 index 00000000000000..6ba0de6f9c5e64 --- /dev/null +++ b/tool/lrama/lib/lrama/warnings/conflicts.rb @@ -0,0 +1,27 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Warnings + class Conflicts + # @rbs (Lrama::Logger logger, bool warnings) -> void + def initialize(logger, warnings) + @logger = logger + @warnings = warnings + end + + # @rbs (Lrama::States states) -> void + def warn(states) + return unless @warnings + + if states.sr_conflicts_count != 0 + @logger.warn("shift/reduce conflicts: #{states.sr_conflicts_count} found") + end + + if states.rr_conflicts_count != 0 + @logger.warn("reduce/reduce conflicts: #{states.rr_conflicts_count} found") + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/warnings/implicit_empty.rb b/tool/lrama/lib/lrama/warnings/implicit_empty.rb new file mode 100644 index 00000000000000..ba81adca01cbba --- /dev/null +++ b/tool/lrama/lib/lrama/warnings/implicit_empty.rb @@ -0,0 +1,29 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Warnings + # Warning rationale: Empty rules are easily overlooked and ambiguous + # - Empty alternatives like `rule: | "token";` can be missed during code reading + # - Difficult to distinguish between intentional empty rules vs. omissions + # - Explicit marking with %empty directive comment improves clarity + class ImplicitEmpty + # @rbs (Lrama::Logger logger, bool warnings) -> void + def initialize(logger, warnings) + @logger = logger + @warnings = warnings + end + + # @rbs (Lrama::Grammar grammar) -> void + def warn(grammar) + return unless @warnings + + grammar.rule_builders.each do |builder| + if builder.rhs.empty? 
+ @logger.warn("warning: empty rule without %empty") + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/warnings/name_conflicts.rb b/tool/lrama/lib/lrama/warnings/name_conflicts.rb new file mode 100644 index 00000000000000..c0754ab55125ba --- /dev/null +++ b/tool/lrama/lib/lrama/warnings/name_conflicts.rb @@ -0,0 +1,63 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Warnings + # Warning rationale: Parameterized rule names conflicting with symbol names + # - When a %rule name is identical to a terminal or non-terminal symbol name, + # it reduces grammar readability and may cause unintended behavior + # - Detecting these conflicts helps improve grammar definition quality + class NameConflicts + # @rbs (Lrama::Logger logger, bool warnings) -> void + def initialize(logger, warnings) + @logger = logger + @warnings = warnings + end + + # @rbs (Lrama::Grammar grammar) -> void + def warn(grammar) + return unless @warnings + return if grammar.parameterized_rules.empty? 
+ + symbol_names = collect_symbol_names(grammar) + check_conflicts(grammar.parameterized_rules, symbol_names) + end + + private + + # @rbs (Lrama::Grammar grammar) -> Set[String] + def collect_symbol_names(grammar) + symbol_names = Set.new + + collect_term_names(grammar.terms, symbol_names) + collect_nterm_names(grammar.nterms, symbol_names) + + symbol_names + end + + # @rbs (Array[untyped] terms, Set[String] symbol_names) -> void + def collect_term_names(terms, symbol_names) + terms.each do |term| + symbol_names.add(term.id.s_value) + symbol_names.add(term.alias_name) if term.alias_name + end + end + + # @rbs (Array[untyped] nterms, Set[String] symbol_names) -> void + def collect_nterm_names(nterms, symbol_names) + nterms.each do |nterm| + symbol_names.add(nterm.id.s_value) + end + end + + # @rbs (Array[untyped] parameterized_rules, Set[String] symbol_names) -> void + def check_conflicts(parameterized_rules, symbol_names) + parameterized_rules.each do |param_rule| + next unless symbol_names.include?(param_rule.name) + + @logger.warn("warning: parameterized rule name \"#{param_rule.name}\" conflicts with symbol name") + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/warnings/redefined_rules.rb b/tool/lrama/lib/lrama/warnings/redefined_rules.rb new file mode 100644 index 00000000000000..8ac2f1f1034dae --- /dev/null +++ b/tool/lrama/lib/lrama/warnings/redefined_rules.rb @@ -0,0 +1,23 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Warnings + class RedefinedRules + # @rbs (Lrama::Logger logger, bool warnings) -> void + def initialize(logger, warnings) + @logger = logger + @warnings = warnings + end + + # @rbs (Lrama::Grammar grammar) -> void + def warn(grammar) + return unless @warnings + + grammar.parameterized_resolver.redefined_rules.each do |rule| + @logger.warn("parameterized rule redefined: #{rule}") + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/warnings/required.rb 
b/tool/lrama/lib/lrama/warnings/required.rb new file mode 100644 index 00000000000000..4ab1ed787ed9eb --- /dev/null +++ b/tool/lrama/lib/lrama/warnings/required.rb @@ -0,0 +1,23 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Warnings + class Required + # @rbs (Lrama::Logger logger, bool warnings) -> void + def initialize(logger, warnings = false, **_) + @logger = logger + @warnings = warnings + end + + # @rbs (Lrama::Grammar grammar) -> void + def warn(grammar) + return unless @warnings + + if grammar.required + @logger.warn("currently, %require is simply valid as a grammar but does nothing") + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/warnings/useless_precedence.rb b/tool/lrama/lib/lrama/warnings/useless_precedence.rb new file mode 100644 index 00000000000000..2913d6d7e5f17b --- /dev/null +++ b/tool/lrama/lib/lrama/warnings/useless_precedence.rb @@ -0,0 +1,25 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Warnings + class UselessPrecedence + # @rbs (Lrama::Logger logger, bool warnings) -> void + def initialize(logger, warnings) + @logger = logger + @warnings = warnings + end + + # @rbs (Lrama::Grammar grammar, Lrama::States states) -> void + def warn(grammar, states) + return unless @warnings + + grammar.precedences.each do |precedence| + unless precedence.used_by? + @logger.warn("Precedence #{precedence.s_value} (line: #{precedence.lineno}) is defined but not used in any rule.") + end + end + end + end + end +end diff --git a/tool/lrama/template/bison/_yacc.h b/tool/lrama/template/bison/_yacc.h index 34ed6d81f59d74..3e270c91710855 100644 --- a/tool/lrama/template/bison/_yacc.h +++ b/tool/lrama/template/bison/_yacc.h @@ -28,6 +28,7 @@ extern int yydebug; <%-# b4_declare_yylstype -%> <%-# b4_value_type_define -%> /* Value type. */ +<% if output.grammar.union %> #if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED union YYSTYPE { @@ -40,6 +41,13 @@ typedef union YYSTYPE YYSTYPE; # define YYSTYPE_IS_TRIVIAL 1 # define YYSTYPE_IS_DECLARED 1 #endif +<% else %> +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +typedef int YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif +<% end %> <%-# b4_location_type_define -%> /* Location type. */ diff --git a/tool/lrama/template/diagram/diagram.html b/tool/lrama/template/diagram/diagram.html new file mode 100644 index 00000000000000..3e87e6e5192700 --- /dev/null +++ b/tool/lrama/template/diagram/diagram.html @@ -0,0 +1,102 @@ + + + + Lrama syntax diagrams + + + + + + <%= output.diagrams %> + + + From 352de161ff9765bbb68a2ec015f2c02b5b0b534b Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 24 Dec 2025 11:04:23 +0900 Subject: [PATCH 17/19] Remove an extra dot from `RUBY_API_VERSION_STR` --- version.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/version.c b/version.c index c9a219fb1385c1..554783265201f0 100644 --- a/version.c +++ b/version.c @@ -51,8 +51,7 @@ #define RUBY_API_VERSION_STR \ STRINGIZE(RUBY_API_VERSION_MAJOR) "." \ - STRINGIZE(RUBY_API_VERSION_MINOR) "." 
\ - "" + STRINGIZE(RUBY_API_VERSION_MINOR) const int ruby_api_version[] = { RUBY_API_VERSION_MAJOR, RUBY_API_VERSION_MINOR, From f7b48456ebffa1dfe7fe201b27c9c34b5703a53e Mon Sep 17 00:00:00 2001 From: Satoshi Tagomori Date: Tue, 23 Dec 2025 22:27:06 +0900 Subject: [PATCH 18/19] Box: show the fully qualified URL of the Ruby::Box doc --- box.c | 5 ++++- test/ruby/test_box.rb | 14 ++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/box.c b/box.c index 616a8acf7d88b5..830172cdd52345 100644 --- a/box.c +++ b/box.c @@ -902,6 +902,8 @@ rb_box_eval(VALUE box_value, VALUE str) static int box_experimental_warned = 0; +RUBY_EXTERN const char ruby_api_version_name[]; + void rb_initialize_main_box(void) { @@ -914,7 +916,8 @@ rb_initialize_main_box(void) if (!box_experimental_warned) { rb_category_warn(RB_WARN_CATEGORY_EXPERIMENTAL, "Ruby::Box is experimental, and the behavior may change in the future!\n" - "See doc/language/box.md for known issues, etc."); + "See https://docs.ruby-lang.org/en/%s/Ruby/Box.html for known issues, etc.", + ruby_api_version_name); box_experimental_warned = 1; } diff --git a/test/ruby/test_box.rb b/test/ruby/test_box.rb index e584d233ca0145..f35d07c0863dc2 100644 --- a/test/ruby/test_box.rb +++ b/test/ruby/test_box.rb @@ -3,10 +3,10 @@ require 'test/unit' class TestBox < Test::Unit::TestCase - EXPERIMENTAL_WARNINGS = [ - "warning: Ruby::Box is experimental, and the behavior may change in the future!", - "See doc/language/box.md for known issues, etc." 
- ].join("\n") + EXPERIMENTAL_WARNING_LINE_PATTERNS = [ + /ruby(\.exe)?: warning: Ruby::Box is experimental, and the behavior may change in the future!/, + %r{See https://docs.ruby-lang.org/en/(master|\d\.\d)/Ruby/Box.html for known issues, etc.} + ] ENV_ENABLE_BOX = {'RUBY_BOX' => '1', 'TEST_DIR' => __dir__} def setup @@ -650,8 +650,9 @@ def test_prelude_gems_and_loaded_features end; # No additional warnings except for experimental warnings - assert_includes error.join("\n"), EXPERIMENTAL_WARNINGS assert_equal 2, error.size + assert_match EXPERIMENTAL_WARNING_LINE_PATTERNS[0], error[0] + assert_match EXPERIMENTAL_WARNING_LINE_PATTERNS[1], error[1] assert_includes output.grep(/^before:/).join("\n"), '/bundled_gems.rb' assert_includes output.grep(/^before:/).join("\n"), '/error_highlight.rb' @@ -672,8 +673,9 @@ def test_prelude_gems_and_loaded_features_with_disable_gems puts ["after:", $LOADED_FEATURES.select{ it.end_with?("/error_highlight.rb") }&.first].join end; - assert_includes error.join("\n"), EXPERIMENTAL_WARNINGS assert_equal 2, error.size + assert_match EXPERIMENTAL_WARNING_LINE_PATTERNS[0], error[0] + assert_match EXPERIMENTAL_WARNING_LINE_PATTERNS[1], error[1] refute_includes output.grep(/^before:/).join("\n"), '/bundled_gems.rb' refute_includes output.grep(/^before:/).join("\n"), '/error_highlight.rb' From 88d6c5aaa82105d96c37847723bcf0151deb6497 Mon Sep 17 00:00:00 2001 From: Godfrey Chan Date: Tue, 23 Dec 2025 22:43:40 -0800 Subject: [PATCH 19/19] [DOC] Update ZJIT status in NEWS.md As for Ruby v4.0.0-preview3, ZJIT support is enabled by default on supported platforms. The previous phrasing is not relevant for most users. Replaced with brief instructions for enabling the JIT itself. --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2c5a758e6b06c3..d0a8887ed11874 100644 --- a/NEWS.md +++ b/NEWS.md @@ -510,7 +510,7 @@ A lot of work has gone into making Ractors more stable, performant, and usable. 
* ZJIT * Introduce an [experimental method-based JIT compiler](https://docs.ruby-lang.org/en/master/jit/zjit_md.html). - To enable `--zjit` support, build Ruby with Rust 1.85.0 or later. + To enable ZJIT on supported platforms, supply the `--zjit` option or call `RubyVM::ZJIT.enable` at runtime. * As of Ruby 4.0.0, ZJIT is faster than the interpreter, but not yet as fast as YJIT. We encourage experimentation with ZJIT, but advise against deploying it in production for now. * Our goal is to make ZJIT faster than YJIT and production-ready in Ruby 4.1.