Skip to content

Commit 54783ec

Browse files
authored
Merge pull request #33 from xiejiangzhi/master
Support unicode range
2 parents 412920b + 98d562f commit 54783ec

File tree

7 files changed

+119
-29
lines changed

7 files changed

+119
-29
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@
1414
mkmf.log
1515
tags
1616
/coverage/
17+
18+
/spec/examples.txt

.rspec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
--color
2-
--require spec_helper
2+
--require gem_helper

lib/regexp-examples/chargroup_parser.rb

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
require_relative 'parser_helpers/charset_negation_helper'
2+
require_relative 'parser_helpers/parse_group_helper'
3+
require_relative 'parser_helpers/parse_after_backslash_group_helper'
24

35
module RegexpExamples
46
# A "sub-parser", for char groups in a regular expression
@@ -13,6 +15,8 @@ module RegexpExamples
1315
# [[^:alpha:]&&[\n]a-c] - all of the above!!!! (should match "\n")
1416
class ChargroupParser
1517
include CharsetNegationHelper
18+
include ParseGroupHelper
19+
include ParseAfterBackslashGroupHelper
1620

1721
attr_reader :regexp_string, :current_position
1822
alias length current_position
@@ -37,7 +41,6 @@ def parse
3741
parse_after_ampersand
3842
else
3943
@charset.concat parse_checking_backlash
40-
@current_position += 1
4144
end
4245
end
4346

@@ -79,15 +82,23 @@ def parse_checking_backlash
7982
@current_position += 1
8083
parse_after_backslash
8184
else
82-
[next_char]
85+
r = [next_char]
86+
@current_position += 1
87+
r
8388
end
8489
end
8590

8691
def parse_after_backslash
8792
if next_char == 'b'
93+
@current_position += 1
8894
["\b"]
95+
elsif rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/
96+
@current_position += 1
97+
parse_backslash_unicode_sequence(Regexp.last_match(1)).result.map(&:to_s)
8998
else
90-
CharSets::BackslashCharMap.fetch(next_char, [next_char])
99+
char = CharSets::BackslashCharMap.fetch(next_char, [next_char])
100+
@current_position += 1
101+
char
91102
end
92103
end
93104

@@ -117,13 +128,18 @@ def parse_sub_group_intersect
117128
end
118129

119130
def parse_after_hyphen
120-
if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
131+
r = if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
132+
@current_position += 1
121133
@charset << '-'
134+
elsif rest_of_string =~ /\A-\\u(\h{4}|\{\h{1,4}\})/
135+
@current_position += 3
136+
char = parse_backslash_unicode_sequence(Regexp.last_match(1)).result.first.to_s
137+
@charset.concat((@charset.last..char).to_a)
122138
else
123139
@current_position += 1
124140
@charset.concat((@charset.last..parse_checking_backlash.first).to_a)
125141
end
126-
@current_position += 1
142+
r
127143
end
128144

129145
def rest_of_string

regexp-examples.gemspec

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ Gem::Specification.new do |s|
1313
s.executables = s.files.grep(/^bin\//) { |f| File.basename(f) }
1414
s.test_files = s.files.grep(/^(test|spec|features)\//)
1515
s.require_paths = ['lib']
16-
s.homepage =
17-
'http://rubygems.org/gems/regexp-examples'
18-
s.add_development_dependency 'bundler'
16+
s.homepage = 'http://rubygems.org/gems/regexp-examples'
17+
s.add_development_dependency 'bundler', '> 1.7'
1918
s.add_development_dependency 'rake', '~> 12.0'
19+
s.add_development_dependency 'pry', '~> 0.12.0'
2020
s.license = 'MIT'
2121
s.required_ruby_version = '>= 2.4.0'
2222
end

spec/gem_helper.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
require 'spec_helper'
2+
3+
require 'coveralls'
4+
Coveralls.wear!
5+
6+
require './lib/regexp-examples.rb'
7+
require 'helpers'
8+
require 'pry'
9+
10+
# Several of these tests (intentionally) use "weird" regex patterns,
11+
# that spam annoying warnings when running.
12+
# E.g. warning: invalid back reference: /\k/
13+
# and warning: character class has ']' without escape: /[]]/
14+
# This config disables those warnings.
15+
$VERBOSE = nil
16+
17+
RSpec.configure do |config|
18+
config.include Helpers
19+
end

spec/regexp-examples_spec.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def self.examples_are_empty(*regexps)
7272
/[abc]/,
7373
/[a-c]/,
7474
/[abc-e]/,
75+
/[\u4e00-\u9fa5]/,
7576
/[^a-zA-Z]/,
7677
/[\w]/,
7778
/[]]/,

spec/spec_helper.rb

Lines changed: 72 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,30 @@
1-
require 'coveralls'
2-
Coveralls.wear!
3-
4-
require './lib/regexp-examples.rb'
5-
require 'helpers'
6-
require 'pry'
7-
8-
# Several of these tests (intentionally) use "weird" regex patterns,
9-
# that spam annoying warnings when running.
10-
# E.g. warning: invalid back reference: /\k/
11-
# and warning: character class has ']' without escape: /[]]/
12-
# This config disables those warnings.
13-
$VERBOSE = nil
14-
1+
# This file was generated by the `rspec --init` command. Conventionally, all
2+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3+
# The generated `.rspec` file contains `--require spec_helper` which will cause
4+
# this file to always be loaded, without a need to explicitly require it in any
5+
# files.
6+
#
7+
# Given that it is always loaded, you are encouraged to keep this file as
8+
# light-weight as possible. Requiring heavyweight dependencies from this file
9+
# will add to the boot time of your test suite on EVERY test run, even for an
10+
# individual file that may not need all of that loaded. Instead, consider making
11+
# a separate helper file that requires the additional dependencies and performs
12+
# the additional setup, and require it from the spec files that actually need
13+
# it.
14+
#
15+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
1516
RSpec.configure do |config|
16-
config.include Helpers
17-
17+
# rspec-expectations config goes here. You can use an alternate
18+
# assertion/expectation library such as wrong or the stdlib/minitest
19+
# assertions if you prefer.
1820
config.expect_with :rspec do |expectations|
1921
# This option will default to `true` in RSpec 4. It makes the `description`
2022
# and `failure_message` of custom matchers include text for helper methods
2123
# defined using `chain`, e.g.:
22-
# be_bigger_than(2).and_smaller_than(4).description
23-
# # => "be bigger than 2 and smaller than 4"
24+
# be_bigger_than(2).and_smaller_than(4).description
25+
# # => "be bigger than 2 and smaller than 4"
2426
# ...rather than:
25-
# # => "be bigger than 2"
27+
# # => "be bigger than 2"
2628
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
2729
end
2830

@@ -35,10 +37,60 @@
3537
mocks.verify_partial_doubles = true
3638
end
3739

40+
# This option will default to `:apply_to_host_groups` in RSpec 4 (and will
41+
# have no way to turn it off -- the option exists only for backwards
42+
# compatibility in RSpec 3). It causes shared context metadata to be
43+
# inherited by the metadata hash of host groups and examples, rather than
44+
# triggering implicit auto-inclusion in groups with matching metadata.
45+
config.shared_context_metadata_behavior = :apply_to_host_groups
46+
47+
# This allows you to limit a spec run to individual examples or groups
48+
# you care about by tagging them with `:focus` metadata. When nothing
49+
# is tagged with `:focus`, all examples get run. RSpec also provides
50+
# aliases for `it`, `describe`, and `context` that include `:focus`
51+
# metadata: `fit`, `fdescribe` and `fcontext`, respectively.
52+
config.filter_run_when_matching :focus
53+
54+
# Allows RSpec to persist some state between runs in order to support
55+
# the `--only-failures` and `--next-failure` CLI options. We recommend
56+
# you configure your source control system to ignore this file.
57+
config.example_status_persistence_file_path = "spec/examples.txt"
58+
59+
# Limits the available syntax to the non-monkey patched syntax that is
60+
# recommended. For more details, see:
61+
# - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
62+
# - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
63+
# - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
3864
config.disable_monkey_patching!
3965

66+
# This setting enables warnings. It's recommended, but in some cases may
67+
# be too noisy due to issues in dependencies.
68+
config.warnings = true
69+
70+
# Many RSpec users commonly either run the entire suite or an individual
71+
# file, and it's useful to allow more verbose output when running an
72+
# individual spec file.
73+
if config.files_to_run.one?
74+
# Use the documentation formatter for detailed output,
75+
# unless a formatter has already been configured
76+
# (e.g. via a command-line flag).
77+
config.default_formatter = "doc"
78+
end
79+
4080
# Print the 10 slowest examples and example groups at the
4181
# end of the spec run, to help surface which specs are running
4282
# particularly slow.
43-
# config.profile_examples = 10
83+
config.profile_examples = 10
84+
85+
# Run specs in random order to surface order dependencies. If you find an
86+
# order dependency and want to debug it, you can fix the order by providing
87+
# the seed, which is printed after each run.
88+
# --seed 1234
89+
config.order = :random
90+
91+
# Seed global randomization in this process using the `--seed` CLI option.
92+
# Setting this allows you to use `--seed` to deterministically reproduce
93+
# test failures related to randomization by passing the same `--seed` value
94+
# as the one that triggered the failure.
95+
Kernel.srand config.seed
4496
end

0 commit comments

Comments
 (0)