diff options
author | SHIBATA Hiroshi <hsbt@ruby-lang.org> | 2019-05-07 07:33:33 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-07 07:33:33 +0900 |
commit | 6ec6e475e8afcf7868b0407fc08014aed886ecf1 (patch) | |
tree | 3b6c934a64f8660c0bbc5091c1cb58b342a23a26 | |
parent | f3a37e6bc1c2a98bfc9fafc389ea05622c744af9 (diff) | |
parent | 7d04834b79aa6677b1bff42161311cb79809ed7d (diff) | |
download | psych-6ec6e475e8afcf7868b0407fc08014aed886ecf1.zip |
Merge pull request #400 from Shopify/remove-string-cache
Remove string_cache in ScalarScanner

mode | file | lines changed |
---|---|---|
-rw-r--r-- | .travis.yml | 1 |
-rw-r--r-- | appveyor.yml | 2 |
-rw-r--r-- | lib/psych/scalar_scanner.rb | 59 |
-rw-r--r-- | test/psych/test_scalar_scanner.rb | 17 |
4 files changed, 40 insertions, 39 deletions
```diff
diff --git a/.travis.yml b/.travis.yml
index 6ad43f6..7f1a80c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,4 @@
 rvm:
-  - 2.3.8
   - 2.4.5
   - 2.5.3
   - 2.6.0
diff --git a/appveyor.yml b/appveyor.yml
index 4df19c9..90ed36c 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -10,8 +10,6 @@ test_script:
 deploy: off
 environment:
   matrix:
-    - ruby_version: "23"
-    - ruby_version: "23-x64"
     - ruby_version: "24"
     - ruby_version: "24-x64"
     - ruby_version: "25"
diff --git a/lib/psych/scalar_scanner.rb b/lib/psych/scalar_scanner.rb
index 29c156c..cea2a45 100644
--- a/lib/psych/scalar_scanner.rb
+++ b/lib/psych/scalar_scanner.rb
@@ -14,16 +14,15 @@ module Psych
               |\.(nan|NaN|NAN)(?# not a number))$/x

     # Taken from http://yaml.org/type/int.html
-    INTEGER = /^(?:[-+]?0b[0-1_]+          (?# base 2)
-                  |[-+]?0[0-7_]+           (?# base 8)
-                  |[-+]?(?:0|[1-9][0-9_]*) (?# base 10)
-                  |[-+]?0x[0-9a-fA-F_]+    (?# base 16))$/x
+    INTEGER = /^(?:[-+]?0b[0-1_,]+          (?# base 2)
+                  |[-+]?0[0-7_,]+           (?# base 8)
+                  |[-+]?(?:0|[1-9][0-9_,]*) (?# base 10)
+                  |[-+]?0x[0-9a-fA-F_,]+    (?# base 16))$/x

     attr_reader :class_loader

     # Create a new scanner
     def initialize class_loader
-      @string_cache = {}
       @symbol_cache = {}
       @class_loader = class_loader
     end
@@ -31,81 +30,70 @@ module Psych
     # Tokenize +string+ returning the Ruby object
     def tokenize string
       return nil if string.empty?
-      return string if @string_cache.key?(string)
       return @symbol_cache[string] if @symbol_cache.key?(string)

-      case string
       # Check for a String type, being careful not to get caught by hash keys, hex values, and
       # special floats (e.g., -.inf).
-      when /^[^\d\.:-]?[A-Za-z_\s!@#\$%\^&\*\(\)\{\}\<\>\|\/\\~;=]+/, /\n/
-        if string.length > 5
-          @string_cache[string] = true
-          return string
-        end
+      if string.match?(/^[^\d\.:-]?[A-Za-z_\s!@#\$%\^&\*\(\)\{\}\<\>\|\/\\~;=]+/) || string.match?(/\n/)
+        return string if string.length > 5

-        case string
-        when /^[^ytonf~]/i
-          @string_cache[string] = true
+        if string.match?(/^[^ytonf~]/i)
           string
-        when '~', /^null$/i
+        elsif string == '~' || string.match?(/^null$/i)
           nil
-        when /^(yes|true|on)$/i
+        elsif string.match?(/^(yes|true|on)$/i)
           true
-        when /^(no|false|off)$/i
+        elsif string.match?(/^(no|false|off)$/i)
           false
         else
-          @string_cache[string] = true
           string
         end
-      when TIME
+      elsif string.match?(TIME)
         begin
           parse_time string
         rescue ArgumentError
           string
         end
-      when /^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/
+      elsif string.match?(/^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/)
         require 'date'
         begin
           class_loader.date.strptime(string, '%Y-%m-%d')
         rescue ArgumentError
           string
         end
-      when /^\.inf$/i
+      elsif string.match?(/^\.inf$/i)
         Float::INFINITY
-      when /^-\.inf$/i
+      elsif string.match?(/^-\.inf$/i)
         -Float::INFINITY
-      when /^\.nan$/i
+      elsif string.match?(/^\.nan$/i)
         Float::NAN
-      when /^:./
+      elsif string.match?(/^:./)
         if string =~ /^:(["'])(.*)\1/
           @symbol_cache[string] = class_loader.symbolize($2.sub(/^:/, ''))
         else
           @symbol_cache[string] = class_loader.symbolize(string.sub(/^:/, ''))
         end
-      when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}$/
+      elsif string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}$/)
         i = 0
         string.split(':').each_with_index do |n,e|
           i += (n.to_i * 60 ** (e - 2).abs)
         end
         i
-      when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}\.[0-9_]*$/
+      elsif string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}\.[0-9_]*$/)
         i = 0
         string.split(':').each_with_index do |n,e|
           i += (n.to_f * 60 ** (e - 2).abs)
         end
         i
-      when FLOAT
-        if string =~ /\A[-+]?\.\Z/
-          @string_cache[string] = true
+      elsif string.match?(FLOAT)
+        if string.match?(/\A[-+]?\.\Z/)
           string
         else
           Float(string.gsub(/[,_]|\.([Ee]|$)/, '\1'))
         end
+      elsif string.match?(INTEGER)
+        parse_int string
       else
-        int = parse_int string.gsub(/[,_]/, '')
-        return int if int
-
-        @string_cache[string] = true
         string
       end
     end
@@ -113,8 +101,7 @@ module Psych
     ###
     # Parse and return an int from +string+
     def parse_int string
-      return unless INTEGER === string
-      Integer(string)
+      Integer(string.gsub(/[,]/, ''))
     end

     ###
diff --git a/test/psych/test_scalar_scanner.rb b/test/psych/test_scalar_scanner.rb
index ebe8daf..d12a905 100644
--- a/test/psych/test_scalar_scanner.rb
+++ b/test/psych/test_scalar_scanner.rb
@@ -113,5 +113,22 @@ module Psych
     def test_scan_strings_starting_with_underscores
       assert_equal "_100", ss.tokenize('_100')
     end
+
+    def test_scan_int_commas_and_underscores
+      # NB: This test is to ensure backward compatibility with prior Psych versions,
+      # not to test against any actual YAML specification.
+      assert_equal 123_456_789, ss.tokenize('123_456_789')
+      assert_equal 123_456_789, ss.tokenize('123,456,789')
+      assert_equal 123_456_789, ss.tokenize('1_2,3,4_5,6_789')
+
+      assert_equal 0b010101010, ss.tokenize('0b010101010')
+      assert_equal 0b010101010, ss.tokenize('0b0,1_0,1_,0,1_01,0')
+
+      assert_equal 01234567, ss.tokenize('01234567')
+      assert_equal 01234567, ss.tokenize('0_,,,1_2,_34567')
+
+      assert_equal 0x123456789abcdef, ss.tokenize('0x123456789abcdef')
+      assert_equal 0x123456789abcdef, ss.tokenize('0x12_,34,_56,_789abcdef')
+    end
   end
 end
```