Merge pull request #400 from Shopify/remove-string-cache

Remove string_cache in ScalarScanner
author: SHIBATA Hiroshi <hsbt@ruby-lang.org> 2019-05-07 07:33:33 +0900
committer: GitHub <noreply@github.com> 2019-05-07 07:33:33 +0900
commit: 6ec6e475e8afcf7868b0407fc08014aed886ecf1 (patch)
tree: 3b6c934a64f8660c0bbc5091c1cb58b342a23a26
parent: f3a37e6bc1c2a98bfc9fafc389ea05622c744af9 (diff)
parent: 7d04834b79aa6677b1bff42161311cb79809ed7d (diff)
download: psych-6ec6e475e8afcf7868b0407fc08014aed886ecf1.zip
4 files changed, 40 insertions, 39 deletions
diff --git a/.travis.yml b/.travis.yml
index 6ad43f6..7f1a80c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,4 @@
 rvm:
-  - 2.3.8
   - 2.4.5
   - 2.5.3
   - 2.6.0
diff --git a/appveyor.yml b/appveyor.yml
index 4df19c9..90ed36c 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -10,8 +10,6 @@ test_script:
 deploy: off
 environment:
   matrix:
-    - ruby_version: "23"
-    - ruby_version: "23-x64"
     - ruby_version: "24"
     - ruby_version: "24-x64"
     - ruby_version: "25"
diff --git a/lib/psych/scalar_scanner.rb b/lib/psych/scalar_scanner.rb
index 29c156c..cea2a45 100644
--- a/lib/psych/scalar_scanner.rb
+++ b/lib/psych/scalar_scanner.rb
@@ -14,16 +14,15 @@ module Psych
               |\.(nan|NaN|NAN)(?# not a number))$/x
 
     # Taken from http://yaml.org/type/int.html
-    INTEGER = /^(?:[-+]?0b[0-1_]+          (?# base 2)
-                  |[-+]?0[0-7_]+           (?# base 8)
-                  |[-+]?(?:0|[1-9][0-9_]*) (?# base 10)
-                  |[-+]?0x[0-9a-fA-F_]+    (?# base 16))$/x
+    INTEGER = /^(?:[-+]?0b[0-1_,]+          (?# base 2)
+                  |[-+]?0[0-7_,]+           (?# base 8)
+                  |[-+]?(?:0|[1-9][0-9_,]*) (?# base 10)
+                  |[-+]?0x[0-9a-fA-F_,]+    (?# base 16))$/x
 
     attr_reader :class_loader
 
     # Create a new scanner
     def initialize class_loader
-      @string_cache = {}
       @symbol_cache = {}
       @class_loader = class_loader
     end
@@ -31,81 +30,70 @@ module Psych
     # Tokenize +string+ returning the Ruby object
     def tokenize string
       return nil if string.empty?
-      return string if @string_cache.key?(string)
       return @symbol_cache[string] if @symbol_cache.key?(string)
 
-      case string
       # Check for a String type, being careful not to get caught by hash keys, hex values, and
       # special floats (e.g., -.inf).
-      when /^[^\d\.:-]?[A-Za-z_\s!@#\$%\^&\*\(\)\{\}\<\>\|\/\\~;=]+/, /\n/
-        if string.length > 5
-          @string_cache[string] = true
-          return string
-        end
+      if string.match?(/^[^\d\.:-]?[A-Za-z_\s!@#\$%\^&\*\(\)\{\}\<\>\|\/\\~;=]+/) || string.match?(/\n/)
+        return string if string.length > 5
 
-        case string
-        when /^[^ytonf~]/i
-          @string_cache[string] = true
+        if string.match?(/^[^ytonf~]/i)
           string
-        when '~', /^null$/i
+        elsif string == '~' || string.match?(/^null$/i)
           nil
-        when /^(yes|true|on)$/i
+        elsif string.match?(/^(yes|true|on)$/i)
           true
-        when /^(no|false|off)$/i
+        elsif string.match?(/^(no|false|off)$/i)
           false
         else
-          @string_cache[string] = true
           string
         end
-      when TIME
+      elsif string.match?(TIME)
         begin
           parse_time string
         rescue ArgumentError
           string
         end
-      when /^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/
+      elsif string.match?(/^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/)
         require 'date'
         begin
           class_loader.date.strptime(string, '%Y-%m-%d')
         rescue ArgumentError
           string
         end
-      when /^\.inf$/i
+      elsif string.match?(/^\.inf$/i)
         Float::INFINITY
-      when /^-\.inf$/i
+      elsif string.match?(/^-\.inf$/i)
         -Float::INFINITY
-      when /^\.nan$/i
+      elsif string.match?(/^\.nan$/i)
         Float::NAN
-      when /^:./
+      elsif string.match?(/^:./)
         if string =~ /^:(["'])(.*)\1/
           @symbol_cache[string] = class_loader.symbolize($2.sub(/^:/, ''))
         else
           @symbol_cache[string] = class_loader.symbolize(string.sub(/^:/, ''))
         end
-      when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}$/
+      elsif string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}$/)
         i = 0
         string.split(':').each_with_index do |n,e|
           i += (n.to_i * 60 ** (e - 2).abs)
         end
         i
-      when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}\.[0-9_]*$/
+      elsif string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}\.[0-9_]*$/)
         i = 0
         string.split(':').each_with_index do |n,e|
           i += (n.to_f * 60 ** (e - 2).abs)
         end
         i
-      when FLOAT
-        if string =~ /\A[-+]?\.\Z/
-          @string_cache[string] = true
+      elsif string.match?(FLOAT)
+        if string.match?(/\A[-+]?\.\Z/)
           string
         else
           Float(string.gsub(/[,_]|\.([Ee]|$)/, '\1'))
         end
+      elsif string.match?(INTEGER)
+        parse_int string
       else
-        int = parse_int string.gsub(/[,_]/, '')
-        return int if int
-
-        @string_cache[string] = true
         string
       end
     end
@@ -113,8 +101,7 @@ module Psych
     ###
     # Parse and return an int from +string+
     def parse_int string
-      return unless INTEGER === string
-      Integer(string)
+      Integer(string.gsub(/[,]/, ''))
     end
 
     ###
diff --git a/test/psych/test_scalar_scanner.rb b/test/psych/test_scalar_scanner.rb
index ebe8daf..d12a905 100644
--- a/test/psych/test_scalar_scanner.rb
+++ b/test/psych/test_scalar_scanner.rb
@@ -113,5 +113,22 @@ module Psych
     def test_scan_strings_starting_with_underscores
       assert_equal "_100", ss.tokenize('_100')
     end
+
+    def test_scan_int_commas_and_underscores
+      # NB: This test is to ensure backward compatibility with prior Psych versions,
+      # not to test against any actual YAML specification.
+      assert_equal 123_456_789, ss.tokenize('123_456_789')
+      assert_equal 123_456_789, ss.tokenize('123,456,789')
+      assert_equal 123_456_789, ss.tokenize('1_2,3,4_5,6_789')
+
+      assert_equal 0b010101010, ss.tokenize('0b010101010')
+      assert_equal 0b010101010, ss.tokenize('0b0,1_0,1_,0,1_01,0')
+
+      assert_equal 01234567, ss.tokenize('01234567')
+      assert_equal 01234567, ss.tokenize('0_,,,1_2,_34567')
+
+      assert_equal 0x123456789abcdef, ss.tokenize('0x123456789abcdef')
+      assert_equal 0x123456789abcdef, ss.tokenize('0x12_,34,_56,_789abcdef')
+    end
   end
 end
author	SHIBATA Hiroshi <hsbt@ruby-lang.org>	2019-05-07 07:33:33 +0900
committer	GitHub <noreply@github.com>	2019-05-07 07:33:33 +0900
commit	6ec6e475e8afcf7868b0407fc08014aed886ecf1 (patch)
tree	3b6c934a64f8660c0bbc5091c1cb58b342a23a26
parent	f3a37e6bc1c2a98bfc9fafc389ea05622c744af9 (diff)
parent	7d04834b79aa6677b1bff42161311cb79809ed7d (diff)
download	psych-6ec6e475e8afcf7868b0407fc08014aed886ecf1.zip