diff options
-rw-r--r-- | ext/java/PsychParser.java | 7 | ||||
-rw-r--r-- | test/psych/test_encoding.rb | 12 |
2 files changed, 19 insertions, 0 deletions
diff --git a/ext/java/PsychParser.java b/ext/java/PsychParser.java index b3e747e..f5b6faf 100644 --- a/ext/java/PsychParser.java +++ b/ext/java/PsychParser.java @@ -33,6 +33,8 @@ import java.nio.charset.Charset; import java.util.Map; import org.jcodings.Encoding; +import org.jcodings.specific.UTF16BEEncoding; +import org.jcodings.specific.UTF16LEEncoding; import org.jcodings.specific.UTF8Encoding; import org.jcodings.unicode.UnicodeEncoding; import org.jruby.Ruby; @@ -162,6 +164,11 @@ public class PsychParser extends RubyObject { if (yaml instanceof RubyIO) { Encoding enc = ((RubyIO) yaml).getReadEncoding(); charset = enc.getCharset(); + + // libyaml treats non-utf encodings as utf-8 and hopes for the best. + if (!(enc instanceof UTF8Encoding) && !(enc instanceof UTF16LEEncoding) && !(enc instanceof UTF16BEEncoding)) { + charset = UTF8Encoding.INSTANCE.getCharset(); + } } if (charset == null) { // If we can't get it from the IO or it doesn't have a charset, fall back on UTF-8 diff --git a/test/psych/test_encoding.rb b/test/psych/test_encoding.rb index a4f9f03..01ebe25 100644 --- a/test/psych/test_encoding.rb +++ b/test/psych/test_encoding.rb @@ -106,6 +106,18 @@ module Psych } end + def test_io_utf8_read_as_binary + Tempfile.create(['utf8', 'yml']) {|t| + t.binmode + t.write '--- こんにちは!'.encode('UTF-8') + t.close + + File.open(t.path, 'rb', :encoding => 'ascii-8bit') do |f| + assert_equal "こんにちは!", Psych.load(f) + end + } + end + def test_emit_alias @emitter.start_stream Psych::Parser::UTF8 @emitter.start_document [], [], true |