diff options
author | Aaron Patterson <aaron.patterson@gmail.com> | 2010-05-18 20:03:01 -0700 |
---|---|---|
committer | Aaron Patterson <aaron.patterson@gmail.com> | 2010-05-18 20:03:01 -0700 |
commit | 07667473d0ec204eb2cfa7b8938ae2f5010e3220 (patch) | |
tree | 475d3cab0e01901e30d49cd39a00f6e908cede0d | |
parent | f43e454615714166c91b5628d3be644d560d97d1 (diff) | |
download | psych-07667473d0ec204eb2cfa7b8938ae2f5010e3220.zip |
adding UTF-16 and UTF-16+BOM support
-rw-r--r-- | ext/psych/emitter.c | 12 |
-rw-r--r-- | ext/psych/parser.c | 76 |
-rw-r--r-- | lib/psych/nodes/stream.rb | 2 |
-rw-r--r-- | test/psych/test_parser.rb | 19 |
4 files changed, 75 insertions, 34 deletions
diff --git a/ext/psych/emitter.c b/ext/psych/emitter.c index befa98e..a06304a 100644 --- a/ext/psych/emitter.c +++ b/ext/psych/emitter.c @@ -17,15 +17,21 @@ static int writer(void *ctx, unsigned char *buffer, size_t size) return (int)NUM2INT(wrote); } -static void dealloc(yaml_emitter_t * emitter) +static void dealloc(void * ptr) { + yaml_emitter_t * emitter; + + emitter = (yaml_emitter_t *)ptr; yaml_emitter_delete(emitter); - free(emitter); + xfree(emitter); } static VALUE allocate(VALUE klass) { - yaml_emitter_t * emitter = malloc(sizeof(yaml_emitter_t)); + yaml_emitter_t * emitter; + + emitter = xmalloc(sizeof(yaml_emitter_t)); + yaml_emitter_initialize(emitter); yaml_emitter_set_unicode(emitter, 1); yaml_emitter_set_indent(emitter, 2); diff --git a/ext/psych/parser.c b/ext/psych/parser.c index 0fa02b9..41260ab 100644 --- a/ext/psych/parser.c +++ b/ext/psych/parser.c @@ -39,6 +39,25 @@ static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read) return 1; } +static void dealloc(void * ptr) +{ + yaml_parser_t * parser; + + parser = (yaml_parser_t *)ptr; + yaml_parser_delete(parser); + xfree(parser); +} + +static VALUE allocate(VALUE klass) +{ + yaml_parser_t * parser; + + parser = xmalloc(sizeof(yaml_parser_t)); + yaml_parser_initialize(parser); + + return Data_Wrap_Struct(klass, 0, dealloc, parser); +} + /* * call-seq: * parser.parse(yaml) @@ -50,35 +69,33 @@ static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read) */ static VALUE parse(VALUE self, VALUE yaml) { - yaml_parser_t parser; + yaml_parser_t * parser; yaml_event_t event; int done = 0; #ifdef HAVE_RUBY_ENCODING_H - int encoding = rb_enc_find_index("ASCII-8BIT"); - rb_encoding * internal_enc; + int encoding = rb_utf8_encindex(); + rb_encoding * internal_enc = rb_default_internal_encoding(); #endif VALUE handler = rb_iv_get(self, "@handler"); - - yaml_parser_initialize(&parser); + Data_Get_Struct(self, yaml_parser_t, parser); if(rb_respond_to(yaml, id_read)) { - 
yaml_parser_set_input(&parser, io_reader, (void *)yaml); + yaml_parser_set_input(parser, io_reader, (void *)yaml); } else { StringValue(yaml); yaml_parser_set_input_string( - &parser, + parser, (const unsigned char *)RSTRING_PTR(yaml), (size_t)RSTRING_LEN(yaml) ); } while(!done) { - if(!yaml_parser_parse(&parser, &event)) { - size_t line = parser.mark.line; - size_t column = parser.mark.column; + if(!yaml_parser_parse(parser, &event)) { + size_t line = parser->mark.line; + size_t column = parser->mark.column; - yaml_parser_delete(&parser); rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d", (int)line, (int)column); } @@ -86,25 +103,6 @@ static VALUE parse(VALUE self, VALUE yaml) switch(event.type) { case YAML_STREAM_START_EVENT: -#ifdef HAVE_RUBY_ENCODING_H - switch(event.data.stream_start.encoding) { - case YAML_ANY_ENCODING: - break; - case YAML_UTF8_ENCODING: - encoding = rb_enc_find_index("UTF-8"); - break; - case YAML_UTF16LE_ENCODING: - encoding = rb_enc_find_index("UTF-16LE"); - break; - case YAML_UTF16BE_ENCODING: - encoding = rb_enc_find_index("UTF-16BE"); - break; - default: - break; - } - internal_enc = rb_default_internal_encoding(); -#endif - rb_funcall(handler, id_start_stream, 1, INT2NUM((long)event.data.stream_start.encoding) ); @@ -286,6 +284,22 @@ static VALUE parse(VALUE self, VALUE yaml) return self; } +/* + * call-seq: + * parser.external_encoding=(encoding) + * + * Set the encoding for this parser to +encoding+ + */ +static VALUE set_external_encoding(VALUE self, VALUE encoding) +{ + yaml_parser_t * parser; + + Data_Get_Struct(self, yaml_parser_t, parser); + yaml_parser_set_encoding(parser, NUM2INT(encoding)); + + return encoding; +} + void Init_psych_parser() { #if 0 @@ -293,6 +307,7 @@ void Init_psych_parser() #endif cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject); + rb_define_alloc_func(cPsychParser, allocate); /* Any encoding: Let the parser choose the encoding */ rb_define_const(cPsychParser, "ANY", 
INT2NUM(YAML_ANY_ENCODING)); @@ -309,6 +324,7 @@ void Init_psych_parser() ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError); rb_define_method(cPsychParser, "parse", parse, 1); + rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1); id_read = rb_intern("read"); id_empty = rb_intern("empty"); diff --git a/lib/psych/nodes/stream.rb b/lib/psych/nodes/stream.rb index f4aab5a..7cf5e03 100644 --- a/lib/psych/nodes/stream.rb +++ b/lib/psych/nodes/stream.rb @@ -21,7 +21,7 @@ module Psych UTF16BE = Psych::Parser::UTF16BE # The encoding used for this stream - attr_reader :encoding + attr_accessor :encoding ### # Create a new Psych::Nodes::Stream node with an +encoding+ that diff --git a/test/psych/test_parser.rb b/test/psych/test_parser.rb index 9ef2a41..8c44833 100644 --- a/test/psych/test_parser.rb +++ b/test/psych/test_parser.rb @@ -1,3 +1,5 @@ +# coding: utf-8 + require_relative 'helper' module Psych @@ -24,6 +26,23 @@ module Psych @parser = Psych::Parser.new EventCatcher.new end + def test_bom + tadpole = 'おたまじゃくし' + + # BOM + text + yml = "\uFEFF#{tadpole}".encode('UTF-16LE') + @parser.parse yml + assert_equal tadpole, @parser.handler.calls[2][1].first + end + + def test_external_encoding + tadpole = 'おたまじゃくし' + + @parser.external_encoding = Psych::Parser::UTF16LE + @parser.parse tadpole.encode 'UTF-16LE' + assert_equal tadpole, @parser.handler.calls[2][1].first + end + def test_bogus_io o = Object.new def o.read len; self end |