class Psych::Parser

YAML event parser class. This class parses a YAML document and calls events on the handler that is passed to the constructor. The events can be used for things such as constructing a YAML AST or deserializing YAML documents. It can even be fed back to Psych::Emitter to emit the same document that was parsed.

See Psych::Handler for documentation on the events that Psych::Parser emits.

Here is an example that prints out ever scalar found in a YAML document:

# Handler for detecting scalar values
class ScalarHandler < Psych::Handler
  def scalar value, anchor, tag, plain, quoted, style
    puts value
  end
end

parser = Psych::Parser.new(ScalarHandler.new)
parser.parse(yaml_document)

Here is an example that feeds the parser back in to Psych::Emitter. The YAML document is read from STDIN and written back out to STDERR:

parser = Psych::Parser.new(Psych::Emitter.new($stderr))
parser.parse($stdin)

Psych uses Psych::Parser in combination with Psych::TreeBuilder to construct an AST of the parsed YAML document.

Constants

ANY

Let the parser choose the encoding

UTF16BE

UTF-16-BE Encoding with BOM

UTF16LE

UTF-16-LE Encoding with BOM

UTF8

UTF-8 Encoding

Attributes

external_encoding[W]

Set the encoding for this parser to encoding

handler[RW]

The handler on which events will be called

Public Class Methods

new(handler = Handler.new) click to toggle source

Creates a new Psych::Parser instance with handler. YAML events will be called on handler. See Psych::Parser for more details.

# File ext/psych/lib/psych/parser.rb, line 47
def initialize handler = Handler.new
  @handler = handler
  @external_encoding = ANY
end

Public Instance Methods

mark # → #<Psych::Parser::Mark> click to toggle source

Returns a Psych::Parser::Mark object that contains line, column, and index information.

static VALUE mark(VALUE self)
{
    VALUE mark_klass;
    VALUE args[3];
    yaml_parser_t * parser;

    TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);
    mark_klass = rb_const_get_at(cPsychParser, rb_intern("Mark"));
    args[0] = INT2NUM(parser->mark.index);
    args[1] = INT2NUM(parser->mark.line);
    args[2] = INT2NUM(parser->mark.column);

    return rb_class_new_instance(3, args, mark_klass);
}
parse(yaml) click to toggle source

Parse the YAML document contained in yaml. Events will be called on the handler set on the parser instance.

See Psych::Parser and Psych::Parser#handler

static VALUE parse(int argc, VALUE *argv, VALUE self)
{
    VALUE yaml, path;
    yaml_parser_t * parser;
    yaml_event_t event;
    int done = 0;
    int tainted = 0;
    int state = 0;
    int parser_encoding = YAML_ANY_ENCODING;
    int encoding = rb_utf8_encindex();
    rb_encoding * internal_enc = rb_default_internal_encoding();
    VALUE handler = rb_iv_get(self, "@handler");

    if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
        if(rb_respond_to(yaml, id_path))
            path = rb_funcall(yaml, id_path, 0);
        else
            path = rb_str_new2("<unknown>");
    }

    TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);

    yaml_parser_delete(parser);
    yaml_parser_initialize(parser);

    if (OBJ_TAINTED(yaml)) tainted = 1;

    if (rb_respond_to(yaml, id_read)) {
        yaml = transcode_io(yaml, &parser_encoding);
        yaml_parser_set_encoding(parser, parser_encoding);
        yaml_parser_set_input(parser, io_reader, (void *)yaml);
        if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
    } else {
        StringValue(yaml);
        yaml = transcode_string(yaml, &parser_encoding);
        yaml_parser_set_encoding(parser, parser_encoding);
        yaml_parser_set_input_string(
                parser,
                (const unsigned char *)RSTRING_PTR(yaml),
                (size_t)RSTRING_LEN(yaml)
                );
    }

    while(!done) {
        VALUE event_args[5];
        VALUE start_line, start_column, end_line, end_column;

        if(!yaml_parser_parse(parser, &event)) {
            VALUE exception;

            exception = make_exception(parser, path);
            yaml_parser_delete(parser);
            yaml_parser_initialize(parser);

            rb_exc_raise(exception);
        }

        start_line = INT2NUM((long)event.start_mark.line);
        start_column = INT2NUM((long)event.start_mark.column);
        end_line = INT2NUM((long)event.end_mark.line);
        end_column = INT2NUM((long)event.end_mark.column);

        event_args[0] = handler;
        event_args[1] = start_line;
        event_args[2] = start_column;
        event_args[3] = end_line;
        event_args[4] = end_column;
        rb_protect(protected_event_location, (VALUE)event_args, &state);

        switch(event.type) {
            case YAML_STREAM_START_EVENT:
              {
                  VALUE args[2];

                  args[0] = handler;
                  args[1] = INT2NUM((long)event.data.stream_start.encoding);
                  rb_protect(protected_start_stream, (VALUE)args, &state);
              }
              break;
          case YAML_DOCUMENT_START_EVENT:
            {
                VALUE args[4];
                /* Get a list of tag directives (if any) */
                VALUE tag_directives = rb_ary_new();
                /* Grab the document version */
                VALUE version = event.data.document_start.version_directive ?
                    rb_ary_new3(
                        (long)2,
                        INT2NUM((long)event.data.document_start.version_directive->major),
                        INT2NUM((long)event.data.document_start.version_directive->minor)
                        ) : rb_ary_new();

                if(event.data.document_start.tag_directives.start) {
                    yaml_tag_directive_t *start =
                        event.data.document_start.tag_directives.start;
                    yaml_tag_directive_t *end =
                        event.data.document_start.tag_directives.end;
                    for(; start != end; start++) {
                        VALUE handle = Qnil;
                        VALUE prefix = Qnil;
                        if(start->handle) {
                            handle = rb_str_new2((const char *)start->handle);
                            if (tainted) OBJ_TAINT(handle);
                            PSYCH_TRANSCODE(handle, encoding, internal_enc);
                        }

                        if(start->prefix) {
                            prefix = rb_str_new2((const char *)start->prefix);
                            if (tainted) OBJ_TAINT(prefix);
                            PSYCH_TRANSCODE(prefix, encoding, internal_enc);
                        }

                        rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
                    }
                }
                args[0] = handler;
                args[1] = version;
                args[2] = tag_directives;
                args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
                rb_protect(protected_start_document, (VALUE)args, &state);
            }
            break;
          case YAML_DOCUMENT_END_EVENT:
            {
                VALUE args[2];

                args[0] = handler;
                args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
                rb_protect(protected_end_document, (VALUE)args, &state);
            }
            break;
          case YAML_ALIAS_EVENT:
            {
                VALUE args[2];
                VALUE alias = Qnil;
                if(event.data.alias.anchor) {
                    alias = rb_str_new2((const char *)event.data.alias.anchor);
                    if (tainted) OBJ_TAINT(alias);
                    PSYCH_TRANSCODE(alias, encoding, internal_enc);
                }

                args[0] = handler;
                args[1] = alias;
                rb_protect(protected_alias, (VALUE)args, &state);
            }
            break;
          case YAML_SCALAR_EVENT:
            {
                VALUE args[7];
                VALUE anchor = Qnil;
                VALUE tag = Qnil;
                VALUE plain_implicit, quoted_implicit, style;
                VALUE val = rb_str_new(
                    (const char *)event.data.scalar.value,
                    (long)event.data.scalar.length
                    );
                if (tainted) OBJ_TAINT(val);

                PSYCH_TRANSCODE(val, encoding, internal_enc);

                if(event.data.scalar.anchor) {
                    anchor = rb_str_new2((const char *)event.data.scalar.anchor);
                    if (tainted) OBJ_TAINT(anchor);
                    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
                }

                if(event.data.scalar.tag) {
                    tag = rb_str_new2((const char *)event.data.scalar.tag);
                    if (tainted) OBJ_TAINT(tag);
                    PSYCH_TRANSCODE(tag, encoding, internal_enc);
                }

                plain_implicit =
                    event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;

                quoted_implicit =
                    event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;

                style = INT2NUM((long)event.data.scalar.style);

                args[0] = handler;
                args[1] = val;
                args[2] = anchor;
                args[3] = tag;
                args[4] = plain_implicit;
                args[5] = quoted_implicit;
                args[6] = style;
                rb_protect(protected_scalar, (VALUE)args, &state);
            }
            break;
          case YAML_SEQUENCE_START_EVENT:
            {
                VALUE args[5];
                VALUE anchor = Qnil;
                VALUE tag = Qnil;
                VALUE implicit, style;
                if(event.data.sequence_start.anchor) {
                    anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
                    if (tainted) OBJ_TAINT(anchor);
                    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
                }

                tag = Qnil;
                if(event.data.sequence_start.tag) {
                    tag = rb_str_new2((const char *)event.data.sequence_start.tag);
                    if (tainted) OBJ_TAINT(tag);
                    PSYCH_TRANSCODE(tag, encoding, internal_enc);
                }

                implicit =
                    event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;

                style = INT2NUM((long)event.data.sequence_start.style);

                args[0] = handler;
                args[1] = anchor;
                args[2] = tag;
                args[3] = implicit;
                args[4] = style;

                rb_protect(protected_start_sequence, (VALUE)args, &state);
            }
            break;
          case YAML_SEQUENCE_END_EVENT:
            rb_protect(protected_end_sequence, handler, &state);
            break;
          case YAML_MAPPING_START_EVENT:
            {
                VALUE args[5];
                VALUE anchor = Qnil;
                VALUE tag = Qnil;
                VALUE implicit, style;
                if(event.data.mapping_start.anchor) {
                    anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
                    if (tainted) OBJ_TAINT(anchor);
                    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
                }

                if(event.data.mapping_start.tag) {
                    tag = rb_str_new2((const char *)event.data.mapping_start.tag);
                    if (tainted) OBJ_TAINT(tag);
                    PSYCH_TRANSCODE(tag, encoding, internal_enc);
                }

                implicit =
                    event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;

                style = INT2NUM((long)event.data.mapping_start.style);

                args[0] = handler;
                args[1] = anchor;
                args[2] = tag;
                args[3] = implicit;
                args[4] = style;

                rb_protect(protected_start_mapping, (VALUE)args, &state);
            }
            break;
          case YAML_MAPPING_END_EVENT:
            rb_protect(protected_end_mapping, handler, &state);
            break;
          case YAML_NO_EVENT:
            rb_protect(protected_empty, handler, &state);
            break;
          case YAML_STREAM_END_EVENT:
            rb_protect(protected_end_stream, handler, &state);
            done = 1;
            break;
        }
        yaml_event_delete(&event);
        if (state) rb_jump_tag(state);
    }

    return self;
}