1#include "prism/extension.h"
/*
 * Global VALUE handles referring to Ruby-side Prism objects used throughout
 * this extension (the rb_c prefix suggests they are class objects).
 *
 * Uses visible in this listing:
 *   - rb_cPrismEmbDocComment / rb_cPrismInlineComment are selected according
 *     to whether a comment's type is PM_COMMENT_EMBDOC.
 *   - rb_cPrismParseResult, rb_cPrismLexResult and rb_cPrismParseLexResult are
 *     passed as the first argument to parse_result_create().
 *
 * NOTE(review): the assignments to these globals are not visible in this
 * excerpt -- presumably they happen during extension initialisation; confirm.
 */
14VALUE rb_cPrismLocation;
16VALUE rb_cPrismComment;
17VALUE rb_cPrismInlineComment;
18VALUE rb_cPrismEmbDocComment;
19VALUE rb_cPrismMagicComment;
20VALUE rb_cPrismParseError;
21VALUE rb_cPrismParseWarning;
23VALUE rb_cPrismParseResult;
24VALUE rb_cPrismLexResult;
25VALUE rb_cPrismParseLexResult;
26VALUE rb_cPrismStringQuery;
28VALUE rb_cPrismDebugEncoding;
/*
 * Interned IDs naming the supported option keywords.
 *
 * The option-handling code compares an incoming key's ID against each of
 * these in turn to decide which option to apply to the pm_options_t (for
 * example rb_id_option_filepath leads to pm_options_filepath_set()); a key
 * matching none of them ends in an ArgumentError ("unknown keyword").
 *
 * rb_id_option_frozen_string_literal is visibly assigned from
 * rb_intern_const("frozen_string_literal").
 * NOTE(review): the remaining IDs are presumably interned the same way, but
 * their assignments are not visible in this excerpt -- confirm.
 */
30ID rb_id_option_command_line;
31ID rb_id_option_encoding;
32ID rb_id_option_filepath;
33ID rb_id_option_frozen_string_literal;
35ID rb_id_option_main_script;
36ID rb_id_option_partial_script;
37ID rb_id_option_scopes;
38ID rb_id_option_version;
50check_string(
VALUE value) {
57 return RSTRING_PTR(value);
70 pm_string_constant_init(input, RSTRING_PTR(
string), RSTRING_LEN(
string));
89 if (!pm_options_scopes_init(options, scopes_count)) {
94 for (
size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
95 VALUE scope = rb_ary_entry(scopes, scope_index);
106 if (!pm_options_scope_init(options_scope, locals_count)) {
111 for (
size_t local_index = 0; local_index < locals_count; local_index++) {
112 VALUE local = rb_ary_entry(scope, local_index);
122 const char *name = rb_id2name(
SYM2ID(local));
123 pm_string_constant_init(scope_local, name, strlen(name));
136 if (key_id == rb_id_option_filepath) {
137 if (!
NIL_P(value)) pm_options_filepath_set(options, check_string(value));
138 }
else if (key_id == rb_id_option_encoding) {
141 pm_options_encoding_locked_set(options,
true);
143 pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
146 }
else if (key_id == rb_id_option_line) {
147 if (!
NIL_P(value)) pm_options_line_set(options,
NUM2INT(value));
148 }
else if (key_id == rb_id_option_frozen_string_literal) {
149 if (!
NIL_P(value)) pm_options_frozen_string_literal_set(options,
RTEST(value));
150 }
else if (key_id == rb_id_option_version) {
152 const char *version = check_string(value);
154 if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
155 rb_raise(rb_eArgError,
"invalid version: %" PRIsVALUE, value);
158 }
else if (key_id == rb_id_option_scopes) {
159 if (!
NIL_P(value)) build_options_scopes(options, value);
160 }
else if (key_id == rb_id_option_command_line) {
162 const char *
string = check_string(value);
163 uint8_t command_line = 0;
165 for (
size_t index = 0; index < strlen(
string); index++) {
166 switch (
string[index]) {
173 default: rb_raise(rb_eArgError,
"invalid command line flag: '%c'",
string[index]);
break;
177 pm_options_command_line_set(options, command_line);
179 }
else if (key_id == rb_id_option_main_script) {
180 if (!
NIL_P(value)) pm_options_main_script_set(options,
RTEST(value));
181 }
else if (key_id == rb_id_option_partial_script) {
182 if (!
NIL_P(value)) pm_options_partial_script_set(options,
RTEST(value));
184 rb_raise(rb_eArgError,
"unknown keyword: %" PRIsVALUE, key);
205build_options(
VALUE argument) {
218 if (!
NIL_P(keywords)) {
223 rb_protect(build_options, (
VALUE) argument, &state);
226 pm_options_free(options);
231 if (!
NIL_P(filepath)) {
233 pm_options_free(options);
237 pm_options_filepath_set(options, RSTRING_PTR(filepath));
250 extract_options(options,
Qnil, keywords);
251 input_load_string(input,
string);
264 *encoded_filepath = rb_str_encode_ospath(filepath);
265 extract_options(options, *encoded_filepath, keywords);
267 const char *source = (
const char *) pm_string_source(&options->
filepath);
270 switch (result = pm_string_file_init(input, source)) {
274 pm_options_free(options);
277 int e = rb_w32_map_errno(GetLastError());
286 pm_options_free(options);
290 pm_options_free(options);
291 rb_raise(
rb_eRuntimeError,
"Unknown error (%d) initializing file: %s", result, source);
296#ifndef PRISM_EXCLUDE_SERIALIZATION
308 if (!pm_buffer_init(&buffer)) {
313 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
316 pm_serialize(&parser, node, &buffer);
318 VALUE result =
rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
319 pm_node_destroy(&parser, node);
320 pm_buffer_free(&buffer);
321 pm_parser_free(&parser);
337 string_options(argc, argv, &input, &options);
339#ifdef PRISM_BUILD_DEBUG
340 size_t length = pm_string_length(&input);
342 memcpy(dup, pm_string_source(&input), length);
343 pm_string_constant_init(&input, dup, length);
346 VALUE value = dump_input(&input, &options);
348#ifdef PRISM_BUILD_DEBUG
352 pm_string_free(&input);
353 pm_options_free(&options);
370 VALUE encoded_filepath;
371 file_options(argc, argv, &input, &options, &encoded_filepath);
373 VALUE value = dump_input(&input, &options);
374 pm_string_free(&input);
375 pm_options_free(&options);
394 VALUE location_argv[] = {
397 LONG2FIX(comment->location.end - comment->location.start)
400 VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
416 VALUE key_loc_argv[] = {
422 VALUE value_loc_argv[] = {
428 VALUE magic_comment_argv[] = {
436 return magic_comments;
467 VALUE location_argv[] = {
474 switch (error->
level) {
476 level =
ID2SYM(rb_intern(
"syntax"));
479 level =
ID2SYM(rb_intern(
"argument"));
482 level =
ID2SYM(rb_intern(
"load"));
488 VALUE error_argv[] = {
510 VALUE location_argv[] = {
517 switch (warning->
level) {
519 level =
ID2SYM(rb_intern(
"default"));
522 level =
ID2SYM(rb_intern(
"verbose"));
528 VALUE warning_argv[] = {
546 VALUE result_argv[] = {
548 parser_comments(parser, source),
549 parser_magic_comments(parser, source),
550 parser_data_loc(parser, source),
551 parser_errors(parser, encoding, source),
552 parser_warnings(parser, encoding, source),
583 VALUE yields = rb_assoc_new(
584 pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source),
588 rb_ary_push(parse_lex_data->tokens, yields);
597parse_lex_encoding_changed_callback(
pm_parser_t *parser) {
599 parse_lex_data->encoding = rb_enc_find(parser->
encoding->
name);
605 VALUE tokens = parse_lex_data->tokens;
606 for (
long index = 0; index <
RARRAY_LEN(tokens); index++) {
607 VALUE yields = rb_ary_entry(tokens, index);
608 VALUE token = rb_ary_entry(yields, 0);
611 rb_enc_associate(value, parse_lex_data->encoding);
623 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
624 pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
626 VALUE source_string =
rb_str_new((
const char *) pm_string_source(input), pm_string_length(input));
632 .tokens = rb_ary_new(),
633 .encoding = rb_utf8_encoding()
638 .
data = (
void *) data,
639 .callback = parse_lex_token,
650 rb_enc_associate(source_string, encoding);
658 VALUE value = rb_ary_new_capa(2);
659 rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
660 rb_ary_push(value, parse_lex_data.tokens);
661 result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source);
663 result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source);
666 pm_node_destroy(&parser, node);
667 pm_parser_free(&parser);
683 string_options(argc, argv, &input, &options);
685 VALUE result = parse_lex_input(&input, &options,
false);
686 pm_string_free(&input);
687 pm_options_free(&options);
704 VALUE encoded_filepath;
705 file_options(argc, argv, &input, &options, &encoded_filepath);
707 VALUE value = parse_lex_input(&input, &options,
false);
708 pm_string_free(&input);
709 pm_options_free(&options);
724 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
729 VALUE source = pm_source_new(&parser, encoding);
730 VALUE value = pm_ast_new(&parser, node, encoding, source);
731 VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source) ;
733 pm_node_destroy(&parser, node);
734 pm_parser_free(&parser);
783 string_options(argc, argv, &input, &options);
785#ifdef PRISM_BUILD_DEBUG
786 size_t length = pm_string_length(&input);
788 memcpy(dup, pm_string_source(&input), length);
789 pm_string_constant_init(&input, dup, length);
792 VALUE value = parse_input(&input, &options);
794#ifdef PRISM_BUILD_DEBUG
798 pm_string_free(&input);
799 pm_options_free(&options);
811parse_file(
int argc,
VALUE *argv,
VALUE self) {
815 VALUE encoded_filepath;
816 file_options(argc, argv, &input, &options, &encoded_filepath);
818 VALUE value = parse_input(&input, &options);
819 pm_string_free(&input);
820 pm_options_free(&options);
831 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
834 pm_node_destroy(&parser, node);
835 pm_parser_free(&parser);
851 string_options(argc, argv, &input, &options);
852 profile_input(&input, &options);
853 pm_string_free(&input);
854 pm_options_free(&options);
868profile_file(
int argc,
VALUE *argv,
VALUE self) {
872 VALUE encoded_filepath;
873 file_options(argc, argv, &input, &options, &encoded_filepath);
875 profile_input(&input, &options);
876 pm_string_free(&input);
877 pm_options_free(&options);
886parse_stream_fgets(
char *
string,
int size,
void *stream) {
894 const char *cstr = RSTRING_PTR(line);
895 long length = RSTRING_LEN(line);
897 memcpy(
string, cstr, length);
898 string[length] =
'\0';
911parse_stream(
int argc,
VALUE *argv,
VALUE self) {
917 extract_options(&options,
Qnil, keywords);
922 pm_node_t *node = pm_parse_stream(&parser, &buffer, (
void *) stream, parse_stream_fgets, &options);
925 VALUE source = pm_source_new(&parser, encoding);
926 VALUE value = pm_ast_new(&parser, node, encoding, source);
927 VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source);
929 pm_node_destroy(&parser, node);
930 pm_buffer_free(&buffer);
931 pm_parser_free(&parser);
942 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
947 VALUE source = pm_source_new(&parser, encoding);
948 VALUE comments = parser_comments(&parser, source);
950 pm_node_destroy(&parser, node);
951 pm_parser_free(&parser);
964parse_comments(
int argc,
VALUE *argv,
VALUE self) {
967 string_options(argc, argv, &input, &options);
969 VALUE result = parse_input_comments(&input, &options);
970 pm_string_free(&input);
971 pm_options_free(&options);
984parse_file_comments(
int argc,
VALUE *argv,
VALUE self) {
988 VALUE encoded_filepath;
989 file_options(argc, argv, &input, &options, &encoded_filepath);
991 VALUE value = parse_input_comments(&input, &options);
992 pm_string_free(&input);
993 pm_options_free(&options);
1013parse_lex(
int argc,
VALUE *argv,
VALUE self) {
1016 string_options(argc, argv, &input, &options);
1018 VALUE value = parse_lex_input(&input, &options,
true);
1019 pm_string_free(&input);
1020 pm_options_free(&options);
1040parse_lex_file(
int argc,
VALUE *argv,
VALUE self) {
1044 VALUE encoded_filepath;
1045 file_options(argc, argv, &input, &options, &encoded_filepath);
1047 VALUE value = parse_lex_input(&input, &options,
true);
1048 pm_string_free(&input);
1049 pm_options_free(&options);
1060 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
1063 pm_node_destroy(&parser, node);
1066 pm_parser_free(&parser);
1079parse_success_p(
int argc,
VALUE *argv,
VALUE self) {
1082 string_options(argc, argv, &input, &options);
1084 VALUE result = parse_input_success_p(&input, &options);
1085 pm_string_free(&input);
1086 pm_options_free(&options);
1099parse_failure_p(
int argc,
VALUE *argv,
VALUE self) {
1111parse_file_success_p(
int argc,
VALUE *argv,
VALUE self) {
1115 VALUE encoded_filepath;
1116 file_options(argc, argv, &input, &options, &encoded_filepath);
1118 VALUE result = parse_input_success_p(&input, &options);
1119 pm_string_free(&input);
1120 pm_options_free(&options);
1133parse_file_failure_p(
int argc,
VALUE *argv,
VALUE self) {
1149 rb_raise(rb_eArgError,
"Invalid or non ascii-compatible encoding");
1168string_query_local_p(
VALUE self,
VALUE string) {
1169 const uint8_t *source = (
const uint8_t *) check_string(
string);
1170 return string_query(pm_string_query_local(source, RSTRING_LEN(
string), rb_enc_get(
string)->name));
1182string_query_constant_p(
VALUE self,
VALUE string) {
1183 const uint8_t *source = (
const uint8_t *) check_string(
string);
1184 return string_query(pm_string_query_constant(source, RSTRING_LEN(
string), rb_enc_get(
string)->name));
1194string_query_method_name_p(
VALUE self,
VALUE string) {
1195 const uint8_t *source = (
const uint8_t *) check_string(
string);
1196 return string_query(pm_string_query_method_name(source, RSTRING_LEN(
string), rb_enc_get(
string)->name));
1206RUBY_FUNC_EXPORTED
void
1210 if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
1213 "The prism library version (%s) does not match the expected version (%s)",
1215 EXPECTED_PRISM_VERSION
1243 rb_id_option_frozen_string_literal =
rb_intern_const(
"frozen_string_literal");
1249 rb_id_source_for = rb_intern(
"for");
1254 rb_define_const(rb_cPrism,
"VERSION",
rb_str_new2(EXPECTED_PRISM_VERSION));
1273#ifndef PRISM_EXCLUDE_SERIALIZATION
1283 Init_prism_api_node();
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy, but only when RUBY_DEBUG is truthy.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
@ PM_WARNING_LEVEL_DEFAULT
For warnings which should be emitted if $VERBOSE != nil.
@ PM_WARNING_LEVEL_VERBOSE
For warnings which should be emitted if $VERBOSE == true.
@ PM_ERROR_LEVEL_ARGUMENT
For errors that should raise an argument error.
@ PM_ERROR_LEVEL_LOAD
For errors that should raise a load error.
@ PM_ERROR_LEVEL_SYNTAX
For errors that should raise a syntax error.
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
VALUE rb_define_module(const char *name)
Defines a top-level module.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
#define rb_str_new2
Old name of rb_str_new_cstr.
#define T_STRING
Old name of RUBY_T_STRING.
#define xfree
Old name of ruby_xfree.
#define INT2FIX
Old name of RB_INT2FIX.
#define ID2SYM
Old name of RB_ID2SYM.
#define ULONG2NUM
Old name of RB_ULONG2NUM.
#define SYM2ID
Old name of RB_SYM2ID.
#define xmalloc
Old name of ruby_xmalloc.
#define LONG2FIX
Old name of RB_INT2FIX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define Qtrue
Old name of RUBY_Qtrue.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define NIL_P
Old name of RB_NIL_P.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eNoMemError
NoMemoryError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
VALUE rb_ivar_get(VALUE obj, ID name)
Identical to rb_iv_get(), except it accepts the name as an ID instead of a C string.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE type(ANYARGS)
ANYARGS-ed function type.
void rb_hash_foreach(VALUE q, int_type *w, VALUE e)
Iteration over the given hash.
static const uint8_t PM_OPTIONS_COMMAND_LINE_E
A bit representing whether or not the command line -e option was set.
static const uint8_t PM_OPTIONS_COMMAND_LINE_L
A bit representing whether or not the command line -l option was set.
static const uint8_t PM_OPTIONS_COMMAND_LINE_A
A bit representing whether or not the command line -a option was set.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
static const uint8_t PM_OPTIONS_COMMAND_LINE_X
A bit representing whether or not the command line -x option was set.
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
pm_string_init_result_t
Represents the result of calling pm_string_mapped_init or pm_string_file_init.
@ PM_STRING_INIT_SUCCESS
Indicates that the string was successfully initialized.
@ PM_STRING_INIT_ERROR_GENERIC
Indicates a generic error from a string_*_init function, where the type of error should be read from ...
@ PM_STRING_INIT_ERROR_DIRECTORY
Indicates that the file that was attempted to be opened was a directory.
pm_string_query_t
Represents the results of a slice query.
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
#define RARRAY_LEN
Just another name of rb_array_len.
#define errno
Ractor-aware version of errno.
#define RTEST
This is an old name of RB_TEST.
We need a struct here to pass through rb_protect and it has to be a single value.
This struct gets stored in the parser and passed in to the lex callback any time a new token is found...
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
This struct represents a diagnostic generated during parsing.
pm_location_t location
The location of the diagnostic in the source.
const char * message
The message associated with the diagnostic.
pm_list_node_t node
The embedded base node.
pm_diagnostic_id_t diag_id
The ID of the diagnostic.
uint8_t level
The level of the diagnostic, see pm_error_level_t and pm_warning_level_t for possible values.
const char * name
The name of the encoding.
When you are lexing through a file, the lexer needs all of the information that the parser additional...
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
struct pm_list_node * next
A pointer to the next node in the list.
pm_list_node_t * head
A pointer to the head of the list.
size_t size
The size of the list.
const uint8_t * start
A pointer to the start location of the range in the source.
const uint8_t * end
A pointer to the end location of the range in the source.
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
This is the base structure that represents a node in the syntax tree.
A scope of locals surrounding the code that is being parsed.
pm_string_t * locals
The names of the locals in the scope.
The options that can be passed to the parser.
pm_options_scope_t * scopes
The scopes surrounding the code that is being parsed.
int32_t line
The line within the file that the parse starts on.
pm_string_t filepath
The name of the file that is currently being parsed.
This struct represents the overall parser.
pm_lex_state_t lex_state
The current state of the lexer.
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of th...
const uint8_t * start
The pointer to the start of the source.
pm_list_t error_list
The list of errors that have been found while parsing.
pm_list_t warning_list
The list of warnings that have been found while parsing.
int32_t start_line
The line number at the start of the parse.
pm_list_t comment_list
The list of comments that have been found while parsing.
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
A generic string type that can have various ownership semantics.
This struct represents a token in the Ruby source.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.