Ruby 3.5.0dev (2025-01-10 revision 5fab31b15e32622c4b71d1d347a41937e9f9c212)
extension.c
1#include "prism/extension.h"
2
3#ifdef _WIN32
4#include <ruby/win32.h>
5#endif
6
7// NOTE: this file should contain only bindings. All non-trivial logic should be
8// in libprism so it can be shared by its various callers.
9
// Handles to the Ruby-side Prism module and classes. They are assigned in
// Init_prism() and used when instantiating results.
10VALUE rb_cPrism;
11VALUE rb_cPrismNode;
12VALUE rb_cPrismSource;
13VALUE rb_cPrismToken;
14VALUE rb_cPrismLocation;
15
// Comment, diagnostic, result and string query classes, also assigned in
// Init_prism().
16VALUE rb_cPrismComment;
17VALUE rb_cPrismInlineComment;
18VALUE rb_cPrismEmbDocComment;
19VALUE rb_cPrismMagicComment;
20VALUE rb_cPrismParseError;
21VALUE rb_cPrismParseWarning;
22VALUE rb_cPrismResult;
23VALUE rb_cPrismParseResult;
24VALUE rb_cPrismLexResult;
25VALUE rb_cPrismParseLexResult;
26VALUE rb_cPrismStringQuery;
27
// NOTE(review): no assignment to this handle is visible in this file.
28VALUE rb_cPrismDebugEncoding;
29
// IDs of the supported option keywords (compared against in
// build_options_i()) and of the `for` method called on Prism::Source. They
// are interned once in Init_prism().
30ID rb_id_option_command_line;
31ID rb_id_option_encoding;
32ID rb_id_option_filepath;
33ID rb_id_option_frozen_string_literal;
34ID rb_id_option_line;
35ID rb_id_option_main_script;
36ID rb_id_option_partial_script;
37ID rb_id_option_scopes;
38ID rb_id_option_version;
39ID rb_id_source_for;
40
41/******************************************************************************/
42/* IO of Ruby code */
43/******************************************************************************/
44
49static const char *
50check_string(VALUE value) {
51 // Check if the value is a string. If it's not, then raise a type error.
52 if (!RB_TYPE_P(value, T_STRING)) {
53 rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(value));
54 }
55
56 // Otherwise, return the value as a C string.
57 return RSTRING_PTR(value);
58}
59
63static void
64input_load_string(pm_string_t *input, VALUE string) {
65 // Check if the string is a string. If it's not, then raise a type error.
66 if (!RB_TYPE_P(string, T_STRING)) {
67 rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(string));
68 }
69
70 pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
71}
72
73/******************************************************************************/
74/* Building C options from Ruby options */
75/******************************************************************************/
76
80static void
81build_options_scopes(pm_options_t *options, VALUE scopes) {
82 // Check if the value is an array. If it's not, then raise a type error.
83 if (!RB_TYPE_P(scopes, T_ARRAY)) {
84 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
85 }
86
87 // Initialize the scopes array.
88 size_t scopes_count = RARRAY_LEN(scopes);
89 if (!pm_options_scopes_init(options, scopes_count)) {
90 rb_raise(rb_eNoMemError, "failed to allocate memory");
91 }
92
93 // Iterate over the scopes and add them to the options.
94 for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
95 VALUE scope = rb_ary_entry(scopes, scope_index);
96
97 // Check that the scope is an array. If it's not, then raise a type
98 // error.
99 if (!RB_TYPE_P(scope, T_ARRAY)) {
100 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
101 }
102
103 // Initialize the scope array.
104 size_t locals_count = RARRAY_LEN(scope);
105 pm_options_scope_t *options_scope = &options->scopes[scope_index];
106 if (!pm_options_scope_init(options_scope, locals_count)) {
107 rb_raise(rb_eNoMemError, "failed to allocate memory");
108 }
109
110 // Iterate over the locals and add them to the scope.
111 for (size_t local_index = 0; local_index < locals_count; local_index++) {
112 VALUE local = rb_ary_entry(scope, local_index);
113
114 // Check that the local is a symbol. If it's not, then raise a
115 // type error.
116 if (!RB_TYPE_P(local, T_SYMBOL)) {
117 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
118 }
119
120 // Add the local to the scope.
121 pm_string_t *scope_local = &options_scope->locals[local_index];
122 const char *name = rb_id2name(SYM2ID(local));
123 pm_string_constant_init(scope_local, name, strlen(name));
124 }
125 }
126}
127
131static int
132build_options_i(VALUE key, VALUE value, VALUE argument) {
133 pm_options_t *options = (pm_options_t *) argument;
134 ID key_id = SYM2ID(key);
135
136 if (key_id == rb_id_option_filepath) {
137 if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
138 } else if (key_id == rb_id_option_encoding) {
139 if (!NIL_P(value)) {
140 if (value == Qfalse) {
141 pm_options_encoding_locked_set(options, true);
142 } else {
143 pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
144 }
145 }
146 } else if (key_id == rb_id_option_line) {
147 if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
148 } else if (key_id == rb_id_option_frozen_string_literal) {
149 if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, RTEST(value));
150 } else if (key_id == rb_id_option_version) {
151 if (!NIL_P(value)) {
152 const char *version = check_string(value);
153
154 if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
155 rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value);
156 }
157 }
158 } else if (key_id == rb_id_option_scopes) {
159 if (!NIL_P(value)) build_options_scopes(options, value);
160 } else if (key_id == rb_id_option_command_line) {
161 if (!NIL_P(value)) {
162 const char *string = check_string(value);
163 uint8_t command_line = 0;
164
165 for (size_t index = 0; index < strlen(string); index++) {
166 switch (string[index]) {
167 case 'a': command_line |= PM_OPTIONS_COMMAND_LINE_A; break;
168 case 'e': command_line |= PM_OPTIONS_COMMAND_LINE_E; break;
169 case 'l': command_line |= PM_OPTIONS_COMMAND_LINE_L; break;
170 case 'n': command_line |= PM_OPTIONS_COMMAND_LINE_N; break;
171 case 'p': command_line |= PM_OPTIONS_COMMAND_LINE_P; break;
172 case 'x': command_line |= PM_OPTIONS_COMMAND_LINE_X; break;
173 default: rb_raise(rb_eArgError, "invalid command line flag: '%c'", string[index]); break;
174 }
175 }
176
177 pm_options_command_line_set(options, command_line);
178 }
179 } else if (key_id == rb_id_option_main_script) {
180 if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value));
181 } else if (key_id == rb_id_option_partial_script) {
182 if (!NIL_P(value)) pm_options_partial_script_set(options, RTEST(value));
183 } else {
184 rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
185 }
186
187 return ST_CONTINUE;
188}
189
196 pm_options_t *options;
197 VALUE keywords;
198};
199
204static VALUE
205build_options(VALUE argument) {
206 struct build_options_data *data = (struct build_options_data *) argument;
207 rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
208 return Qnil;
209}
210
214static void
215extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
216 options->line = 1; // default
217
218 if (!NIL_P(keywords)) {
219 struct build_options_data data = { .options = options, .keywords = keywords };
220 struct build_options_data *argument = &data;
221
222 int state = 0;
223 rb_protect(build_options, (VALUE) argument, &state);
224
225 if (state != 0) {
226 pm_options_free(options);
227 rb_jump_tag(state);
228 }
229 }
230
231 if (!NIL_P(filepath)) {
232 if (!RB_TYPE_P(filepath, T_STRING)) {
233 pm_options_free(options);
234 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
235 }
236
237 pm_options_filepath_set(options, RSTRING_PTR(filepath));
238 }
239}
240
244static void
245string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
246 VALUE string;
247 VALUE keywords;
248 rb_scan_args(argc, argv, "1:", &string, &keywords);
249
250 extract_options(options, Qnil, keywords);
251 input_load_string(input, string);
252}
253
257static void
258file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, VALUE *encoded_filepath) {
259 VALUE filepath;
260 VALUE keywords;
261 rb_scan_args(argc, argv, "1:", &filepath, &keywords);
262
263 Check_Type(filepath, T_STRING);
264 *encoded_filepath = rb_str_encode_ospath(filepath);
265 extract_options(options, *encoded_filepath, keywords);
266
267 const char *source = (const char *) pm_string_source(&options->filepath);
269
270 switch (result = pm_string_file_init(input, source)) {
272 break;
274 pm_options_free(options);
275
276#ifdef _WIN32
277 int e = rb_w32_map_errno(GetLastError());
278#else
279 int e = errno;
280#endif
281
282 rb_syserr_fail(e, source);
283 break;
284 }
286 pm_options_free(options);
287 rb_syserr_fail(EISDIR, source);
288 break;
289 default:
290 pm_options_free(options);
291 rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source);
292 break;
293 }
294}
295
296#ifndef PRISM_EXCLUDE_SERIALIZATION
297
298/******************************************************************************/
299/* Serializing the AST */
300/******************************************************************************/
301
305static VALUE
306dump_input(pm_string_t *input, const pm_options_t *options) {
307 pm_buffer_t buffer;
308 if (!pm_buffer_init(&buffer)) {
309 rb_raise(rb_eNoMemError, "failed to allocate memory");
310 }
311
312 pm_parser_t parser;
313 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
314
315 pm_node_t *node = pm_parse(&parser);
316 pm_serialize(&parser, node, &buffer);
317
318 VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
319 pm_node_destroy(&parser, node);
320 pm_buffer_free(&buffer);
321 pm_parser_free(&parser);
322
323 return result;
324}
325
333static VALUE
334dump(int argc, VALUE *argv, VALUE self) {
335 pm_string_t input;
336 pm_options_t options = { 0 };
337 string_options(argc, argv, &input, &options);
338
339#ifdef PRISM_BUILD_DEBUG
340 size_t length = pm_string_length(&input);
341 char* dup = xmalloc(length);
342 memcpy(dup, pm_string_source(&input), length);
343 pm_string_constant_init(&input, dup, length);
344#endif
345
346 VALUE value = dump_input(&input, &options);
347
348#ifdef PRISM_BUILD_DEBUG
349 xfree(dup);
350#endif
351
352 pm_string_free(&input);
353 pm_options_free(&options);
354
355 return value;
356}
357
365static VALUE
366dump_file(int argc, VALUE *argv, VALUE self) {
367 pm_string_t input;
368 pm_options_t options = { 0 };
369
370 VALUE encoded_filepath;
371 file_options(argc, argv, &input, &options, &encoded_filepath);
372
373 VALUE value = dump_input(&input, &options);
374 pm_string_free(&input);
375 pm_options_free(&options);
376
377 return value;
378}
379
380#endif
381
382/******************************************************************************/
383/* Extracting values for the parse result */
384/******************************************************************************/
385
389static VALUE
390parser_comments(pm_parser_t *parser, VALUE source) {
391 VALUE comments = rb_ary_new_capa(parser->comment_list.size);
392
393 for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
394 VALUE location_argv[] = {
395 source,
396 LONG2FIX(comment->location.start - parser->start),
397 LONG2FIX(comment->location.end - comment->location.start)
398 };
399
400 VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
401 VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
402 rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
403 }
404
405 return comments;
406}
407
411static VALUE
412parser_magic_comments(pm_parser_t *parser, VALUE source) {
413 VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size);
414
416 VALUE key_loc_argv[] = {
417 source,
418 LONG2FIX(magic_comment->key_start - parser->start),
419 LONG2FIX(magic_comment->key_length)
420 };
421
422 VALUE value_loc_argv[] = {
423 source,
424 LONG2FIX(magic_comment->value_start - parser->start),
425 LONG2FIX(magic_comment->value_length)
426 };
427
428 VALUE magic_comment_argv[] = {
429 rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
430 rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
431 };
432
433 rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
434 }
435
436 return magic_comments;
437}
438
443static VALUE
444parser_data_loc(const pm_parser_t *parser, VALUE source) {
445 if (parser->data_loc.end == NULL) {
446 return Qnil;
447 } else {
448 VALUE argv[] = {
449 source,
450 LONG2FIX(parser->data_loc.start - parser->start),
451 LONG2FIX(parser->data_loc.end - parser->data_loc.start)
452 };
453
454 return rb_class_new_instance(3, argv, rb_cPrismLocation);
455 }
456}
457
461static VALUE
462parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
463 VALUE errors = rb_ary_new_capa(parser->error_list.size);
464 pm_diagnostic_t *error;
465
466 for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
467 VALUE location_argv[] = {
468 source,
469 LONG2FIX(error->location.start - parser->start),
470 LONG2FIX(error->location.end - error->location.start)
471 };
472
473 VALUE level = Qnil;
474 switch (error->level) {
476 level = ID2SYM(rb_intern("syntax"));
477 break;
479 level = ID2SYM(rb_intern("argument"));
480 break;
482 level = ID2SYM(rb_intern("load"));
483 break;
484 default:
485 rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
486 }
487
488 VALUE error_argv[] = {
489 ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))),
490 rb_enc_str_new_cstr(error->message, encoding),
491 rb_class_new_instance(3, location_argv, rb_cPrismLocation),
492 level
493 };
494
495 rb_ary_push(errors, rb_class_new_instance(4, error_argv, rb_cPrismParseError));
496 }
497
498 return errors;
499}
500
504static VALUE
505parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
506 VALUE warnings = rb_ary_new_capa(parser->warning_list.size);
507 pm_diagnostic_t *warning;
508
509 for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
510 VALUE location_argv[] = {
511 source,
512 LONG2FIX(warning->location.start - parser->start),
513 LONG2FIX(warning->location.end - warning->location.start)
514 };
515
516 VALUE level = Qnil;
517 switch (warning->level) {
519 level = ID2SYM(rb_intern("default"));
520 break;
522 level = ID2SYM(rb_intern("verbose"));
523 break;
524 default:
525 rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
526 }
527
528 VALUE warning_argv[] = {
529 ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))),
530 rb_enc_str_new_cstr(warning->message, encoding),
531 rb_class_new_instance(3, location_argv, rb_cPrismLocation),
532 level
533 };
534
535 rb_ary_push(warnings, rb_class_new_instance(4, warning_argv, rb_cPrismParseWarning));
536 }
537
538 return warnings;
539}
540
544static VALUE
545parse_result_create(VALUE class, pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
546 VALUE result_argv[] = {
547 value,
548 parser_comments(parser, source),
549 parser_magic_comments(parser, source),
550 parser_data_loc(parser, source),
551 parser_errors(parser, encoding, source),
552 parser_warnings(parser, encoding, source),
553 source
554 };
555
556 return rb_class_new_instance(7, result_argv, class);
557}
558
559/******************************************************************************/
560/* Lexing Ruby code */
561/******************************************************************************/
562
568typedef struct {
569 VALUE source;
570 VALUE tokens;
571 rb_encoding *encoding;
573
579static void
580parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
581 parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
582
583 VALUE yields = rb_assoc_new(
584 pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source),
585 INT2FIX(parser->lex_state)
586 );
587
588 rb_ary_push(parse_lex_data->tokens, yields);
589}
590
596static void
597parse_lex_encoding_changed_callback(pm_parser_t *parser) {
598 parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
599 parse_lex_data->encoding = rb_enc_find(parser->encoding->name);
600
601 // Since the encoding changed, we need to go back and change the encoding of
602 // the tokens that were already lexed. This is only going to end up being
603 // one or two tokens, since the encoding can only change at the top of the
604 // file.
605 VALUE tokens = parse_lex_data->tokens;
606 for (long index = 0; index < RARRAY_LEN(tokens); index++) {
607 VALUE yields = rb_ary_entry(tokens, index);
608 VALUE token = rb_ary_entry(yields, 0);
609
610 VALUE value = rb_ivar_get(token, rb_intern("@value"));
611 rb_enc_associate(value, parse_lex_data->encoding);
612 ENC_CODERANGE_CLEAR(value);
613 }
614}
615
620static VALUE
621parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
622 pm_parser_t parser;
623 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
624 pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
625
626 VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
627 VALUE offsets = rb_ary_new_capa(parser.newline_list.size);
628 VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(parser.start_line), offsets);
629
630 parse_lex_data_t parse_lex_data = {
631 .source = source,
632 .tokens = rb_ary_new(),
633 .encoding = rb_utf8_encoding()
634 };
635
636 parse_lex_data_t *data = &parse_lex_data;
637 pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
638 .data = (void *) data,
639 .callback = parse_lex_token,
640 };
641
642 parser.lex_callback = &lex_callback;
643 pm_node_t *node = pm_parse(&parser);
644
645 // Here we need to update the Source object to have the correct
646 // encoding for the source string and the correct newline offsets.
647 // We do it here because we've already created the Source object and given
648 // it over to all of the tokens, and both of these are only set after pm_parse().
649 rb_encoding *encoding = rb_enc_find(parser.encoding->name);
650 rb_enc_associate(source_string, encoding);
651
652 for (size_t index = 0; index < parser.newline_list.size; index++) {
653 rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
654 }
655
656 VALUE result;
657 if (return_nodes) {
658 VALUE value = rb_ary_new_capa(2);
659 rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
660 rb_ary_push(value, parse_lex_data.tokens);
661 result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source);
662 } else {
663 result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source);
664 }
665
666 pm_node_destroy(&parser, node);
667 pm_parser_free(&parser);
668
669 return result;
670}
671
679static VALUE
680lex(int argc, VALUE *argv, VALUE self) {
681 pm_string_t input;
682 pm_options_t options = { 0 };
683 string_options(argc, argv, &input, &options);
684
685 VALUE result = parse_lex_input(&input, &options, false);
686 pm_string_free(&input);
687 pm_options_free(&options);
688
689 return result;
690}
691
699static VALUE
700lex_file(int argc, VALUE *argv, VALUE self) {
701 pm_string_t input;
702 pm_options_t options = { 0 };
703
704 VALUE encoded_filepath;
705 file_options(argc, argv, &input, &options, &encoded_filepath);
706
707 VALUE value = parse_lex_input(&input, &options, false);
708 pm_string_free(&input);
709 pm_options_free(&options);
710
711 return value;
712}
713
714/******************************************************************************/
715/* Parsing Ruby code */
716/******************************************************************************/
717
721static VALUE
722parse_input(pm_string_t *input, const pm_options_t *options) {
723 pm_parser_t parser;
724 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
725
726 pm_node_t *node = pm_parse(&parser);
727 rb_encoding *encoding = rb_enc_find(parser.encoding->name);
728
729 VALUE source = pm_source_new(&parser, encoding);
730 VALUE value = pm_ast_new(&parser, node, encoding, source);
731 VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source) ;
732
733 pm_node_destroy(&parser, node);
734 pm_parser_free(&parser);
735
736 return result;
737}
738
779static VALUE
780parse(int argc, VALUE *argv, VALUE self) {
781 pm_string_t input;
782 pm_options_t options = { 0 };
783 string_options(argc, argv, &input, &options);
784
785#ifdef PRISM_BUILD_DEBUG
786 size_t length = pm_string_length(&input);
787 char* dup = xmalloc(length);
788 memcpy(dup, pm_string_source(&input), length);
789 pm_string_constant_init(&input, dup, length);
790#endif
791
792 VALUE value = parse_input(&input, &options);
793
794#ifdef PRISM_BUILD_DEBUG
795 xfree(dup);
796#endif
797
798 pm_string_free(&input);
799 pm_options_free(&options);
800 return value;
801}
802
810static VALUE
811parse_file(int argc, VALUE *argv, VALUE self) {
812 pm_string_t input;
813 pm_options_t options = { 0 };
814
815 VALUE encoded_filepath;
816 file_options(argc, argv, &input, &options, &encoded_filepath);
817
818 VALUE value = parse_input(&input, &options);
819 pm_string_free(&input);
820 pm_options_free(&options);
821
822 return value;
823}
824
828static void
829profile_input(pm_string_t *input, const pm_options_t *options) {
830 pm_parser_t parser;
831 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
832
833 pm_node_t *node = pm_parse(&parser);
834 pm_node_destroy(&parser, node);
835 pm_parser_free(&parser);
836}
837
846static VALUE
847profile(int argc, VALUE *argv, VALUE self) {
848 pm_string_t input;
849 pm_options_t options = { 0 };
850
851 string_options(argc, argv, &input, &options);
852 profile_input(&input, &options);
853 pm_string_free(&input);
854 pm_options_free(&options);
855
856 return Qnil;
857}
858
867static VALUE
868profile_file(int argc, VALUE *argv, VALUE self) {
869 pm_string_t input;
870 pm_options_t options = { 0 };
871
872 VALUE encoded_filepath;
873 file_options(argc, argv, &input, &options, &encoded_filepath);
874
875 profile_input(&input, &options);
876 pm_string_free(&input);
877 pm_options_free(&options);
878
879 return Qnil;
880}
881
885static char *
886parse_stream_fgets(char *string, int size, void *stream) {
887 RUBY_ASSERT(size > 0);
888
889 VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
890 if (NIL_P(line)) {
891 return NULL;
892 }
893
894 const char *cstr = RSTRING_PTR(line);
895 long length = RSTRING_LEN(line);
896
897 memcpy(string, cstr, length);
898 string[length] = '\0';
899
900 return string;
901}
902
910static VALUE
911parse_stream(int argc, VALUE *argv, VALUE self) {
912 VALUE stream;
913 VALUE keywords;
914 rb_scan_args(argc, argv, "1:", &stream, &keywords);
915
916 pm_options_t options = { 0 };
917 extract_options(&options, Qnil, keywords);
918
919 pm_parser_t parser;
920 pm_buffer_t buffer;
921
922 pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
923 rb_encoding *encoding = rb_enc_find(parser.encoding->name);
924
925 VALUE source = pm_source_new(&parser, encoding);
926 VALUE value = pm_ast_new(&parser, node, encoding, source);
927 VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source);
928
929 pm_node_destroy(&parser, node);
930 pm_buffer_free(&buffer);
931 pm_parser_free(&parser);
932
933 return result;
934}
935
939static VALUE
940parse_input_comments(pm_string_t *input, const pm_options_t *options) {
941 pm_parser_t parser;
942 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
943
944 pm_node_t *node = pm_parse(&parser);
945 rb_encoding *encoding = rb_enc_find(parser.encoding->name);
946
947 VALUE source = pm_source_new(&parser, encoding);
948 VALUE comments = parser_comments(&parser, source);
949
950 pm_node_destroy(&parser, node);
951 pm_parser_free(&parser);
952
953 return comments;
954}
955
963static VALUE
964parse_comments(int argc, VALUE *argv, VALUE self) {
965 pm_string_t input;
966 pm_options_t options = { 0 };
967 string_options(argc, argv, &input, &options);
968
969 VALUE result = parse_input_comments(&input, &options);
970 pm_string_free(&input);
971 pm_options_free(&options);
972
973 return result;
974}
975
983static VALUE
984parse_file_comments(int argc, VALUE *argv, VALUE self) {
985 pm_string_t input;
986 pm_options_t options = { 0 };
987
988 VALUE encoded_filepath;
989 file_options(argc, argv, &input, &options, &encoded_filepath);
990
991 VALUE value = parse_input_comments(&input, &options);
992 pm_string_free(&input);
993 pm_options_free(&options);
994
995 return value;
996}
997
1012static VALUE
1013parse_lex(int argc, VALUE *argv, VALUE self) {
1014 pm_string_t input;
1015 pm_options_t options = { 0 };
1016 string_options(argc, argv, &input, &options);
1017
1018 VALUE value = parse_lex_input(&input, &options, true);
1019 pm_string_free(&input);
1020 pm_options_free(&options);
1021
1022 return value;
1023}
1024
1039static VALUE
1040parse_lex_file(int argc, VALUE *argv, VALUE self) {
1041 pm_string_t input;
1042 pm_options_t options = { 0 };
1043
1044 VALUE encoded_filepath;
1045 file_options(argc, argv, &input, &options, &encoded_filepath);
1046
1047 VALUE value = parse_lex_input(&input, &options, true);
1048 pm_string_free(&input);
1049 pm_options_free(&options);
1050
1051 return value;
1052}
1053
1057static VALUE
1058parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
1059 pm_parser_t parser;
1060 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
1061
1062 pm_node_t *node = pm_parse(&parser);
1063 pm_node_destroy(&parser, node);
1064
1065 VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse;
1066 pm_parser_free(&parser);
1067
1068 return result;
1069}
1070
1078static VALUE
1079parse_success_p(int argc, VALUE *argv, VALUE self) {
1080 pm_string_t input;
1081 pm_options_t options = { 0 };
1082 string_options(argc, argv, &input, &options);
1083
1084 VALUE result = parse_input_success_p(&input, &options);
1085 pm_string_free(&input);
1086 pm_options_free(&options);
1087
1088 return result;
1089}
1090
1098static VALUE
1099parse_failure_p(int argc, VALUE *argv, VALUE self) {
1100 return RTEST(parse_success_p(argc, argv, self)) ? Qfalse : Qtrue;
1101}
1102
1110static VALUE
1111parse_file_success_p(int argc, VALUE *argv, VALUE self) {
1112 pm_string_t input;
1113 pm_options_t options = { 0 };
1114
1115 VALUE encoded_filepath;
1116 file_options(argc, argv, &input, &options, &encoded_filepath);
1117
1118 VALUE result = parse_input_success_p(&input, &options);
1119 pm_string_free(&input);
1120 pm_options_free(&options);
1121
1122 return result;
1123}
1124
1132static VALUE
1133parse_file_failure_p(int argc, VALUE *argv, VALUE self) {
1134 return RTEST(parse_file_success_p(argc, argv, self)) ? Qfalse : Qtrue;
1135}
1136
1137/******************************************************************************/
1138/* String query methods */
1139/******************************************************************************/
1140
1145static VALUE
1146string_query(pm_string_query_t result) {
1147 switch (result) {
1149 rb_raise(rb_eArgError, "Invalid or non ascii-compatible encoding");
1150 return Qfalse;
1152 return Qfalse;
1154 return Qtrue;
1155 }
1156 return Qfalse;
1157}
1158
1167static VALUE
1168string_query_local_p(VALUE self, VALUE string) {
1169 const uint8_t *source = (const uint8_t *) check_string(string);
1170 return string_query(pm_string_query_local(source, RSTRING_LEN(string), rb_enc_get(string)->name));
1171}
1172
1181static VALUE
1182string_query_constant_p(VALUE self, VALUE string) {
1183 const uint8_t *source = (const uint8_t *) check_string(string);
1184 return string_query(pm_string_query_constant(source, RSTRING_LEN(string), rb_enc_get(string)->name));
1185}
1186
1193static VALUE
1194string_query_method_name_p(VALUE self, VALUE string) {
1195 const uint8_t *source = (const uint8_t *) check_string(string);
1196 return string_query(pm_string_query_method_name(source, RSTRING_LEN(string), rb_enc_get(string)->name));
1197}
1198
1199/******************************************************************************/
1200/* Initialization of the extension */
1201/******************************************************************************/
1202
1206RUBY_FUNC_EXPORTED void
1207Init_prism(void) {
1208 // Make sure that the prism library version matches the expected version.
1209 // Otherwise something was compiled incorrectly.
1210 if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
1211 rb_raise(
1213 "The prism library version (%s) does not match the expected version (%s)",
1214 pm_version(),
1215 EXPECTED_PRISM_VERSION
1216 );
1217 }
1218
1219 // Grab up references to all of the constants that we're going to need to
1220 // reference throughout this extension.
1221 rb_cPrism = rb_define_module("Prism");
1222 rb_cPrismNode = rb_define_class_under(rb_cPrism, "Node", rb_cObject);
1223 rb_cPrismSource = rb_define_class_under(rb_cPrism, "Source", rb_cObject);
1224 rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
1225 rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
1226 rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
1227 rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
1228 rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
1229 rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
1230 rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
1231 rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
1232 rb_cPrismResult = rb_define_class_under(rb_cPrism, "Result", rb_cObject);
1233 rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cPrismResult);
1234 rb_cPrismLexResult = rb_define_class_under(rb_cPrism, "LexResult", rb_cPrismResult);
1235 rb_cPrismParseLexResult = rb_define_class_under(rb_cPrism, "ParseLexResult", rb_cPrismResult);
1236 rb_cPrismStringQuery = rb_define_class_under(rb_cPrism, "StringQuery", rb_cObject);
1237
1238 // Intern all of the IDs eagerly that we support so that we don't have to do
1239 // it every time we parse.
1240 rb_id_option_command_line = rb_intern_const("command_line");
1241 rb_id_option_encoding = rb_intern_const("encoding");
1242 rb_id_option_filepath = rb_intern_const("filepath");
1243 rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal");
1244 rb_id_option_line = rb_intern_const("line");
1245 rb_id_option_main_script = rb_intern_const("main_script");
1246 rb_id_option_partial_script = rb_intern_const("partial_script");
1247 rb_id_option_scopes = rb_intern_const("scopes");
1248 rb_id_option_version = rb_intern_const("version");
1249 rb_id_source_for = rb_intern("for");
1250
1254 rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
1255
1256 // First, the functions that have to do with lexing and parsing.
1257 rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
1258 rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
1259 rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
1260 rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
1261 rb_define_singleton_method(rb_cPrism, "profile", profile, -1);
1262 rb_define_singleton_method(rb_cPrism, "profile_file", profile_file, -1);
1263 rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
1264 rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
1265 rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
1266 rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
1267 rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
1268 rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1);
1269 rb_define_singleton_method(rb_cPrism, "parse_failure?", parse_failure_p, -1);
1270 rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1);
1271 rb_define_singleton_method(rb_cPrism, "parse_file_failure?", parse_file_failure_p, -1);
1272
1273#ifndef PRISM_EXCLUDE_SERIALIZATION
1274 rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
1275 rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
1276#endif
1277
1278 rb_define_singleton_method(rb_cPrismStringQuery, "local?", string_query_local_p, 1);
1279 rb_define_singleton_method(rb_cPrismStringQuery, "constant?", string_query_constant_p, 1);
1280 rb_define_singleton_method(rb_cPrismStringQuery, "method_name?", string_query_method_name_p, 1);
1281
1282 // Next, initialize the other APIs.
1283 Init_prism_api_node();
1284 Init_prism_pack();
1285}
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
@ PM_WARNING_LEVEL_DEFAULT
For warnings which should be emitted if $VERBOSE != nil.
Definition diagnostic.h:405
@ PM_WARNING_LEVEL_VERBOSE
For warnings which should be emitted if $VERBOSE == true.
Definition diagnostic.h:408
@ PM_ERROR_LEVEL_ARGUMENT
For errors that should raise an argument error.
Definition diagnostic.h:394
@ PM_ERROR_LEVEL_LOAD
For errors that should raise a load error.
Definition diagnostic.h:397
@ PM_ERROR_LEVEL_SYNTAX
For errors that should raise a syntax error.
Definition diagnostic.h:391
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition class.c:1012
VALUE rb_define_module(const char *name)
Defines a top-level module.
Definition class.c:1095
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv to given VALUE references according to the format string.
Definition class.c:2635
#define rb_str_new2
Old name of rb_str_new_cstr.
Definition string.h:1675
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define INT2FIX
Old name of RB_INT2FIX.
Definition long.h:48
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define LONG2FIX
Old name of RB_INT2FIX.
Definition long.h:49
#define LONG2NUM
Old name of RB_LONG2NUM.
Definition long.h:50
#define Qtrue
Old name of RUBY_Qtrue.
#define NUM2INT
Old name of RB_NUM2INT.
Definition int.h:44
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
Definition value_type.h:56
#define NIL_P
Old name of RB_NIL_P.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
Definition value_type.h:80
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
Definition coderange.h:187
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
Definition error.c:3877
VALUE rb_eNoMemError
NoMemoryError exception.
Definition error.c:1441
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1430
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1428
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
Definition object.c:2138
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:247
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
Definition string.c:1098
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
Definition vm_eval.c:1099
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1498
VALUE rb_ivar_get(VALUE obj, ID name)
Identical to rb_iv_get(), except it accepts the name as an ID instead of a C string.
Definition variable.c:1362
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
Definition symbol.h:284
VALUE type(ANYARGS)
ANYARGS-ed function type.
void rb_hash_foreach(VALUE q, int_type *w, VALUE e)
Iteration over the given hash.
static const uint8_t PM_OPTIONS_COMMAND_LINE_E
A bit representing whether or not the command line -e option was set.
Definition options.h:173
static const uint8_t PM_OPTIONS_COMMAND_LINE_L
A bit representing whether or not the command line -l option was set.
Definition options.h:179
static const uint8_t PM_OPTIONS_COMMAND_LINE_A
A bit representing whether or not the command line -a option was set.
Definition options.h:166
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:185
static const uint8_t PM_OPTIONS_COMMAND_LINE_X
A bit representing whether or not the command line -x option was set.
Definition options.h:197
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:191
pm_string_init_result_t
Represents the result of calling pm_string_mapped_init or pm_string_file_init.
Definition pm_string.h:105
@ PM_STRING_INIT_SUCCESS
Indicates that the string was successfully initialized.
Definition pm_string.h:107
@ PM_STRING_INIT_ERROR_GENERIC
Indicates a generic error from a string_*_init function, where the type of error should be read from errno.
Definition pm_string.h:112
@ PM_STRING_INIT_ERROR_DIRECTORY
Indicates that the file that was attempted to be opened was a directory.
Definition pm_string.h:116
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:245
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
#define errno
Ractor-aware version of errno.
Definition ruby.h:388
#define RTEST
This is an old name of RB_TEST.
We need a struct here to pass through rb_protect and it has to be a single value.
Definition extension.c:195
This struct gets stored in the parser and passed in to the lex callback any time a new token is found.
Definition extension.c:568
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:458
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:359
pm_location_t location
The location of the diagnostic in the source.
Definition diagnostic.h:364
const char * message
The message associated with the diagnostic.
Definition diagnostic.h:370
pm_list_node_t node
The embedded base node.
Definition diagnostic.h:361
pm_diagnostic_id_t diag_id
The ID of the diagnostic.
Definition diagnostic.h:367
uint8_t level
The level of the diagnostic, see pm_error_level_t and pm_warning_level_t for possible values.
Definition diagnostic.h:383
const char * name
The name of the encoding.
Definition encoding.h:56
When you are lexing through a file, the lexer needs all of the information that the parser additionally provides (for example, the local table).
Definition parser.h:506
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback.
Definition parser.h:512
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:547
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:550
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1069
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
pm_string_t * locals
The names of the locals in the scope.
Definition options.h:41
The options that can be passed to the parser.
Definition options.h:77
pm_options_scope_t * scopes
The scopes surrounding the code that is being parsed.
Definition options.h:116
int32_t line
The line within the file that the parse starts on.
Definition options.h:97
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:91
This struct represents the overall parser.
Definition parser.h:640
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that the encoding can change with a magic comment.
Definition parser.h:755
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new token is lexed by the parser.
Definition parser.h:774
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of the file.
Definition parser.h:728
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
This struct represents a token in the Ruby source.
Definition ast.h:530
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
Definition value_type.h:433
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376