Ruby 3.5.0dev (2025-02-22 revision 412997300569c1853c09813e4924b6df3d7e8669)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * Size used for a tab character. (Name-derived description; the uses of this
 * constant are not visible in this part of the file.)
 */
#define PM_TAB_WHITESPACE_SIZE 8

// Smaller / larger of two values. Each argument may be evaluated twice, so do
// not pass expressions that have side effects.
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
20
21/******************************************************************************/
22/* Lex mode manipulations */
23/******************************************************************************/
24
/**
 * Return the byte that increases nesting for the given opening delimiter.
 *
 * @param start The opening delimiter.
 * @return `start` itself when it is one of `(`, `[`, `{` or `<`; otherwise
 *     the NUL byte, meaning there is no incrementor.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
41
/**
 * Return the byte that closes a literal opened with the given delimiter.
 *
 * @param start The opening delimiter.
 * @return The matching closing bracket for `(`, `[`, `{` and `<`; any other
 *     byte terminates itself and is returned unchanged.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * The classification that lex_state_ignored_p reports for the current lex
 * state.
 */
typedef enum {
    /** Returned when neither of the other two conditions holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** Returned for the BEG/CLASS/FNAME/DOT states that are not LABELED. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** Returned when the state, ignoring LABEL, is exactly ARG|LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
#ifndef PM_DEBUG_LOGGING
/**
 * Lex state debug logging is compiled out unless the build defines this
 * macro to a nonzero value.
 */
#define PM_DEBUG_LOGGING 0
#endif

#if PM_DEBUG_LOGGING
/**
 * Print the parser's lex state to stderr as "STATE: " followed by either
 * "NONE" or the names of the set state bits joined with "|".
 *
 * @param parser The parser to inspect.
 */
PRISM_ATTRIBUTE_UNUSED static void
debug_state(pm_parser_t *parser) {
    fprintf(stderr, "STATE: ");

    if (parser->lex_state == PM_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

    // Empty before the first name is printed, "|" afterwards.
    const char *separator = "";

#define PRINT_STATE(state) \
    if (parser->lex_state & state) { \
        fprintf(stderr, "%s%s", separator, #state); \
        separator = "|"; \
    }

    PRINT_STATE(PM_LEX_STATE_BEG)
    PRINT_STATE(PM_LEX_STATE_END)
    PRINT_STATE(PM_LEX_STATE_ENDARG)
    PRINT_STATE(PM_LEX_STATE_ENDFN)
    PRINT_STATE(PM_LEX_STATE_ARG)
    PRINT_STATE(PM_LEX_STATE_CMDARG)
    PRINT_STATE(PM_LEX_STATE_MID)
    PRINT_STATE(PM_LEX_STATE_FNAME)
    PRINT_STATE(PM_LEX_STATE_DOT)
    PRINT_STATE(PM_LEX_STATE_CLASS)
    PRINT_STATE(PM_LEX_STATE_LABEL)
    PRINT_STATE(PM_LEX_STATE_LABELED)
    PRINT_STATE(PM_LEX_STATE_FITEM)

#undef PRINT_STATE

    fprintf(stderr, "\n");
}

/**
 * Set the lex state while logging the caller, the previous state and the
 * new state to stderr.
 *
 * @param parser The parser to update.
 * @param state The new lex state.
 * @param caller_name Name of the calling function, printed in the log.
 * @param line_number Line of the call site, printed in the log.
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);

    // This still refers to the real function: the macro of the same name is
    // only defined below this point.
    lex_state_set(parser, state);

    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

// From here on, every lex_state_set call is logged with its call site.
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
373
374/******************************************************************************/
375/* Command-line macro helpers */
376/******************************************************************************/
377
/** Nonzero if any bit of `option` is set in the parser's command_line field. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) \
    ((parser)->command_line & (option))

/** Test the PM_OPTIONS_COMMAND_LINE_A bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Test the PM_OPTIONS_COMMAND_LINE_E bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Test the PM_OPTIONS_COMMAND_LINE_L bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Test the PM_OPTIONS_COMMAND_LINE_N bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Test the PM_OPTIONS_COMMAND_LINE_P bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Test the PM_OPTIONS_COMMAND_LINE_X bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
399/******************************************************************************/
400/* Diagnostic-related functions */
401/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append an error to the parser's error list, passing the trailing arguments
 * on to pm_diagnostic_list_append_format. At least one trailing argument is
 * required. `parser` is parenthesized so that any pointer expression can be
 * passed safely.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error covering the range of `location`, a pointer to
 * something with `start` and `end` members.
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...)          \
    PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id,  \
                         __VA_ARGS__)
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/** Append a formatted error covering the location of `node`. */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...)                  \
    PM_PARSER_ERR_FORMAT(parser, (node)->location.start,                       \
                         (node)->location.end, diag_id, __VA_ARGS__)

/**
 * Append a formatted error covering the location of `node`, passing the
 * length of the node's source and a pointer to its first byte as the format
 * arguments. `node` is evaluated several times.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id)               \
    PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error covering `token`. The token is accessed with `.`,
 * so it must be a token value rather than a pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...)                \
    PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id,          \
                         __VA_ARGS__)

/**
 * Append a formatted error covering `token`, passing the token's length and
 * a pointer to its first byte as the format arguments. `token` is evaluated
 * several times.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id)             \
    PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a warning to the parser's warning list, passing the trailing
 * arguments on to pm_diagnostic_list_append_format. At least one trailing
 * argument is required. `parser` is parenthesized so that any pointer
 * expression can be passed safely.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning covering `token`. The token is accessed with
 * `.`, so it must be a token value rather than a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning covering `token`, passing the token's length
 * and a pointer to its first byte as the format arguments. `token` is
 * evaluated several times.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)

/** Append a formatted warning covering the location of `node`. */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
558/******************************************************************************/
559/* Scope-related functions */
560/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/** The outcomes of pm_parser_scope_forwarding_param_check. */
typedef enum {
    /** A closed scope declares the parameter and no open scope in between does. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /** A closed scope declares the parameter, but so does an open scope inside it. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** The nearest closed scope (or the end of the chain) was reached without it. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
729/******************************************************************************/
730/* Local variable-related functions */
731/******************************************************************************/
732
/**
 * A locals table whose capacity is below this value is stored and searched
 * as a plain list; at or above it, the table is used as an open-addressed
 * hash keyed by pm_locals_hash.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
982/******************************************************************************/
983/* Node-related functions */
984/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
/**
 * Walk the tail position(s) of an expression looking for a node that makes
 * the expression produce no value.
 *
 * For return, break, next, redo and retry nodes (and whatever label the
 * listing drops at 1043) the function returns the void node recorded so far,
 * or the node itself if none was recorded. Container nodes are descended
 * into: compound constructs such as begin/rescue, if and unless only count
 * as void when every branch they check is void.
 *
 * @param parser The parser; used for recursion and, in the block around
 *     listing lines 1154-1158, to mark a local as read.
 * @param node The node to check; may be NULL.
 * @return The node responsible for the missing value, or NULL if none was
 *     found.
 *
 * NOTE(review): this listing skips several source lines inside the function
 * (the embedded numbers jump at 1043, 1045, 1097, 1102 and 1151-1152). The
 * skipped lines appear to hold case labels and `cast` declarations; confirm
 * against the complete file before relying on this text.
 */
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
// NOTE(review): listing line 1043 is missing here (likely another case label).
1044 return void_node != NULL ? void_node : node;
// NOTE(review): listing line 1045 is missing here; the return below
// presumably belongs to a case label that was dropped.
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
// With an ensure clause: a void rescue clause or a void body is reported
// immediately; otherwise checking continues with the ensure clause.
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
// With rescue clauses only: the body must be void, otherwise the whole
// begin has a value.
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
// Follow the chain of rescue clauses; a single clause that yields a value
// clears void_node and stops the scan.
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
// A plain begin: continue with its statements.
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
// NOTE(review): listing line 1097 (the declaration of `cast`) is missing.
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
// NOTE(review): listing line 1102 (the declaration of `cast`) is missing.
// Only the last statement of the list is checked.
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
// An `if` can only be void when both branches exist and both are void.
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
// Same rule as for `if`: both branches must exist and be void.
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
// Only the left operand is checked.
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
// Only the left operand is checked.
1148 node = cast->left;
1149 break;
1150 }
// NOTE(review): listing lines 1151-1152 are missing (a case label and the
// declaration of `cast`). The code below walks up `cast->depth` scopes and
// records a read of the local named `cast->name` in that scope.
1153
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
/**
 * Emit a PM_WARN_VOID_STATEMENT warning when the given node is of a kind
 * whose value is useless as a statement.
 *
 * The switch picks a description (`type`) and its length for the node; if
 * one was picked, both are passed as the format arguments of the warning,
 * which covers the node's location. Nodes that match no case produce no
 * warning.
 *
 * @param parser The parser that receives the warning and whose constant pool
 *     is used to look up call names.
 * @param node The statement to inspect.
 *
 * NOTE(review): this listing skips many source lines inside the switch (the
 * embedded numbers jump at 1185-1190, 1249, 1253, 1268-1269, 1271-1274 and
 * 1287). The skipped lines appear to be case labels and one `if` condition;
 * confirm against the complete file.
 */
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
// NOTE(review): listing lines 1185-1190 (the case labels for this branch)
// are missing.
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
// Calls that have a call operator, or that have no message location, are
// never reported.
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
// Only the operator method names listed below are reported; the operator
// text itself is used as the description.
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
// Two-byte operators: <=, >=, !=, ==, then +@ and -@, then **.
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
// NOTE(review): listing line 1249 (a case label) is missing.
1250 type = "::";
1251 length = 2;
1252 break;
// NOTE(review): listing line 1253 (a case label) is missing.
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
// NOTE(review): listing lines 1268-1269 and 1271-1274 (further case labels
// of this group) are missing.
1270 case PM_RATIONAL_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
// NOTE(review): listing line 1287 (the `if` that selects between "..." and
// "..") is missing.
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
// A description was picked, so the node is reported.
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The context in which a predicate is being checked by
 * pm_conditional_predicate(). It selects the wording of literal-in-condition
 * warnings.
 */
typedef enum {
    /** Literal warnings describe the predicate as a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** Literal warnings describe the predicate as a "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
/**
 * Return true when the given node is one of the literal node kinds listed
 * below. Arrays and hashes qualify when they carry the static literal flag or
 * when every element (for hashes: every key and value of every assoc element)
 * qualifies recursively.
 *
 * NOTE(review): the embedded line numbering skips between PM_RATIONAL_NODE
 * and PM_STRING_NODE, so additional case labels may be missing from this copy.
 */
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
// Fast path: an array already flagged as a static literal qualifies.
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
// Otherwise every element must itself qualify. An empty array qualifies.
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
1377 const pm_node_t *element = cast->elements.nodes[index];
// Any element that is not a plain assoc node disqualifies the hash.
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
/**
 * Walk a node that is being used as a predicate. Depending on the node kind
 * this recurses into sub-expressions, retypes range nodes as flip-flop nodes,
 * and emits warnings for literals and for literal values assigned inside the
 * predicate. `type` selects the wording of the literal warnings.
 *
 * NOTE(review): a number of `case` labels, one local declaration and some
 * statements are not visible in this copy of the function; the affected
 * branches are marked below. Confirm against the complete source before
 * editing.
 */
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
// Both operands are checked as ordinary conditions, regardless of the
// incoming `type`.
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
// NOTE(review): the declaration of `cast` for this branch is not visible.
1445
// Only a parenthesized body holding exactly one statement is descended into,
// keeping the caller's `type`.
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
// Same single-statement rule as for parentheses.
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
// Each bound that is present is checked in flip-flop context.
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
// NOTE(review): the case label and the retyping statements described by the
// following comment are not visible in this copy.
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1483
// The literal warning is skipped when PM_PARSER_COMMAND_LINE_OPTION_E(parser)
// holds.
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
// NOTE(review): the case label and the retyping statements described by the
// following comment are not visible in this copy.
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
// Integers in a flip-flop get their own warning; elsewhere they get the
// generic literal warning with an empty prefix.
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
// NOTE(review): the embedded numbering skips lines after the next label, so
// further case labels may share this "string " branch.
1510 case PM_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
1515 case PM_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
// NOTE(review): the case labels for the six write-node branches below are not
// visible; each passes the written value (judging by the casts) to the
// literal-assignment warning helper.
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
// An empty location anchored at the start of the parser's source.
#define PM_LOCATION_NULL_VALUE(parser) \
    ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })

// The location covered by a token (given as a pointer to the token).
#define PM_LOCATION_TOKEN_VALUE(token) \
    ((pm_location_t) { .start = (token)->start, .end = (token)->end })

// The location of a node whose `location` field is reachable directly.
#define PM_LOCATION_NODE_VALUE(node) \
    ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })

// The location of a node whose `location` field is reached through `base`.
#define PM_LOCATION_NODE_BASE_VALUE(node) \
    ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })

// The value used for an optional location that is absent: both ends are NULL.
#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE \
    ((pm_location_t) { .start = NULL, .end = NULL })

// The location of an optional token: absent when the token's type is
// PM_TOKEN_NOT_PROVIDED, otherwise the location covered by the token.
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) \
    ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1653 if (n <= 0) return 0;
1654
1655 if (parser->encoding_changed) {
1656 size_t width;
1657
1658 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1659 return width;
1660 } else if (*b == '_') {
1661 return 1;
1662 } else if (*b >= 0x80) {
1663 return parser->encoding->char_width(b, n);
1664 } else {
1665 return 0;
1666 }
1667 } else if (*b < 0x80) {
1668 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1669 } else {
1670 return pm_encoding_utf_8_char_width(b, n);
1671 }
1672}
1673
1678static inline size_t
1679char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1680 if (n <= 0) {
1681 return 0;
1682 } else if (*b < 0x80) {
1683 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1684 } else {
1685 return pm_encoding_utf_8_char_width(b, n);
1686 }
1687}
1688
1694static inline size_t
1695char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1696 if (n <= 0) {
1697 return 0;
1698 } else if (parser->encoding_changed) {
1699 size_t width;
1700
1701 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1702 return width;
1703 } else if (*b == '_') {
1704 return 1;
1705 } else if (*b >= 0x80) {
1706 return parser->encoding->char_width(b, n);
1707 } else {
1708 return 0;
1709 }
1710 } else {
1711 return char_is_identifier_utf8(b, n);
1712 }
1713}
1714
// A perfect hash over the punctuation characters that are allowed in global
// names. It is used to quickly decide whether the character following a $ is a
// valid global name character. The printable range 0x21..0x7e is split into
// three 32-bit words; BIT contributes the bit for a character only to the word
// that the character belongs to.
#define BIT(ch, word) (((ch) / 32 - 1 == word) ? (1U << ((ch) % 32)) : 0)
#define PUNCT(word) ( \
    BIT('~', word) | BIT('*', word) | BIT('$', word) | BIT('?', word) | \
    BIT('!', word) | BIT('@', word) | BIT('/', word) | BIT('\\', word) | \
    BIT(';', word) | BIT(',', word) | BIT('.', word) | BIT('=', word) | \
    BIT(':', word) | BIT('<', word) | BIT('>', word) | BIT('\"', word) | \
    BIT('&', word) | BIT('`', word) | BIT('\'', word) | BIT('+', word) | \
    BIT('0', word))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (unsigned int) b;

    // Only bytes strictly above the space character and up to '~' are encoded
    // in the table.
    const bool in_table_range = (code > 0x20) && (code <= 0x7e);
    if (!in_table_range) return false;

    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return ((word >> (code % 32)) & 1U) != 0;
}
1739
1740static inline bool
1741token_is_setter_name(pm_token_t *token) {
1742 return (
1743 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1744 ((token->type == PM_TOKEN_IDENTIFIER) &&
1745 (token->end - token->start >= 2) &&
1746 (token->end[-1] == '='))
1747 );
1748}
1749
/**
 * Return true if the length bytes starting at source spell exactly one of the
 * keywords in the table below. source does not need to be NUL-terminated; only
 * its first length bytes are examined.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    static const struct {
        const char *text;
        size_t size;
    } keywords[] = {
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"), KEYWORD("return"),
        KEYWORD("unless"),
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t index = 0; index < count; index++) {
        // The length comparison comes first so that source is never read past
        // its first length bytes.
        if (keywords[index].size == length && memcmp(source, keywords[index].text, length) == 0) {
            return true;
        }
    }

    return false;
}
1820
1821/******************************************************************************/
1822/* Node flag handling functions */
1823/******************************************************************************/
1824
1828static inline void
1829pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1830 node->flags |= flag;
1831}
1832
1836static inline void
1837pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1838 node->flags &= (pm_node_flags_t) ~flag;
1839}
1840
1844static inline void
1845pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1846 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1847 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1848 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1849 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1850 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1851 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1852 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1853 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1854
1855 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1856}
1857
1858/******************************************************************************/
1859/* Node creation functions */
1860/******************************************************************************/
1861
1867#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1868
1872static inline pm_node_flags_t
1873pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1874 pm_node_flags_t flags = 0;
1875
1876 if (closing->type == PM_TOKEN_REGEXP_END) {
1877 pm_buffer_t unknown_flags = { 0 };
1878
1879 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1880 switch (*flag) {
1881 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1882 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1883 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1884 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1885
1886 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1887 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1888 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1889 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1890
1891 default: pm_buffer_append_byte(&unknown_flags, *flag);
1892 }
1893 }
1894
1895 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1896 if (unknown_flags_length != 0) {
1897 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1898 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1899 }
1900 pm_buffer_free(&unknown_flags);
1901 }
1902
1903 return flags;
1904}
1905
1906#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1907
1908static pm_statements_node_t *
1909pm_statements_node_create(pm_parser_t *parser);
1910
1911static void
1912pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1913
1914static size_t
1915pm_statements_node_body_length(pm_statements_node_t *node);
1916
1921static inline void *
1922pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1923 void *memory = xcalloc(1, size);
1924 if (memory == NULL) {
1925 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1926 abort();
1927 }
1928 return memory;
1929}
1930
// Allocate zeroed memory for a node of the given type and return it as a
// pointer to that type. The expansion and the parser argument are
// parenthesized so the macro is safe inside larger expressions.
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

// Increment the parser's node_id counter and yield the new value, which is
// used as the identifier of the node being created. The argument is
// parenthesized so that any pointer expression may be passed.
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1933
1937static pm_missing_node_t *
1938pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1939 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1940
1941 *node = (pm_missing_node_t) {{
1942 .type = PM_MISSING_NODE,
1943 .node_id = PM_NODE_IDENTIFY(parser),
1944 .location = { .start = start, .end = end }
1945 }};
1946
1947 return node;
1948}
1949
1953static pm_alias_global_variable_node_t *
1954pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1955 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1956 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1957
1958 *node = (pm_alias_global_variable_node_t) {
1959 {
1960 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1961 .node_id = PM_NODE_IDENTIFY(parser),
1962 .location = {
1963 .start = keyword->start,
1964 .end = old_name->location.end
1965 },
1966 },
1967 .new_name = new_name,
1968 .old_name = old_name,
1969 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1970 };
1971
1972 return node;
1973}
1974
1978static pm_alias_method_node_t *
1979pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1980 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1981 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1982
1983 *node = (pm_alias_method_node_t) {
1984 {
1985 .type = PM_ALIAS_METHOD_NODE,
1986 .node_id = PM_NODE_IDENTIFY(parser),
1987 .location = {
1988 .start = keyword->start,
1989 .end = old_name->location.end
1990 },
1991 },
1992 .new_name = new_name,
1993 .old_name = old_name,
1994 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1995 };
1996
1997 return node;
1998}
1999
2003static pm_alternation_pattern_node_t *
2004pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2005 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2006
2007 *node = (pm_alternation_pattern_node_t) {
2008 {
2009 .type = PM_ALTERNATION_PATTERN_NODE,
2010 .node_id = PM_NODE_IDENTIFY(parser),
2011 .location = {
2012 .start = left->location.start,
2013 .end = right->location.end
2014 },
2015 },
2016 .left = left,
2017 .right = right,
2018 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2019 };
2020
2021 return node;
2022}
2023
2027static pm_and_node_t *
2028pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2029 pm_assert_value_expression(parser, left);
2030
2031 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2032
2033 *node = (pm_and_node_t) {
2034 {
2035 .type = PM_AND_NODE,
2036 .node_id = PM_NODE_IDENTIFY(parser),
2037 .location = {
2038 .start = left->location.start,
2039 .end = right->location.end
2040 },
2041 },
2042 .left = left,
2043 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2044 .right = right
2045 };
2046
2047 return node;
2048}
2049
2053static pm_arguments_node_t *
2054pm_arguments_node_create(pm_parser_t *parser) {
2055 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2056
2057 *node = (pm_arguments_node_t) {
2058 {
2059 .type = PM_ARGUMENTS_NODE,
2060 .node_id = PM_NODE_IDENTIFY(parser),
2061 .location = PM_LOCATION_NULL_VALUE(parser)
2062 },
2063 .arguments = { 0 }
2064 };
2065
2066 return node;
2067}
2068
2072static size_t
2073pm_arguments_node_size(pm_arguments_node_t *node) {
2074 return node->arguments.size;
2075}
2076
2080static void
2081pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2082 if (pm_arguments_node_size(node) == 0) {
2083 node->base.location.start = argument->location.start;
2084 }
2085
2086 node->base.location.end = argument->location.end;
2087 pm_node_list_append(&node->arguments, argument);
2088
2089 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2090 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2091 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2092 } else {
2093 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2094 }
2095 }
2096}
2097
2101static pm_array_node_t *
2102pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2103 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2104
2105 *node = (pm_array_node_t) {
2106 {
2107 .type = PM_ARRAY_NODE,
2108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2109 .node_id = PM_NODE_IDENTIFY(parser),
2110 .location = PM_LOCATION_TOKEN_VALUE(opening)
2111 },
2112 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2113 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2114 .elements = { 0 }
2115 };
2116
2117 return node;
2118}
2119
2123static inline void
2124pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2125 if (!node->elements.size && !node->opening_loc.start) {
2126 node->base.location.start = element->location.start;
2127 }
2128
2129 pm_node_list_append(&node->elements, element);
2130 node->base.location.end = element->location.end;
2131
2132 // If the element is not a static literal, then the array is not a static
2133 // literal. Turn that flag off.
2134 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2135 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2136 }
2137
2138 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2139 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2140 }
2141}
2142
2146static void
2147pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2148 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2149 node->base.location.end = closing->end;
2150 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2151}
2152
2157static pm_array_pattern_node_t *
2158pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2159 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2160
2161 *node = (pm_array_pattern_node_t) {
2162 {
2163 .type = PM_ARRAY_PATTERN_NODE,
2164 .node_id = PM_NODE_IDENTIFY(parser),
2165 .location = {
2166 .start = nodes->nodes[0]->location.start,
2167 .end = nodes->nodes[nodes->size - 1]->location.end
2168 },
2169 },
2170 .constant = NULL,
2171 .rest = NULL,
2172 .requireds = { 0 },
2173 .posts = { 0 },
2174 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2175 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2176 };
2177
2178 // For now we're going to just copy over each pointer manually. This could be
2179 // much more efficient, as we could instead resize the node list.
2180 bool found_rest = false;
2181 pm_node_t *child;
2182
2183 PM_NODE_LIST_FOREACH(nodes, index, child) {
2184 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2185 node->rest = child;
2186 found_rest = true;
2187 } else if (found_rest) {
2188 pm_node_list_append(&node->posts, child);
2189 } else {
2190 pm_node_list_append(&node->requireds, child);
2191 }
2192 }
2193
2194 return node;
2195}
2196
2200static pm_array_pattern_node_t *
2201pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2202 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2203
2204 *node = (pm_array_pattern_node_t) {
2205 {
2206 .type = PM_ARRAY_PATTERN_NODE,
2207 .node_id = PM_NODE_IDENTIFY(parser),
2208 .location = rest->location,
2209 },
2210 .constant = NULL,
2211 .rest = rest,
2212 .requireds = { 0 },
2213 .posts = { 0 },
2214 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2215 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 {
2231 .type = PM_ARRAY_PATTERN_NODE,
2232 .node_id = PM_NODE_IDENTIFY(parser),
2233 .location = {
2234 .start = constant->location.start,
2235 .end = closing->end
2236 },
2237 },
2238 .constant = constant,
2239 .rest = NULL,
2240 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2241 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2242 .requireds = { 0 },
2243 .posts = { 0 }
2244 };
2245
2246 return node;
2247}
2248
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2255 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2256
2257 *node = (pm_array_pattern_node_t) {
2258 {
2259 .type = PM_ARRAY_PATTERN_NODE,
2260 .node_id = PM_NODE_IDENTIFY(parser),
2261 .location = {
2262 .start = opening->start,
2263 .end = closing->end
2264 },
2265 },
2266 .constant = NULL,
2267 .rest = NULL,
2268 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2269 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2270 .requireds = { 0 },
2271 .posts = { 0 }
2272 };
2273
2274 return node;
2275}
2276
2277static inline void
2278pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2279 pm_node_list_append(&node->requireds, inner);
2280}
2281
2285static pm_assoc_node_t *
2286pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2287 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2288 const uint8_t *end;
2289
2290 if (value != NULL && value->location.end > key->location.end) {
2291 end = value->location.end;
2292 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2293 end = operator->end;
2294 } else {
2295 end = key->location.end;
2296 }
2297
2298 // Hash string keys will be frozen, so we can mark them as frozen here so
2299 // that the compiler picks them up and also when we check for static literal
2300 // on the keys it gets factored in.
2301 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2302 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2303 }
2304
2305 // If the key and value of this assoc node are both static literals, then
2306 // we can mark this node as a static literal.
2307 pm_node_flags_t flags = 0;
2308 if (
2309 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2310 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2311 ) {
2312 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2313 }
2314
2315 *node = (pm_assoc_node_t) {
2316 {
2317 .type = PM_ASSOC_NODE,
2318 .flags = flags,
2319 .node_id = PM_NODE_IDENTIFY(parser),
2320 .location = {
2321 .start = key->location.start,
2322 .end = end
2323 },
2324 },
2325 .key = key,
2326 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2327 .value = value
2328 };
2329
2330 return node;
2331}
2332
2336static pm_assoc_splat_node_t *
2337pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2338 assert(operator->type == PM_TOKEN_USTAR_STAR);
2339 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2340
2341 *node = (pm_assoc_splat_node_t) {
2342 {
2343 .type = PM_ASSOC_SPLAT_NODE,
2344 .node_id = PM_NODE_IDENTIFY(parser),
2345 .location = {
2346 .start = operator->start,
2347 .end = value == NULL ? operator->end : value->location.end
2348 },
2349 },
2350 .value = value,
2351 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2352 };
2353
2354 return node;
2355}
2356
2360static pm_back_reference_read_node_t *
2361pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2362 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2363 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2364
2365 *node = (pm_back_reference_read_node_t) {
2366 {
2367 .type = PM_BACK_REFERENCE_READ_NODE,
2368 .node_id = PM_NODE_IDENTIFY(parser),
2369 .location = PM_LOCATION_TOKEN_VALUE(name),
2370 },
2371 .name = pm_parser_constant_id_token(parser, name)
2372 };
2373
2374 return node;
2375}
2376
2380static pm_begin_node_t *
2381pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2382 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2383
2384 *node = (pm_begin_node_t) {
2385 {
2386 .type = PM_BEGIN_NODE,
2387 .node_id = PM_NODE_IDENTIFY(parser),
2388 .location = {
2389 .start = begin_keyword->start,
2390 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2391 },
2392 },
2393 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2394 .statements = statements,
2395 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2396 };
2397
2398 return node;
2399}
2400
2404static void
2405pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2406 // If the begin keyword doesn't exist, we set the start on the begin_node
2407 if (!node->begin_keyword_loc.start) {
2408 node->base.location.start = rescue_clause->base.location.start;
2409 }
2410 node->base.location.end = rescue_clause->base.location.end;
2411 node->rescue_clause = rescue_clause;
2412}
2413
2417static void
2418pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2419 node->base.location.end = else_clause->base.location.end;
2420 node->else_clause = else_clause;
2421}
2422
2426static void
2427pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2428 node->base.location.end = ensure_clause->base.location.end;
2429 node->ensure_clause = ensure_clause;
2430}
2431
2435static void
2436pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2437 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2438
2439 node->base.location.end = end_keyword->end;
2440 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2441}
2442
2446static pm_block_argument_node_t *
2447pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2448 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2449
2450 *node = (pm_block_argument_node_t) {
2451 {
2452 .type = PM_BLOCK_ARGUMENT_NODE,
2453 .node_id = PM_NODE_IDENTIFY(parser),
2454 .location = {
2455 .start = operator->start,
2456 .end = expression == NULL ? operator->end : expression->location.end
2457 },
2458 },
2459 .expression = expression,
2460 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2461 };
2462
2463 return node;
2464}
2465
2469static pm_block_node_t *
2470pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2471 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2472
2473 *node = (pm_block_node_t) {
2474 {
2475 .type = PM_BLOCK_NODE,
2476 .node_id = PM_NODE_IDENTIFY(parser),
2477 .location = { .start = opening->start, .end = closing->end },
2478 },
2479 .locals = *locals,
2480 .parameters = parameters,
2481 .body = body,
2482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2484 };
2485
2486 return node;
2487}
2488
2492static pm_block_parameter_node_t *
2493pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2494 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2495 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2496
2497 *node = (pm_block_parameter_node_t) {
2498 {
2499 .type = PM_BLOCK_PARAMETER_NODE,
2500 .node_id = PM_NODE_IDENTIFY(parser),
2501 .location = {
2502 .start = operator->start,
2503 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2504 },
2505 },
2506 .name = pm_parser_optional_constant_id_token(parser, name),
2507 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2508 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2509 };
2510
2511 return node;
2512}
2513
2517static pm_block_parameters_node_t *
2518pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2519 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2520
2521 const uint8_t *start;
2522 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2523 start = opening->start;
2524 } else if (parameters != NULL) {
2525 start = parameters->base.location.start;
2526 } else {
2527 start = NULL;
2528 }
2529
2530 const uint8_t *end;
2531 if (parameters != NULL) {
2532 end = parameters->base.location.end;
2533 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2534 end = opening->end;
2535 } else {
2536 end = NULL;
2537 }
2538
2539 *node = (pm_block_parameters_node_t) {
2540 {
2541 .type = PM_BLOCK_PARAMETERS_NODE,
2542 .node_id = PM_NODE_IDENTIFY(parser),
2543 .location = {
2544 .start = start,
2545 .end = end
2546 }
2547 },
2548 .parameters = parameters,
2549 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2550 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2551 .locals = { 0 }
2552 };
2553
2554 return node;
2555}
2556
2560static void
2561pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2562 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2563
2564 node->base.location.end = closing->end;
2565 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2566}
2567
2571static pm_block_local_variable_node_t *
2572pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2573 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2574
2575 *node = (pm_block_local_variable_node_t) {
2576 {
2577 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2578 .node_id = PM_NODE_IDENTIFY(parser),
2579 .location = PM_LOCATION_TOKEN_VALUE(name),
2580 },
2581 .name = pm_parser_constant_id_token(parser, name)
2582 };
2583
2584 return node;
2585}
2586
2590static void
2591pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2592 pm_node_list_append(&node->locals, (pm_node_t *) local);
2593
2594 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2595 node->base.location.end = local->base.location.end;
2596}
2597
2601static pm_break_node_t *
2602pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2603 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2604 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2605
2606 *node = (pm_break_node_t) {
2607 {
2608 .type = PM_BREAK_NODE,
2609 .node_id = PM_NODE_IDENTIFY(parser),
2610 .location = {
2611 .start = keyword->start,
2612 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2613 },
2614 },
2615 .arguments = arguments,
2616 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2617 };
2618
2619 return node;
2620}
2621
2622// There are certain flags that we want to use internally but don't want to
2623// expose because they are not relevant beyond parsing. Therefore we'll define
2624// them here and not define them in config.yml/a header file.
2625static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2626static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2627static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2628static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2629
2635static pm_call_node_t *
2636pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2637 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2638
2639 *node = (pm_call_node_t) {
2640 {
2641 .type = PM_CALL_NODE,
2642 .flags = flags,
2643 .node_id = PM_NODE_IDENTIFY(parser),
2644 .location = PM_LOCATION_NULL_VALUE(parser),
2645 },
2646 .receiver = NULL,
2647 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2648 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2649 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2650 .arguments = NULL,
2651 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2652 .block = NULL,
2653 .name = 0
2654 };
2655
2656 return node;
2657}
2658
2663static inline pm_node_flags_t
2664pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2665 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2666}
2667
2672static pm_call_node_t *
2673pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2674 pm_assert_value_expression(parser, receiver);
2675
2676 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2677 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2678 flags |= PM_CALL_NODE_FLAGS_INDEX;
2679 }
2680
2681 pm_call_node_t *node = pm_call_node_create(parser, flags);
2682
2683 node->base.location.start = receiver->location.start;
2684 node->base.location.end = pm_arguments_end(arguments);
2685
2686 node->receiver = receiver;
2687 node->message_loc.start = arguments->opening_loc.start;
2688 node->message_loc.end = arguments->closing_loc.end;
2689
2690 node->opening_loc = arguments->opening_loc;
2691 node->arguments = arguments->arguments;
2692 node->closing_loc = arguments->closing_loc;
2693 node->block = arguments->block;
2694
2695 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2696 return node;
2697}
2698
2702static pm_call_node_t *
2703pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2704 pm_assert_value_expression(parser, receiver);
2705 pm_assert_value_expression(parser, argument);
2706
2707 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2708
2709 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2710 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2711
2712 node->receiver = receiver;
2713 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2714
2715 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2716 pm_arguments_node_arguments_append(arguments, argument);
2717 node->arguments = arguments;
2718
2719 node->name = pm_parser_constant_id_token(parser, operator);
2720 return node;
2721}
2722
2726static pm_call_node_t *
2727pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2728 pm_assert_value_expression(parser, receiver);
2729
2730 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2731
2732 node->base.location.start = receiver->location.start;
2733 const uint8_t *end = pm_arguments_end(arguments);
2734 if (end == NULL) {
2735 end = message->end;
2736 }
2737 node->base.location.end = end;
2738
2739 node->receiver = receiver;
2740 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2741 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2742 node->opening_loc = arguments->opening_loc;
2743 node->arguments = arguments->arguments;
2744 node->closing_loc = arguments->closing_loc;
2745 node->block = arguments->block;
2746
2747 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2748 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2749 }
2750
2751 node->name = pm_parser_constant_id_token(parser, message);
2752 return node;
2753}
2754
2758static pm_call_node_t *
2759pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2760 pm_call_node_t *node = pm_call_node_create(parser, 0);
2761 node->base.location.start = parser->start;
2762 node->base.location.end = parser->end;
2763
2764 node->receiver = receiver;
2765 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2766 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2767 node->arguments = arguments;
2768
2769 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2770 return node;
2771}
2772
2777static pm_call_node_t *
2778pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2779 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2780
2781 node->base.location.start = message->start;
2782 node->base.location.end = pm_arguments_end(arguments);
2783
2784 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2785 node->opening_loc = arguments->opening_loc;
2786 node->arguments = arguments->arguments;
2787 node->closing_loc = arguments->closing_loc;
2788 node->block = arguments->block;
2789
2790 node->name = pm_parser_constant_id_token(parser, message);
2791 return node;
2792}
2793
2798static pm_call_node_t *
2799pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2800 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2801
2802 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2803 node->arguments = arguments;
2804
2805 node->name = name;
2806 return node;
2807}
2808
2812static pm_call_node_t *
2813pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2814 pm_assert_value_expression(parser, receiver);
2815 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2816
2817 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2818
2819 node->base.location.start = message->start;
2820 if (arguments->closing_loc.start != NULL) {
2821 node->base.location.end = arguments->closing_loc.end;
2822 } else {
2823 assert(receiver != NULL);
2824 node->base.location.end = receiver->location.end;
2825 }
2826
2827 node->receiver = receiver;
2828 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2829 node->opening_loc = arguments->opening_loc;
2830 node->arguments = arguments->arguments;
2831 node->closing_loc = arguments->closing_loc;
2832
2833 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2834 return node;
2835}
2836
2840static pm_call_node_t *
2841pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2842 pm_assert_value_expression(parser, receiver);
2843
2844 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2845
2846 node->base.location.start = receiver->location.start;
2847 node->base.location.end = pm_arguments_end(arguments);
2848
2849 node->receiver = receiver;
2850 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2851 node->opening_loc = arguments->opening_loc;
2852 node->arguments = arguments->arguments;
2853 node->closing_loc = arguments->closing_loc;
2854 node->block = arguments->block;
2855
2856 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2857 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2858 }
2859
2860 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2861 return node;
2862}
2863
2867static pm_call_node_t *
2868pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2869 pm_assert_value_expression(parser, receiver);
2870
2871 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2872
2873 node->base.location.start = operator->start;
2874 node->base.location.end = receiver->location.end;
2875
2876 node->receiver = receiver;
2877 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2878
2879 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2880 return node;
2881}
2882
2887static pm_call_node_t *
2888pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2889 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2890
2891 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2892 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2893
2894 node->name = pm_parser_constant_id_token(parser, message);
2895 return node;
2896}
2897
2902static inline bool
2903pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2904 return (
2905 (node->message_loc.start != NULL) &&
2906 (node->message_loc.end[-1] != '!') &&
2907 (node->message_loc.end[-1] != '?') &&
2908 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2909 (node->opening_loc.start == NULL) &&
2910 (node->arguments == NULL) &&
2911 (node->block == NULL)
2912 );
2913}
2914
2918static void
2919pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2920 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2921
2922 if (write_constant->length > 0) {
2923 size_t length = write_constant->length - 1;
2924
2925 void *memory = xmalloc(length);
2926 memcpy(memory, write_constant->start, length);
2927
2928 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2929 } else {
2930 // We can get here if the message was missing because of a syntax error.
2931 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2932 }
2933}
2934
2938static pm_call_and_write_node_t *
2939pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2940 assert(target->block == NULL);
2941 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2942 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2943
2944 *node = (pm_call_and_write_node_t) {
2945 {
2946 .type = PM_CALL_AND_WRITE_NODE,
2947 .flags = target->base.flags,
2948 .node_id = PM_NODE_IDENTIFY(parser),
2949 .location = {
2950 .start = target->base.location.start,
2951 .end = value->location.end
2952 }
2953 },
2954 .receiver = target->receiver,
2955 .call_operator_loc = target->call_operator_loc,
2956 .message_loc = target->message_loc,
2957 .read_name = 0,
2958 .write_name = target->name,
2959 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2960 .value = value
2961 };
2962
2963 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2964
2965 // Here we're going to free the target, since it is no longer necessary.
2966 // However, we don't want to call `pm_node_destroy` because we want to keep
2967 // around all of its children since we just reused them.
2968 xfree(target);
2969
2970 return node;
2971}
2972
2977static void
2978pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2979 if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2980 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2981 pm_node_t *node;
2982 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2983 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2984 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2985 break;
2986 }
2987 }
2988 }
2989
2990 if (block != NULL) {
2991 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2992 }
2993 }
2994}
2995
2999static pm_index_and_write_node_t *
3000pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3001 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3002 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
3003
3004 pm_index_arguments_check(parser, target->arguments, target->block);
3005
3006 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3007 *node = (pm_index_and_write_node_t) {
3008 {
3009 .type = PM_INDEX_AND_WRITE_NODE,
3010 .flags = target->base.flags,
3011 .node_id = PM_NODE_IDENTIFY(parser),
3012 .location = {
3013 .start = target->base.location.start,
3014 .end = value->location.end
3015 }
3016 },
3017 .receiver = target->receiver,
3018 .call_operator_loc = target->call_operator_loc,
3019 .opening_loc = target->opening_loc,
3020 .arguments = target->arguments,
3021 .closing_loc = target->closing_loc,
3022 .block = (pm_block_argument_node_t *) target->block,
3023 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3024 .value = value
3025 };
3026
3027 // Here we're going to free the target, since it is no longer necessary.
3028 // However, we don't want to call `pm_node_destroy` because we want to keep
3029 // around all of its children since we just reused them.
3030 xfree(target);
3031
3032 return node;
3033}
3034
3038static pm_call_operator_write_node_t *
3039pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3040 assert(target->block == NULL);
3041 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3042
3043 *node = (pm_call_operator_write_node_t) {
3044 {
3045 .type = PM_CALL_OPERATOR_WRITE_NODE,
3046 .flags = target->base.flags,
3047 .node_id = PM_NODE_IDENTIFY(parser),
3048 .location = {
3049 .start = target->base.location.start,
3050 .end = value->location.end
3051 }
3052 },
3053 .receiver = target->receiver,
3054 .call_operator_loc = target->call_operator_loc,
3055 .message_loc = target->message_loc,
3056 .read_name = 0,
3057 .write_name = target->name,
3058 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3059 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3060 .value = value
3061 };
3062
3063 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3064
3065 // Here we're going to free the target, since it is no longer necessary.
3066 // However, we don't want to call `pm_node_destroy` because we want to keep
3067 // around all of its children since we just reused them.
3068 xfree(target);
3069
3070 return node;
3071}
3072
3076static pm_index_operator_write_node_t *
3077pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3078 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3079
3080 pm_index_arguments_check(parser, target->arguments, target->block);
3081
3082 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3083 *node = (pm_index_operator_write_node_t) {
3084 {
3085 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3086 .flags = target->base.flags,
3087 .node_id = PM_NODE_IDENTIFY(parser),
3088 .location = {
3089 .start = target->base.location.start,
3090 .end = value->location.end
3091 }
3092 },
3093 .receiver = target->receiver,
3094 .call_operator_loc = target->call_operator_loc,
3095 .opening_loc = target->opening_loc,
3096 .arguments = target->arguments,
3097 .closing_loc = target->closing_loc,
3098 .block = (pm_block_argument_node_t *) target->block,
3099 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3100 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3101 .value = value
3102 };
3103
3104 // Here we're going to free the target, since it is no longer necessary.
3105 // However, we don't want to call `pm_node_destroy` because we want to keep
3106 // around all of its children since we just reused them.
3107 xfree(target);
3108
3109 return node;
3110}
3111
3115static pm_call_or_write_node_t *
3116pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3117 assert(target->block == NULL);
3118 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3119 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3120
3121 *node = (pm_call_or_write_node_t) {
3122 {
3123 .type = PM_CALL_OR_WRITE_NODE,
3124 .flags = target->base.flags,
3125 .node_id = PM_NODE_IDENTIFY(parser),
3126 .location = {
3127 .start = target->base.location.start,
3128 .end = value->location.end
3129 }
3130 },
3131 .receiver = target->receiver,
3132 .call_operator_loc = target->call_operator_loc,
3133 .message_loc = target->message_loc,
3134 .read_name = 0,
3135 .write_name = target->name,
3136 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3137 .value = value
3138 };
3139
3140 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3141
3142 // Here we're going to free the target, since it is no longer necessary.
3143 // However, we don't want to call `pm_node_destroy` because we want to keep
3144 // around all of its children since we just reused them.
3145 xfree(target);
3146
3147 return node;
3148}
3149
3153static pm_index_or_write_node_t *
3154pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3155 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3156 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3157
3158 pm_index_arguments_check(parser, target->arguments, target->block);
3159
3160 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3161 *node = (pm_index_or_write_node_t) {
3162 {
3163 .type = PM_INDEX_OR_WRITE_NODE,
3164 .flags = target->base.flags,
3165 .node_id = PM_NODE_IDENTIFY(parser),
3166 .location = {
3167 .start = target->base.location.start,
3168 .end = value->location.end
3169 }
3170 },
3171 .receiver = target->receiver,
3172 .call_operator_loc = target->call_operator_loc,
3173 .opening_loc = target->opening_loc,
3174 .arguments = target->arguments,
3175 .closing_loc = target->closing_loc,
3176 .block = (pm_block_argument_node_t *) target->block,
3177 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3178 .value = value
3179 };
3180
3181 // Here we're going to free the target, since it is no longer necessary.
3182 // However, we don't want to call `pm_node_destroy` because we want to keep
3183 // around all of its children since we just reused them.
3184 xfree(target);
3185
3186 return node;
3187}
3188
3193static pm_call_target_node_t *
3194pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3195 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3196
3197 *node = (pm_call_target_node_t) {
3198 {
3199 .type = PM_CALL_TARGET_NODE,
3200 .flags = target->base.flags,
3201 .node_id = PM_NODE_IDENTIFY(parser),
3202 .location = target->base.location
3203 },
3204 .receiver = target->receiver,
3205 .call_operator_loc = target->call_operator_loc,
3206 .name = target->name,
3207 .message_loc = target->message_loc
3208 };
3209
3210 // Here we're going to free the target, since it is no longer necessary.
3211 // However, we don't want to call `pm_node_destroy` because we want to keep
3212 // around all of its children since we just reused them.
3213 xfree(target);
3214
3215 return node;
3216}
3217
3222static pm_index_target_node_t *
3223pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3224 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3225 pm_node_flags_t flags = target->base.flags;
3226
3227 pm_index_arguments_check(parser, target->arguments, target->block);
3228
3229 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3230 *node = (pm_index_target_node_t) {
3231 {
3232 .type = PM_INDEX_TARGET_NODE,
3233 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3234 .node_id = PM_NODE_IDENTIFY(parser),
3235 .location = target->base.location
3236 },
3237 .receiver = target->receiver,
3238 .opening_loc = target->opening_loc,
3239 .arguments = target->arguments,
3240 .closing_loc = target->closing_loc,
3241 .block = (pm_block_argument_node_t *) target->block,
3242 };
3243
3244 // Here we're going to free the target, since it is no longer necessary.
3245 // However, we don't want to call `pm_node_destroy` because we want to keep
3246 // around all of its children since we just reused them.
3247 xfree(target);
3248
3249 return node;
3250}
3251
3255static pm_capture_pattern_node_t *
3256pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3257 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3258
3259 *node = (pm_capture_pattern_node_t) {
3260 {
3261 .type = PM_CAPTURE_PATTERN_NODE,
3262 .node_id = PM_NODE_IDENTIFY(parser),
3263 .location = {
3264 .start = value->location.start,
3265 .end = target->base.location.end
3266 },
3267 },
3268 .value = value,
3269 .target = target,
3270 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3271 };
3272
3273 return node;
3274}
3275
3279static pm_case_node_t *
3280pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3281 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3282
3283 *node = (pm_case_node_t) {
3284 {
3285 .type = PM_CASE_NODE,
3286 .node_id = PM_NODE_IDENTIFY(parser),
3287 .location = {
3288 .start = case_keyword->start,
3289 .end = end_keyword->end
3290 },
3291 },
3292 .predicate = predicate,
3293 .else_clause = NULL,
3294 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3295 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3296 .conditions = { 0 }
3297 };
3298
3299 return node;
3300}
3301
3305static void
3306pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3307 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3308
3309 pm_node_list_append(&node->conditions, condition);
3310 node->base.location.end = condition->location.end;
3311}
3312
3316static void
3317pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3318 node->else_clause = else_clause;
3319 node->base.location.end = else_clause->base.location.end;
3320}
3321
3325static void
3326pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3327 node->base.location.end = end_keyword->end;
3328 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3329}
3330
3334static pm_case_match_node_t *
3335pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3336 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3337
3338 *node = (pm_case_match_node_t) {
3339 {
3340 .type = PM_CASE_MATCH_NODE,
3341 .node_id = PM_NODE_IDENTIFY(parser),
3342 .location = {
3343 .start = case_keyword->start,
3344 .end = end_keyword->end
3345 },
3346 },
3347 .predicate = predicate,
3348 .else_clause = NULL,
3349 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3350 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3351 .conditions = { 0 }
3352 };
3353
3354 return node;
3355}
3356
3360static void
3361pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3362 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3363
3364 pm_node_list_append(&node->conditions, condition);
3365 node->base.location.end = condition->location.end;
3366}
3367
3371static void
3372pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3373 node->else_clause = else_clause;
3374 node->base.location.end = else_clause->base.location.end;
3375}
3376
3380static void
3381pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3382 node->base.location.end = end_keyword->end;
3383 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3384}
3385
3389static pm_class_node_t *
3390pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3391 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3392
3393 *node = (pm_class_node_t) {
3394 {
3395 .type = PM_CLASS_NODE,
3396 .node_id = PM_NODE_IDENTIFY(parser),
3397 .location = { .start = class_keyword->start, .end = end_keyword->end },
3398 },
3399 .locals = *locals,
3400 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3401 .constant_path = constant_path,
3402 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3403 .superclass = superclass,
3404 .body = body,
3405 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3406 .name = pm_parser_constant_id_token(parser, name)
3407 };
3408
3409 return node;
3410}
3411
3415static pm_class_variable_and_write_node_t *
3416pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3417 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3418 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3419
3420 *node = (pm_class_variable_and_write_node_t) {
3421 {
3422 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3423 .node_id = PM_NODE_IDENTIFY(parser),
3424 .location = {
3425 .start = target->base.location.start,
3426 .end = value->location.end
3427 }
3428 },
3429 .name = target->name,
3430 .name_loc = target->base.location,
3431 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3432 .value = value
3433 };
3434
3435 return node;
3436}
3437
3441static pm_class_variable_operator_write_node_t *
3442pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3443 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3444
3445 *node = (pm_class_variable_operator_write_node_t) {
3446 {
3447 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3448 .node_id = PM_NODE_IDENTIFY(parser),
3449 .location = {
3450 .start = target->base.location.start,
3451 .end = value->location.end
3452 }
3453 },
3454 .name = target->name,
3455 .name_loc = target->base.location,
3456 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3457 .value = value,
3458 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3459 };
3460
3461 return node;
3462}
3463
3467static pm_class_variable_or_write_node_t *
3468pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3469 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3470 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3471
3472 *node = (pm_class_variable_or_write_node_t) {
3473 {
3474 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3475 .node_id = PM_NODE_IDENTIFY(parser),
3476 .location = {
3477 .start = target->base.location.start,
3478 .end = value->location.end
3479 }
3480 },
3481 .name = target->name,
3482 .name_loc = target->base.location,
3483 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3484 .value = value
3485 };
3486
3487 return node;
3488}
3489
3493static pm_class_variable_read_node_t *
3494pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3495 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3496 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3497
3498 *node = (pm_class_variable_read_node_t) {
3499 {
3500 .type = PM_CLASS_VARIABLE_READ_NODE,
3501 .node_id = PM_NODE_IDENTIFY(parser),
3502 .location = PM_LOCATION_TOKEN_VALUE(token)
3503 },
3504 .name = pm_parser_constant_id_token(parser, token)
3505 };
3506
3507 return node;
3508}
3509
3516static inline pm_node_flags_t
3517pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3518 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3519 return flags;
3520 }
3521 return 0;
3522}
3523
3527static pm_class_variable_write_node_t *
3528pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3529 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3530
3531 *node = (pm_class_variable_write_node_t) {
3532 {
3533 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3534 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3535 .node_id = PM_NODE_IDENTIFY(parser),
3536 .location = {
3537 .start = read_node->base.location.start,
3538 .end = value->location.end
3539 },
3540 },
3541 .name = read_node->name,
3542 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3543 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3544 .value = value
3545 };
3546
3547 return node;
3548}
3549
3553static pm_constant_path_and_write_node_t *
3554pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3555 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3556 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3557
3558 *node = (pm_constant_path_and_write_node_t) {
3559 {
3560 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3561 .node_id = PM_NODE_IDENTIFY(parser),
3562 .location = {
3563 .start = target->base.location.start,
3564 .end = value->location.end
3565 }
3566 },
3567 .target = target,
3568 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3569 .value = value
3570 };
3571
3572 return node;
3573}
3574
3578static pm_constant_path_operator_write_node_t *
3579pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3580 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3581
3582 *node = (pm_constant_path_operator_write_node_t) {
3583 {
3584 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3585 .node_id = PM_NODE_IDENTIFY(parser),
3586 .location = {
3587 .start = target->base.location.start,
3588 .end = value->location.end
3589 }
3590 },
3591 .target = target,
3592 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3593 .value = value,
3594 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3595 };
3596
3597 return node;
3598}
3599
3603static pm_constant_path_or_write_node_t *
3604pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3605 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3606 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3607
3608 *node = (pm_constant_path_or_write_node_t) {
3609 {
3610 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3611 .node_id = PM_NODE_IDENTIFY(parser),
3612 .location = {
3613 .start = target->base.location.start,
3614 .end = value->location.end
3615 }
3616 },
3617 .target = target,
3618 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3619 .value = value
3620 };
3621
3622 return node;
3623}
3624
3628static pm_constant_path_node_t *
3629pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3630 pm_assert_value_expression(parser, parent);
3631 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3632
3633 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3634 if (name_token->type == PM_TOKEN_CONSTANT) {
3635 name = pm_parser_constant_id_token(parser, name_token);
3636 }
3637
3638 *node = (pm_constant_path_node_t) {
3639 {
3640 .type = PM_CONSTANT_PATH_NODE,
3641 .node_id = PM_NODE_IDENTIFY(parser),
3642 .location = {
3643 .start = parent == NULL ? delimiter->start : parent->location.start,
3644 .end = name_token->end
3645 },
3646 },
3647 .parent = parent,
3648 .name = name,
3649 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3650 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3651 };
3652
3653 return node;
3654}
3655
3659static pm_constant_path_write_node_t *
3660pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3661 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3662
3663 *node = (pm_constant_path_write_node_t) {
3664 {
3665 .type = PM_CONSTANT_PATH_WRITE_NODE,
3666 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3667 .node_id = PM_NODE_IDENTIFY(parser),
3668 .location = {
3669 .start = target->base.location.start,
3670 .end = value->location.end
3671 },
3672 },
3673 .target = target,
3674 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3675 .value = value
3676 };
3677
3678 return node;
3679}
3680
3684static pm_constant_and_write_node_t *
3685pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3686 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3687 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3688
3689 *node = (pm_constant_and_write_node_t) {
3690 {
3691 .type = PM_CONSTANT_AND_WRITE_NODE,
3692 .node_id = PM_NODE_IDENTIFY(parser),
3693 .location = {
3694 .start = target->base.location.start,
3695 .end = value->location.end
3696 }
3697 },
3698 .name = target->name,
3699 .name_loc = target->base.location,
3700 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3701 .value = value
3702 };
3703
3704 return node;
3705}
3706
3710static pm_constant_operator_write_node_t *
3711pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3712 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3713
3714 *node = (pm_constant_operator_write_node_t) {
3715 {
3716 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3717 .node_id = PM_NODE_IDENTIFY(parser),
3718 .location = {
3719 .start = target->base.location.start,
3720 .end = value->location.end
3721 }
3722 },
3723 .name = target->name,
3724 .name_loc = target->base.location,
3725 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3726 .value = value,
3727 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3728 };
3729
3730 return node;
3731}
3732
3736static pm_constant_or_write_node_t *
3737pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3738 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3739 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3740
3741 *node = (pm_constant_or_write_node_t) {
3742 {
3743 .type = PM_CONSTANT_OR_WRITE_NODE,
3744 .node_id = PM_NODE_IDENTIFY(parser),
3745 .location = {
3746 .start = target->base.location.start,
3747 .end = value->location.end
3748 }
3749 },
3750 .name = target->name,
3751 .name_loc = target->base.location,
3752 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3753 .value = value
3754 };
3755
3756 return node;
3757}
3758
3762static pm_constant_read_node_t *
3763pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3764 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3765 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3766
3767 *node = (pm_constant_read_node_t) {
3768 {
3769 .type = PM_CONSTANT_READ_NODE,
3770 .node_id = PM_NODE_IDENTIFY(parser),
3771 .location = PM_LOCATION_TOKEN_VALUE(name)
3772 },
3773 .name = pm_parser_constant_id_token(parser, name)
3774 };
3775
3776 return node;
3777}
3778
3782static pm_constant_write_node_t *
3783pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3784 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3785
3786 *node = (pm_constant_write_node_t) {
3787 {
3788 .type = PM_CONSTANT_WRITE_NODE,
3789 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3790 .node_id = PM_NODE_IDENTIFY(parser),
3791 .location = {
3792 .start = target->base.location.start,
3793 .end = value->location.end
3794 }
3795 },
3796 .name = target->name,
3797 .name_loc = target->base.location,
3798 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3799 .value = value
3800 };
3801
3802 return node;
3803}
3804
3808static void
3809pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3810 switch (PM_NODE_TYPE(node)) {
3811 case PM_BEGIN_NODE: {
3812 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3813 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3814 break;
3815 }
3816 case PM_PARENTHESES_NODE: {
3817 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3818 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3819 break;
3820 }
3821 case PM_STATEMENTS_NODE: {
3822 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3823 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3824 break;
3825 }
3826 case PM_ARRAY_NODE:
3827 case PM_FLOAT_NODE:
3828 case PM_IMAGINARY_NODE:
3829 case PM_INTEGER_NODE:
3830 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3831 case PM_INTERPOLATED_STRING_NODE:
3832 case PM_INTERPOLATED_SYMBOL_NODE:
3833 case PM_INTERPOLATED_X_STRING_NODE:
3834 case PM_RATIONAL_NODE:
3835 case PM_REGULAR_EXPRESSION_NODE:
3836 case PM_SOURCE_ENCODING_NODE:
3837 case PM_SOURCE_FILE_NODE:
3838 case PM_SOURCE_LINE_NODE:
3839 case PM_STRING_NODE:
3840 case PM_SYMBOL_NODE:
3841 case PM_X_STRING_NODE:
3842 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3843 break;
3844 default:
3845 break;
3846 }
3847}
3848
3852static pm_def_node_t *
3853pm_def_node_create(
3854 pm_parser_t *parser,
3855 pm_constant_id_t name,
3856 const pm_token_t *name_loc,
3857 pm_node_t *receiver,
3858 pm_parameters_node_t *parameters,
3859 pm_node_t *body,
3860 pm_constant_id_list_t *locals,
3861 const pm_token_t *def_keyword,
3862 const pm_token_t *operator,
3863 const pm_token_t *lparen,
3864 const pm_token_t *rparen,
3865 const pm_token_t *equal,
3866 const pm_token_t *end_keyword
3867) {
3868 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3869 const uint8_t *end;
3870
3871 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3872 end = body->location.end;
3873 } else {
3874 end = end_keyword->end;
3875 }
3876
3877 if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
3878 pm_def_node_receiver_check(parser, receiver);
3879 }
3880
3881 *node = (pm_def_node_t) {
3882 {
3883 .type = PM_DEF_NODE,
3884 .node_id = PM_NODE_IDENTIFY(parser),
3885 .location = { .start = def_keyword->start, .end = end },
3886 },
3887 .name = name,
3888 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3889 .receiver = receiver,
3890 .parameters = parameters,
3891 .body = body,
3892 .locals = *locals,
3893 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3894 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3895 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3896 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3897 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3898 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3899 };
3900
3901 return node;
3902}
3903
3907static pm_defined_node_t *
3908pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3909 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3910
3911 *node = (pm_defined_node_t) {
3912 {
3913 .type = PM_DEFINED_NODE,
3914 .node_id = PM_NODE_IDENTIFY(parser),
3915 .location = {
3916 .start = keyword_loc->start,
3917 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3918 },
3919 },
3920 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3921 .value = value,
3922 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3923 .keyword_loc = *keyword_loc
3924 };
3925
3926 return node;
3927}
3928
3932static pm_else_node_t *
3933pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3934 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3935 const uint8_t *end = NULL;
3936 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3937 end = statements->base.location.end;
3938 } else {
3939 end = end_keyword->end;
3940 }
3941
3942 *node = (pm_else_node_t) {
3943 {
3944 .type = PM_ELSE_NODE,
3945 .node_id = PM_NODE_IDENTIFY(parser),
3946 .location = {
3947 .start = else_keyword->start,
3948 .end = end,
3949 },
3950 },
3951 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3952 .statements = statements,
3953 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3954 };
3955
3956 return node;
3957}
3958
3962static pm_embedded_statements_node_t *
3963pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3964 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3965
3966 *node = (pm_embedded_statements_node_t) {
3967 {
3968 .type = PM_EMBEDDED_STATEMENTS_NODE,
3969 .node_id = PM_NODE_IDENTIFY(parser),
3970 .location = {
3971 .start = opening->start,
3972 .end = closing->end
3973 }
3974 },
3975 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3976 .statements = statements,
3977 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3978 };
3979
3980 return node;
3981}
3982
3986static pm_embedded_variable_node_t *
3987pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3988 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3989
3990 *node = (pm_embedded_variable_node_t) {
3991 {
3992 .type = PM_EMBEDDED_VARIABLE_NODE,
3993 .node_id = PM_NODE_IDENTIFY(parser),
3994 .location = {
3995 .start = operator->start,
3996 .end = variable->location.end
3997 }
3998 },
3999 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4000 .variable = variable
4001 };
4002
4003 return node;
4004}
4005
4009static pm_ensure_node_t *
4010pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4011 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4012
4013 *node = (pm_ensure_node_t) {
4014 {
4015 .type = PM_ENSURE_NODE,
4016 .node_id = PM_NODE_IDENTIFY(parser),
4017 .location = {
4018 .start = ensure_keyword->start,
4019 .end = end_keyword->end
4020 },
4021 },
4022 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4023 .statements = statements,
4024 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4025 };
4026
4027 return node;
4028}
4029
4033static pm_false_node_t *
4034pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4035 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4036 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4037
4038 *node = (pm_false_node_t) {{
4039 .type = PM_FALSE_NODE,
4040 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4041 .node_id = PM_NODE_IDENTIFY(parser),
4042 .location = PM_LOCATION_TOKEN_VALUE(token)
4043 }};
4044
4045 return node;
4046}
4047
4052static pm_find_pattern_node_t *
4053pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4054 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4055
4056 pm_node_t *left = nodes->nodes[0];
4057 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4058 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4059
4060 pm_node_t *right;
4061
4062 if (nodes->size == 1) {
4063 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4064 } else {
4065 right = nodes->nodes[nodes->size - 1];
4066 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4067 }
4068
4069#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4070 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4071 // The resulting AST will anyway be ignored, but this file still needs to compile.
4072 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4073#else
4074 pm_node_t *right_splat_node = right;
4075#endif
4076 *node = (pm_find_pattern_node_t) {
4077 {
4078 .type = PM_FIND_PATTERN_NODE,
4079 .node_id = PM_NODE_IDENTIFY(parser),
4080 .location = {
4081 .start = left->location.start,
4082 .end = right->location.end,
4083 },
4084 },
4085 .constant = NULL,
4086 .left = left_splat_node,
4087 .right = right_splat_node,
4088 .requireds = { 0 },
4089 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4090 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4091 };
4092
4093 // For now we're going to just copy over each pointer manually. This could be
4094 // much more efficient, as we could instead resize the node list to only point
4095 // to 1...-1.
4096 for (size_t index = 1; index < nodes->size - 1; index++) {
4097 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4098 }
4099
4100 return node;
4101}
4102
4107static double
4108pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4109 ptrdiff_t diff = token->end - token->start;
4110 if (diff <= 0) return 0.0;
4111
4112 // First, get a buffer of the content.
4113 size_t length = (size_t) diff;
4114 char *buffer = xmalloc(sizeof(char) * (length + 1));
4115 memcpy((void *) buffer, token->start, length);
4116
4117 // Next, determine if we need to replace the decimal point because of
4118 // locale-specific options, and then normalize them if we have to.
4119 char decimal_point = *localeconv()->decimal_point;
4120 if (decimal_point != '.') {
4121 for (size_t index = 0; index < length; index++) {
4122 if (buffer[index] == '.') buffer[index] = decimal_point;
4123 }
4124 }
4125
4126 // Next, handle underscores by removing them from the buffer.
4127 for (size_t index = 0; index < length; index++) {
4128 if (buffer[index] == '_') {
4129 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4130 length--;
4131 }
4132 }
4133
4134 // Null-terminate the buffer so that strtod cannot read off the end.
4135 buffer[length] = '\0';
4136
4137 // Now, call strtod to parse the value. Note that CRuby has their own
4138 // version of strtod which avoids locales. We're okay using the locale-aware
4139 // version because we've already validated through the parser that the token
4140 // is in a valid format.
4141 errno = 0;
4142 char *eptr;
4143 double value = strtod(buffer, &eptr);
4144
4145 // This should never happen, because we've already checked that the token
4146 // is in a valid format. However it's good to be safe.
4147 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4148 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4149 xfree((void *) buffer);
4150 return 0.0;
4151 }
4152
4153 // If errno is set, then it should only be ERANGE. At this point we need to
4154 // check if it's infinity (it should be).
4155 if (errno == ERANGE && PRISM_ISINF(value)) {
4156 int warn_width;
4157 const char *ellipsis;
4158
4159 if (length > 20) {
4160 warn_width = 20;
4161 ellipsis = "...";
4162 } else {
4163 warn_width = (int) length;
4164 ellipsis = "";
4165 }
4166
4167 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4168 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4169 }
4170
4171 // Finally we can free the buffer and return the value.
4172 xfree((void *) buffer);
4173 return value;
4174}
4175
4179static pm_float_node_t *
4180pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4181 assert(token->type == PM_TOKEN_FLOAT);
4182 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4183
4184 *node = (pm_float_node_t) {
4185 {
4186 .type = PM_FLOAT_NODE,
4187 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4188 .node_id = PM_NODE_IDENTIFY(parser),
4189 .location = PM_LOCATION_TOKEN_VALUE(token)
4190 },
4191 .value = pm_double_parse(parser, token)
4192 };
4193
4194 return node;
4195}
4196
4200static pm_imaginary_node_t *
4201pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4202 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4203
4204 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4205 *node = (pm_imaginary_node_t) {
4206 {
4207 .type = PM_IMAGINARY_NODE,
4208 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4209 .node_id = PM_NODE_IDENTIFY(parser),
4210 .location = PM_LOCATION_TOKEN_VALUE(token)
4211 },
4212 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4213 .type = PM_TOKEN_FLOAT,
4214 .start = token->start,
4215 .end = token->end - 1
4216 }))
4217 };
4218
4219 return node;
4220}
4221
4225static pm_rational_node_t *
4226pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4227 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4228
4229 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4230 *node = (pm_rational_node_t) {
4231 {
4232 .type = PM_RATIONAL_NODE,
4233 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4234 .node_id = PM_NODE_IDENTIFY(parser),
4235 .location = PM_LOCATION_TOKEN_VALUE(token)
4236 },
4237 .numerator = { 0 },
4238 .denominator = { 0 }
4239 };
4240
4241 const uint8_t *start = token->start;
4242 const uint8_t *end = token->end - 1; // r
4243
4244 while (start < end && *start == '0') start++; // 0.1 -> .1
4245 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4246
4247 size_t length = (size_t) (end - start);
4248 if (length == 1) {
4249 node->denominator.value = 1;
4250 return node;
4251 }
4252
4253 const uint8_t *point = memchr(start, '.', length);
4254 assert(point && "should have a decimal point");
4255
4256 uint8_t *digits = malloc(length);
4257 if (digits == NULL) {
4258 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4259 abort();
4260 }
4261
4262 memcpy(digits, start, (unsigned long) (point - start));
4263 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4264 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4265
4266 digits[0] = '1';
4267 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4268 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4269 free(digits);
4270
4271 pm_integers_reduce(&node->numerator, &node->denominator);
4272 return node;
4273}
4274
4279static pm_imaginary_node_t *
4280pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4281 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4282
4283 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4284 *node = (pm_imaginary_node_t) {
4285 {
4286 .type = PM_IMAGINARY_NODE,
4287 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4288 .node_id = PM_NODE_IDENTIFY(parser),
4289 .location = PM_LOCATION_TOKEN_VALUE(token)
4290 },
4291 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4292 .type = PM_TOKEN_FLOAT_RATIONAL,
4293 .start = token->start,
4294 .end = token->end - 1
4295 }))
4296 };
4297
4298 return node;
4299}
4300
4304static pm_for_node_t *
4305pm_for_node_create(
4306 pm_parser_t *parser,
4307 pm_node_t *index,
4308 pm_node_t *collection,
4309 pm_statements_node_t *statements,
4310 const pm_token_t *for_keyword,
4311 const pm_token_t *in_keyword,
4312 const pm_token_t *do_keyword,
4313 const pm_token_t *end_keyword
4314) {
4315 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4316
4317 *node = (pm_for_node_t) {
4318 {
4319 .type = PM_FOR_NODE,
4320 .node_id = PM_NODE_IDENTIFY(parser),
4321 .location = {
4322 .start = for_keyword->start,
4323 .end = end_keyword->end
4324 },
4325 },
4326 .index = index,
4327 .collection = collection,
4328 .statements = statements,
4329 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4330 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4331 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4332 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4333 };
4334
4335 return node;
4336}
4337
4341static pm_forwarding_arguments_node_t *
4342pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4343 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4344 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4345
4346 *node = (pm_forwarding_arguments_node_t) {{
4347 .type = PM_FORWARDING_ARGUMENTS_NODE,
4348 .node_id = PM_NODE_IDENTIFY(parser),
4349 .location = PM_LOCATION_TOKEN_VALUE(token)
4350 }};
4351
4352 return node;
4353}
4354
4358static pm_forwarding_parameter_node_t *
4359pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4360 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4361 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4362
4363 *node = (pm_forwarding_parameter_node_t) {{
4364 .type = PM_FORWARDING_PARAMETER_NODE,
4365 .node_id = PM_NODE_IDENTIFY(parser),
4366 .location = PM_LOCATION_TOKEN_VALUE(token)
4367 }};
4368
4369 return node;
4370}
4371
4375static pm_forwarding_super_node_t *
4376pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4377 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4378 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4379 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4380
4381 pm_block_node_t *block = NULL;
4382 if (arguments->block != NULL) {
4383 block = (pm_block_node_t *) arguments->block;
4384 }
4385
4386 *node = (pm_forwarding_super_node_t) {
4387 {
4388 .type = PM_FORWARDING_SUPER_NODE,
4389 .node_id = PM_NODE_IDENTIFY(parser),
4390 .location = {
4391 .start = token->start,
4392 .end = block != NULL ? block->base.location.end : token->end
4393 },
4394 },
4395 .block = block
4396 };
4397
4398 return node;
4399}
4400
4405static pm_hash_pattern_node_t *
4406pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4407 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4408
4409 *node = (pm_hash_pattern_node_t) {
4410 {
4411 .type = PM_HASH_PATTERN_NODE,
4412 .node_id = PM_NODE_IDENTIFY(parser),
4413 .location = {
4414 .start = opening->start,
4415 .end = closing->end
4416 },
4417 },
4418 .constant = NULL,
4419 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4420 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4421 .elements = { 0 },
4422 .rest = NULL
4423 };
4424
4425 return node;
4426}
4427
4431static pm_hash_pattern_node_t *
4432pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4433 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4434
4435 const uint8_t *start;
4436 const uint8_t *end;
4437
4438 if (elements->size > 0) {
4439 if (rest) {
4440 start = elements->nodes[0]->location.start;
4441 end = rest->location.end;
4442 } else {
4443 start = elements->nodes[0]->location.start;
4444 end = elements->nodes[elements->size - 1]->location.end;
4445 }
4446 } else {
4447 assert(rest != NULL);
4448 start = rest->location.start;
4449 end = rest->location.end;
4450 }
4451
4452 *node = (pm_hash_pattern_node_t) {
4453 {
4454 .type = PM_HASH_PATTERN_NODE,
4455 .node_id = PM_NODE_IDENTIFY(parser),
4456 .location = {
4457 .start = start,
4458 .end = end
4459 },
4460 },
4461 .constant = NULL,
4462 .elements = { 0 },
4463 .rest = rest,
4464 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4465 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4466 };
4467
4468 pm_node_t *element;
4469 PM_NODE_LIST_FOREACH(elements, index, element) {
4470 pm_node_list_append(&node->elements, element);
4471 }
4472
4473 return node;
4474}
4475
4479static pm_constant_id_t
4480pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4481 switch (PM_NODE_TYPE(target)) {
4482 case PM_GLOBAL_VARIABLE_READ_NODE:
4483 return ((pm_global_variable_read_node_t *) target)->name;
4484 case PM_BACK_REFERENCE_READ_NODE:
4485 return ((pm_back_reference_read_node_t *) target)->name;
4486 case PM_NUMBERED_REFERENCE_READ_NODE:
4487 // This will only ever happen in the event of a syntax error, but we
4488 // still need to provide something for the node.
4489 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4490 default:
4491 assert(false && "unreachable");
4492 return (pm_constant_id_t) -1;
4493 }
4494}
4495
4499static pm_global_variable_and_write_node_t *
4500pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4501 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4502 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4503
4504 *node = (pm_global_variable_and_write_node_t) {
4505 {
4506 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4507 .node_id = PM_NODE_IDENTIFY(parser),
4508 .location = {
4509 .start = target->location.start,
4510 .end = value->location.end
4511 }
4512 },
4513 .name = pm_global_variable_write_name(parser, target),
4514 .name_loc = target->location,
4515 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4516 .value = value
4517 };
4518
4519 return node;
4520}
4521
4525static pm_global_variable_operator_write_node_t *
4526pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4527 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4528
4529 *node = (pm_global_variable_operator_write_node_t) {
4530 {
4531 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4532 .node_id = PM_NODE_IDENTIFY(parser),
4533 .location = {
4534 .start = target->location.start,
4535 .end = value->location.end
4536 }
4537 },
4538 .name = pm_global_variable_write_name(parser, target),
4539 .name_loc = target->location,
4540 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4541 .value = value,
4542 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4543 };
4544
4545 return node;
4546}
4547
4551static pm_global_variable_or_write_node_t *
4552pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4553 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4554 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4555
4556 *node = (pm_global_variable_or_write_node_t) {
4557 {
4558 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4559 .node_id = PM_NODE_IDENTIFY(parser),
4560 .location = {
4561 .start = target->location.start,
4562 .end = value->location.end
4563 }
4564 },
4565 .name = pm_global_variable_write_name(parser, target),
4566 .name_loc = target->location,
4567 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4568 .value = value
4569 };
4570
4571 return node;
4572}
4573
4577static pm_global_variable_read_node_t *
4578pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4579 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4580
4581 *node = (pm_global_variable_read_node_t) {
4582 {
4583 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4584 .node_id = PM_NODE_IDENTIFY(parser),
4585 .location = PM_LOCATION_TOKEN_VALUE(name),
4586 },
4587 .name = pm_parser_constant_id_token(parser, name)
4588 };
4589
4590 return node;
4591}
4592
4596static pm_global_variable_read_node_t *
4597pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4598 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4599
4600 *node = (pm_global_variable_read_node_t) {
4601 {
4602 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4603 .node_id = PM_NODE_IDENTIFY(parser),
4604 .location = PM_LOCATION_NULL_VALUE(parser)
4605 },
4606 .name = name
4607 };
4608
4609 return node;
4610}
4611
4615static pm_global_variable_write_node_t *
4616pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4617 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4618
4619 *node = (pm_global_variable_write_node_t) {
4620 {
4621 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4622 .node_id = PM_NODE_IDENTIFY(parser),
4623 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4624 .location = {
4625 .start = target->location.start,
4626 .end = value->location.end
4627 },
4628 },
4629 .name = pm_global_variable_write_name(parser, target),
4630 .name_loc = PM_LOCATION_NODE_VALUE(target),
4631 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4632 .value = value
4633 };
4634
4635 return node;
4636}
4637
4641static pm_global_variable_write_node_t *
4642pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4643 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4644
4645 *node = (pm_global_variable_write_node_t) {
4646 {
4647 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4648 .node_id = PM_NODE_IDENTIFY(parser),
4649 .location = PM_LOCATION_NULL_VALUE(parser)
4650 },
4651 .name = name,
4652 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4653 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4654 .value = value
4655 };
4656
4657 return node;
4658}
4659
4663static pm_hash_node_t *
4664pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4665 assert(opening != NULL);
4666 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4667
4668 *node = (pm_hash_node_t) {
4669 {
4670 .type = PM_HASH_NODE,
4671 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4672 .node_id = PM_NODE_IDENTIFY(parser),
4673 .location = PM_LOCATION_TOKEN_VALUE(opening)
4674 },
4675 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4676 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4677 .elements = { 0 }
4678 };
4679
4680 return node;
4681}
4682
4686static inline void
4687pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4688 pm_node_list_append(&hash->elements, element);
4689
4690 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4691 if (static_literal) {
4692 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4693 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4694 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4695 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4696 }
4697
4698 if (!static_literal) {
4699 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4700 }
4701}
4702
4703static inline void
4704pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4705 hash->base.location.end = token->end;
4706 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4707}
4708
4712static pm_if_node_t *
4713pm_if_node_create(pm_parser_t *parser,
4714 const pm_token_t *if_keyword,
4715 pm_node_t *predicate,
4716 const pm_token_t *then_keyword,
4717 pm_statements_node_t *statements,
4718 pm_node_t *subsequent,
4719 const pm_token_t *end_keyword
4720) {
4721 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4722 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4723
4724 const uint8_t *end;
4725 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4726 end = end_keyword->end;
4727 } else if (subsequent != NULL) {
4728 end = subsequent->location.end;
4729 } else if (pm_statements_node_body_length(statements) != 0) {
4730 end = statements->base.location.end;
4731 } else {
4732 end = predicate->location.end;
4733 }
4734
4735 *node = (pm_if_node_t) {
4736 {
4737 .type = PM_IF_NODE,
4738 .flags = PM_NODE_FLAG_NEWLINE,
4739 .node_id = PM_NODE_IDENTIFY(parser),
4740 .location = {
4741 .start = if_keyword->start,
4742 .end = end
4743 },
4744 },
4745 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4746 .predicate = predicate,
4747 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4748 .statements = statements,
4749 .subsequent = subsequent,
4750 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4751 };
4752
4753 return node;
4754}
4755
4759static pm_if_node_t *
4760pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4761 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4762 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4763
4764 pm_statements_node_t *statements = pm_statements_node_create(parser);
4765 pm_statements_node_body_append(parser, statements, statement, true);
4766
4767 *node = (pm_if_node_t) {
4768 {
4769 .type = PM_IF_NODE,
4770 .flags = PM_NODE_FLAG_NEWLINE,
4771 .node_id = PM_NODE_IDENTIFY(parser),
4772 .location = {
4773 .start = statement->location.start,
4774 .end = predicate->location.end
4775 },
4776 },
4777 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4778 .predicate = predicate,
4779 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4780 .statements = statements,
4781 .subsequent = NULL,
4782 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4783 };
4784
4785 return node;
4786}
4787
4791static pm_if_node_t *
4792pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4793 pm_assert_value_expression(parser, predicate);
4794 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4795
4796 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4797 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4798
4799 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4800 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4801
4802 pm_token_t end_keyword = not_provided(parser);
4803 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4804
4805 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4806
4807 *node = (pm_if_node_t) {
4808 {
4809 .type = PM_IF_NODE,
4810 .flags = PM_NODE_FLAG_NEWLINE,
4811 .node_id = PM_NODE_IDENTIFY(parser),
4812 .location = {
4813 .start = predicate->location.start,
4814 .end = false_expression->location.end,
4815 },
4816 },
4817 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4818 .predicate = predicate,
4819 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4820 .statements = if_statements,
4821 .subsequent = (pm_node_t *) else_node,
4822 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4823 };
4824
4825 return node;
4826
4827}
4828
4829static inline void
4830pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4831 node->base.location.end = keyword->end;
4832 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4833}
4834
4835static inline void
4836pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4837 node->base.location.end = keyword->end;
4838 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4839}
4840
4844static pm_implicit_node_t *
4845pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4846 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4847
4848 *node = (pm_implicit_node_t) {
4849 {
4850 .type = PM_IMPLICIT_NODE,
4851 .node_id = PM_NODE_IDENTIFY(parser),
4852 .location = value->location
4853 },
4854 .value = value
4855 };
4856
4857 return node;
4858}
4859
4863static pm_implicit_rest_node_t *
4864pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4865 assert(token->type == PM_TOKEN_COMMA);
4866
4867 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4868
4869 *node = (pm_implicit_rest_node_t) {
4870 {
4871 .type = PM_IMPLICIT_REST_NODE,
4872 .node_id = PM_NODE_IDENTIFY(parser),
4873 .location = PM_LOCATION_TOKEN_VALUE(token)
4874 }
4875 };
4876
4877 return node;
4878}
4879
4883static pm_integer_node_t *
4884pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4885 assert(token->type == PM_TOKEN_INTEGER);
4886 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4887
4888 *node = (pm_integer_node_t) {
4889 {
4890 .type = PM_INTEGER_NODE,
4891 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4892 .node_id = PM_NODE_IDENTIFY(parser),
4893 .location = PM_LOCATION_TOKEN_VALUE(token)
4894 },
4895 .value = { 0 }
4896 };
4897
4898 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4899 switch (base) {
4900 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4901 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4902 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4903 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4904 default: assert(false && "unreachable"); break;
4905 }
4906
4907 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4908 return node;
4909}
4910
4915static pm_imaginary_node_t *
4916pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4917 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4918
4919 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4920 *node = (pm_imaginary_node_t) {
4921 {
4922 .type = PM_IMAGINARY_NODE,
4923 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4924 .node_id = PM_NODE_IDENTIFY(parser),
4925 .location = PM_LOCATION_TOKEN_VALUE(token)
4926 },
4927 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4928 .type = PM_TOKEN_INTEGER,
4929 .start = token->start,
4930 .end = token->end - 1
4931 }))
4932 };
4933
4934 return node;
4935}
4936
4941static pm_rational_node_t *
4942pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4943 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4944
4945 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4946 *node = (pm_rational_node_t) {
4947 {
4948 .type = PM_RATIONAL_NODE,
4949 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4950 .node_id = PM_NODE_IDENTIFY(parser),
4951 .location = PM_LOCATION_TOKEN_VALUE(token)
4952 },
4953 .numerator = { 0 },
4954 .denominator = { .value = 1, 0 }
4955 };
4956
4957 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4958 switch (base) {
4959 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4960 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4961 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4962 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4963 default: assert(false && "unreachable"); break;
4964 }
4965
4966 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4967
4968 return node;
4969}
4970
4975static pm_imaginary_node_t *
4976pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4977 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4978
4979 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4980 *node = (pm_imaginary_node_t) {
4981 {
4982 .type = PM_IMAGINARY_NODE,
4983 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4984 .node_id = PM_NODE_IDENTIFY(parser),
4985 .location = PM_LOCATION_TOKEN_VALUE(token)
4986 },
4987 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4988 .type = PM_TOKEN_INTEGER_RATIONAL,
4989 .start = token->start,
4990 .end = token->end - 1
4991 }))
4992 };
4993
4994 return node;
4995}
4996
5000static pm_in_node_t *
5001pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
5002 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
5003
5004 const uint8_t *end;
5005 if (statements != NULL) {
5006 end = statements->base.location.end;
5007 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
5008 end = then_keyword->end;
5009 } else {
5010 end = pattern->location.end;
5011 }
5012
5013 *node = (pm_in_node_t) {
5014 {
5015 .type = PM_IN_NODE,
5016 .node_id = PM_NODE_IDENTIFY(parser),
5017 .location = {
5018 .start = in_keyword->start,
5019 .end = end
5020 },
5021 },
5022 .pattern = pattern,
5023 .statements = statements,
5024 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5025 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5026 };
5027
5028 return node;
5029}
5030
5034static pm_instance_variable_and_write_node_t *
5035pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5036 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5037 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5038
5039 *node = (pm_instance_variable_and_write_node_t) {
5040 {
5041 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5042 .node_id = PM_NODE_IDENTIFY(parser),
5043 .location = {
5044 .start = target->base.location.start,
5045 .end = value->location.end
5046 }
5047 },
5048 .name = target->name,
5049 .name_loc = target->base.location,
5050 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5051 .value = value
5052 };
5053
5054 return node;
5055}
5056
5060static pm_instance_variable_operator_write_node_t *
5061pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5062 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5063
5064 *node = (pm_instance_variable_operator_write_node_t) {
5065 {
5066 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5067 .node_id = PM_NODE_IDENTIFY(parser),
5068 .location = {
5069 .start = target->base.location.start,
5070 .end = value->location.end
5071 }
5072 },
5073 .name = target->name,
5074 .name_loc = target->base.location,
5075 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5076 .value = value,
5077 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5078 };
5079
5080 return node;
5081}
5082
5086static pm_instance_variable_or_write_node_t *
5087pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5088 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5089 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5090
5091 *node = (pm_instance_variable_or_write_node_t) {
5092 {
5093 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5094 .node_id = PM_NODE_IDENTIFY(parser),
5095 .location = {
5096 .start = target->base.location.start,
5097 .end = value->location.end
5098 }
5099 },
5100 .name = target->name,
5101 .name_loc = target->base.location,
5102 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5103 .value = value
5104 };
5105
5106 return node;
5107}
5108
5112static pm_instance_variable_read_node_t *
5113pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5114 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5115 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5116
5117 *node = (pm_instance_variable_read_node_t) {
5118 {
5119 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5120 .node_id = PM_NODE_IDENTIFY(parser),
5121 .location = PM_LOCATION_TOKEN_VALUE(token)
5122 },
5123 .name = pm_parser_constant_id_token(parser, token)
5124 };
5125
5126 return node;
5127}
5128
5133static pm_instance_variable_write_node_t *
5134pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5135 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5136 *node = (pm_instance_variable_write_node_t) {
5137 {
5138 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5139 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5140 .node_id = PM_NODE_IDENTIFY(parser),
5141 .location = {
5142 .start = read_node->base.location.start,
5143 .end = value->location.end
5144 }
5145 },
5146 .name = read_node->name,
5147 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5148 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5149 .value = value
5150 };
5151
5152 return node;
5153}
5154
5160static void
5161pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5162 switch (PM_NODE_TYPE(part)) {
5163 case PM_STRING_NODE:
5164 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5165 break;
5166 case PM_EMBEDDED_STATEMENTS_NODE: {
5167 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5168 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5169
5170 if (embedded == NULL) {
5171 // If there are no statements or more than one statement, then
5172 // we lose the static literal flag.
5173 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5174 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5175 // If the embedded statement is a string, then we can keep the
5176 // static literal flag and mark the string as frozen.
5177 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5178 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5179 // If the embedded statement is an interpolated string and it's
5180 // a static literal, then we can keep the static literal flag.
5181 } else {
5182 // Otherwise we lose the static literal flag.
5183 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5184 }
5185
5186 break;
5187 }
5188 case PM_EMBEDDED_VARIABLE_NODE:
5189 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5190 break;
5191 default:
5192 assert(false && "unexpected node type");
5193 break;
5194 }
5195
5196 pm_node_list_append(parts, part);
5197}
5198
5202static pm_interpolated_regular_expression_node_t *
5203pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5204 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5205
5206 *node = (pm_interpolated_regular_expression_node_t) {
5207 {
5208 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5209 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5210 .node_id = PM_NODE_IDENTIFY(parser),
5211 .location = {
5212 .start = opening->start,
5213 .end = NULL,
5214 },
5215 },
5216 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5217 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5218 .parts = { 0 }
5219 };
5220
5221 return node;
5222}
5223
5224static inline void
5225pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5226 if (node->base.location.start > part->location.start) {
5227 node->base.location.start = part->location.start;
5228 }
5229 if (node->base.location.end < part->location.end) {
5230 node->base.location.end = part->location.end;
5231 }
5232
5233 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5234}
5235
5236static inline void
5237pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5238 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5239 node->base.location.end = closing->end;
5240 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5241}
5242
5266static inline void
5267pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5268#define CLEAR_FLAGS(node) \
5269 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5270
5271#define MUTABLE_FLAGS(node) \
5272 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5273
5274 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5275 node->base.location.start = part->location.start;
5276 }
5277
5278 node->base.location.end = MAX(node->base.location.end, part->location.end);
5279
5280 switch (PM_NODE_TYPE(part)) {
5281 case PM_STRING_NODE:
5282 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5283 break;
5284 case PM_INTERPOLATED_STRING_NODE:
5285 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5286 // If the string that we're concatenating is a static literal,
5287 // then we can keep the static literal flag for this string.
5288 } else {
5289 // Otherwise, we lose the static literal flag here and we should
5290 // also clear the mutability flags.
5291 CLEAR_FLAGS(node);
5292 }
5293 break;
5294 case PM_EMBEDDED_STATEMENTS_NODE: {
5295 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5296 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5297
5298 if (embedded == NULL) {
5299 // If we're embedding multiple statements or no statements, then
5300 // the string is not longer a static literal.
5301 CLEAR_FLAGS(node);
5302 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5303 // If the embedded statement is a string, then we can make that
5304 // string as frozen and static literal, and not touch the static
5305 // literal status of this string.
5306 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5307
5308 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5309 MUTABLE_FLAGS(node);
5310 }
5311 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5312 // If the embedded statement is an interpolated string, but that
5313 // string is marked as static literal, then we can keep our
5314 // static literal status for this string.
5315 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5316 MUTABLE_FLAGS(node);
5317 }
5318 } else {
5319 // In all other cases, we lose the static literal flag here and
5320 // become mutable.
5321 CLEAR_FLAGS(node);
5322 }
5323
5324 break;
5325 }
5326 case PM_EMBEDDED_VARIABLE_NODE:
5327 // Embedded variables clear static literal, which means we also
5328 // should clear the mutability flags.
5329 CLEAR_FLAGS(node);
5330 break;
5331 default:
5332 assert(false && "unexpected node type");
5333 break;
5334 }
5335
5336 pm_node_list_append(&node->parts, part);
5337
5338#undef CLEAR_FLAGS
5339#undef MUTABLE_FLAGS
5340}
5341
5345static pm_interpolated_string_node_t *
5346pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5347 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5348 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5349
5350 switch (parser->frozen_string_literal) {
5351 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5352 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5353 break;
5354 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5355 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5356 break;
5357 }
5358
5359 *node = (pm_interpolated_string_node_t) {
5360 {
5361 .type = PM_INTERPOLATED_STRING_NODE,
5362 .flags = flags,
5363 .node_id = PM_NODE_IDENTIFY(parser),
5364 .location = {
5365 .start = opening->start,
5366 .end = closing->end,
5367 },
5368 },
5369 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5370 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5371 .parts = { 0 }
5372 };
5373
5374 if (parts != NULL) {
5375 pm_node_t *part;
5376 PM_NODE_LIST_FOREACH(parts, index, part) {
5377 pm_interpolated_string_node_append(node, part);
5378 }
5379 }
5380
5381 return node;
5382}
5383
5387static void
5388pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5389 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5390 node->base.location.end = closing->end;
5391}
5392
5393static void
5394pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5395 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5396 node->base.location.start = part->location.start;
5397 }
5398
5399 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5400 node->base.location.end = MAX(node->base.location.end, part->location.end);
5401}
5402
5403static void
5404pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5405 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5406 node->base.location.end = closing->end;
5407}
5408
5412static pm_interpolated_symbol_node_t *
5413pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5414 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5415
5416 *node = (pm_interpolated_symbol_node_t) {
5417 {
5418 .type = PM_INTERPOLATED_SYMBOL_NODE,
5419 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5420 .node_id = PM_NODE_IDENTIFY(parser),
5421 .location = {
5422 .start = opening->start,
5423 .end = closing->end,
5424 },
5425 },
5426 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5427 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5428 .parts = { 0 }
5429 };
5430
5431 if (parts != NULL) {
5432 pm_node_t *part;
5433 PM_NODE_LIST_FOREACH(parts, index, part) {
5434 pm_interpolated_symbol_node_append(node, part);
5435 }
5436 }
5437
5438 return node;
5439}
5440
5444static pm_interpolated_x_string_node_t *
5445pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5446 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5447
5448 *node = (pm_interpolated_x_string_node_t) {
5449 {
5450 .type = PM_INTERPOLATED_X_STRING_NODE,
5451 .node_id = PM_NODE_IDENTIFY(parser),
5452 .location = {
5453 .start = opening->start,
5454 .end = closing->end
5455 },
5456 },
5457 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5458 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5459 .parts = { 0 }
5460 };
5461
5462 return node;
5463}
5464
5465static inline void
5466pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5467 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5468 node->base.location.end = part->location.end;
5469}
5470
5471static inline void
5472pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5473 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5474 node->base.location.end = closing->end;
5475}
5476
5480static pm_it_local_variable_read_node_t *
5481pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5482 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5483
5484 *node = (pm_it_local_variable_read_node_t) {
5485 {
5486 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5487 .node_id = PM_NODE_IDENTIFY(parser),
5488 .location = PM_LOCATION_TOKEN_VALUE(name)
5489 }
5490 };
5491
5492 return node;
5493}
5494
5498static pm_it_parameters_node_t *
5499pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5500 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5501
5502 *node = (pm_it_parameters_node_t) {
5503 {
5504 .type = PM_IT_PARAMETERS_NODE,
5505 .node_id = PM_NODE_IDENTIFY(parser),
5506 .location = {
5507 .start = opening->start,
5508 .end = closing->end
5509 }
5510 }
5511 };
5512
5513 return node;
5514}
5515
5519static pm_keyword_hash_node_t *
5520pm_keyword_hash_node_create(pm_parser_t *parser) {
5521 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5522
5523 *node = (pm_keyword_hash_node_t) {
5524 .base = {
5525 .type = PM_KEYWORD_HASH_NODE,
5526 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5527 .node_id = PM_NODE_IDENTIFY(parser),
5528 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5529 },
5530 .elements = { 0 }
5531 };
5532
5533 return node;
5534}
5535
5539static void
5540pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5541 // If the element being added is not an AssocNode or does not have a symbol
5542 // key, then we want to turn the SYMBOL_KEYS flag off.
5543 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5544 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5545 }
5546
5547 pm_node_list_append(&hash->elements, element);
5548 if (hash->base.location.start == NULL) {
5549 hash->base.location.start = element->location.start;
5550 }
5551 hash->base.location.end = element->location.end;
5552}
5553
5557static pm_required_keyword_parameter_node_t *
5558pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5559 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5560
5561 *node = (pm_required_keyword_parameter_node_t) {
5562 {
5563 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5564 .node_id = PM_NODE_IDENTIFY(parser),
5565 .location = {
5566 .start = name->start,
5567 .end = name->end
5568 },
5569 },
5570 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5571 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5572 };
5573
5574 return node;
5575}
5576
5580static pm_optional_keyword_parameter_node_t *
5581pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5582 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5583
5584 *node = (pm_optional_keyword_parameter_node_t) {
5585 {
5586 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5587 .node_id = PM_NODE_IDENTIFY(parser),
5588 .location = {
5589 .start = name->start,
5590 .end = value->location.end
5591 },
5592 },
5593 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5594 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5595 .value = value
5596 };
5597
5598 return node;
5599}
5600
5604static pm_keyword_rest_parameter_node_t *
5605pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5606 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5607
5608 *node = (pm_keyword_rest_parameter_node_t) {
5609 {
5610 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5611 .node_id = PM_NODE_IDENTIFY(parser),
5612 .location = {
5613 .start = operator->start,
5614 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5615 },
5616 },
5617 .name = pm_parser_optional_constant_id_token(parser, name),
5618 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5619 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5620 };
5621
5622 return node;
5623}
5624
5628static pm_lambda_node_t *
5629pm_lambda_node_create(
5630 pm_parser_t *parser,
5631 pm_constant_id_list_t *locals,
5632 const pm_token_t *operator,
5633 const pm_token_t *opening,
5634 const pm_token_t *closing,
5635 pm_node_t *parameters,
5636 pm_node_t *body
5637) {
5638 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5639
5640 *node = (pm_lambda_node_t) {
5641 {
5642 .type = PM_LAMBDA_NODE,
5643 .node_id = PM_NODE_IDENTIFY(parser),
5644 .location = {
5645 .start = operator->start,
5646 .end = closing->end
5647 },
5648 },
5649 .locals = *locals,
5650 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5651 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5652 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5653 .parameters = parameters,
5654 .body = body
5655 };
5656
5657 return node;
5658}
5659
5663static pm_local_variable_and_write_node_t *
5664pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5665 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5666 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5667 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5668
5669 *node = (pm_local_variable_and_write_node_t) {
5670 {
5671 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5672 .node_id = PM_NODE_IDENTIFY(parser),
5673 .location = {
5674 .start = target->location.start,
5675 .end = value->location.end
5676 }
5677 },
5678 .name_loc = target->location,
5679 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5680 .value = value,
5681 .name = name,
5682 .depth = depth
5683 };
5684
5685 return node;
5686}
5687
5691static pm_local_variable_operator_write_node_t *
5692pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5693 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5694
5695 *node = (pm_local_variable_operator_write_node_t) {
5696 {
5697 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5698 .node_id = PM_NODE_IDENTIFY(parser),
5699 .location = {
5700 .start = target->location.start,
5701 .end = value->location.end
5702 }
5703 },
5704 .name_loc = target->location,
5705 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5706 .value = value,
5707 .name = name,
5708 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5709 .depth = depth
5710 };
5711
5712 return node;
5713}
5714
5718static pm_local_variable_or_write_node_t *
5719pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5720 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5721 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5722 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5723
5724 *node = (pm_local_variable_or_write_node_t) {
5725 {
5726 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5727 .node_id = PM_NODE_IDENTIFY(parser),
5728 .location = {
5729 .start = target->location.start,
5730 .end = value->location.end
5731 }
5732 },
5733 .name_loc = target->location,
5734 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5735 .value = value,
5736 .name = name,
5737 .depth = depth
5738 };
5739
5740 return node;
5741}
5742
5746static pm_local_variable_read_node_t *
5747pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5748 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5749
5750 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5751
5752 *node = (pm_local_variable_read_node_t) {
5753 {
5754 .type = PM_LOCAL_VARIABLE_READ_NODE,
5755 .node_id = PM_NODE_IDENTIFY(parser),
5756 .location = PM_LOCATION_TOKEN_VALUE(name)
5757 },
5758 .name = name_id,
5759 .depth = depth
5760 };
5761
5762 return node;
5763}
5764
5768static pm_local_variable_read_node_t *
5769pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5770 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5771 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5772}
5773
5778static pm_local_variable_read_node_t *
5779pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5780 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5781 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5782}
5783
5787static pm_local_variable_write_node_t *
5788pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5789 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5790
5791 *node = (pm_local_variable_write_node_t) {
5792 {
5793 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5794 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5795 .node_id = PM_NODE_IDENTIFY(parser),
5796 .location = {
5797 .start = name_loc->start,
5798 .end = value->location.end
5799 }
5800 },
5801 .name = name,
5802 .depth = depth,
5803 .value = value,
5804 .name_loc = *name_loc,
5805 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5806 };
5807
5808 return node;
5809}
5810
/**
 * Returns true if the byte range [start, end) is exactly the two bytes `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Check the length first so that the bytes are only read when both exist.
    if (end - start != 2) return false;

    return start[0] == 'i' && start[1] == 't';
}
5818
/**
 * Returns true if the byte range [start, end) has the shape of a numbered
 * parameter: exactly two bytes, an underscore followed by a decimal digit
 * other than `0`.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    // Check the length first so that the bytes are only read when both exist.
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
5827
5832static inline void
5833pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5834 if (pm_token_is_numbered_parameter(start, end)) {
5835 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5836 }
5837}
5838
5843static pm_local_variable_target_node_t *
5844pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5845 pm_refute_numbered_parameter(parser, location->start, location->end);
5846 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5847
5848 *node = (pm_local_variable_target_node_t) {
5849 {
5850 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5851 .node_id = PM_NODE_IDENTIFY(parser),
5852 .location = *location
5853 },
5854 .name = name,
5855 .depth = depth
5856 };
5857
5858 return node;
5859}
5860
5864static pm_match_predicate_node_t *
5865pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5866 pm_assert_value_expression(parser, value);
5867
5868 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5869
5870 *node = (pm_match_predicate_node_t) {
5871 {
5872 .type = PM_MATCH_PREDICATE_NODE,
5873 .node_id = PM_NODE_IDENTIFY(parser),
5874 .location = {
5875 .start = value->location.start,
5876 .end = pattern->location.end
5877 }
5878 },
5879 .value = value,
5880 .pattern = pattern,
5881 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5882 };
5883
5884 return node;
5885}
5886
5890static pm_match_required_node_t *
5891pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5892 pm_assert_value_expression(parser, value);
5893
5894 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5895
5896 *node = (pm_match_required_node_t) {
5897 {
5898 .type = PM_MATCH_REQUIRED_NODE,
5899 .node_id = PM_NODE_IDENTIFY(parser),
5900 .location = {
5901 .start = value->location.start,
5902 .end = pattern->location.end
5903 }
5904 },
5905 .value = value,
5906 .pattern = pattern,
5907 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5908 };
5909
5910 return node;
5911}
5912
5916static pm_match_write_node_t *
5917pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5918 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5919
5920 *node = (pm_match_write_node_t) {
5921 {
5922 .type = PM_MATCH_WRITE_NODE,
5923 .node_id = PM_NODE_IDENTIFY(parser),
5924 .location = call->base.location
5925 },
5926 .call = call,
5927 .targets = { 0 }
5928 };
5929
5930 return node;
5931}
5932
5936static pm_module_node_t *
5937pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5938 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5939
5940 *node = (pm_module_node_t) {
5941 {
5942 .type = PM_MODULE_NODE,
5943 .node_id = PM_NODE_IDENTIFY(parser),
5944 .location = {
5945 .start = module_keyword->start,
5946 .end = end_keyword->end
5947 }
5948 },
5949 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5950 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5951 .constant_path = constant_path,
5952 .body = body,
5953 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5954 .name = pm_parser_constant_id_token(parser, name)
5955 };
5956
5957 return node;
5958}
5959
5963static pm_multi_target_node_t *
5964pm_multi_target_node_create(pm_parser_t *parser) {
5965 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5966
5967 *node = (pm_multi_target_node_t) {
5968 {
5969 .type = PM_MULTI_TARGET_NODE,
5970 .node_id = PM_NODE_IDENTIFY(parser),
5971 .location = { .start = NULL, .end = NULL }
5972 },
5973 .lefts = { 0 },
5974 .rest = NULL,
5975 .rights = { 0 },
5976 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5977 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5978 };
5979
5980 return node;
5981}
5982
5986static void
5987pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5988 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5989 if (node->rest == NULL) {
5990 node->rest = target;
5991 } else {
5992 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5993 pm_node_list_append(&node->rights, target);
5994 }
5995 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5996 if (node->rest == NULL) {
5997 node->rest = target;
5998 } else {
5999 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
6000 pm_node_list_append(&node->rights, target);
6001 }
6002 } else if (node->rest == NULL) {
6003 pm_node_list_append(&node->lefts, target);
6004 } else {
6005 pm_node_list_append(&node->rights, target);
6006 }
6007
6008 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
6009 node->base.location.start = target->location.start;
6010 }
6011
6012 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6013 node->base.location.end = target->location.end;
6014 }
6015}
6016
6020static void
6021pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6022 node->base.location.start = lparen->start;
6023 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6024}
6025
6029static void
6030pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6031 node->base.location.end = rparen->end;
6032 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6033}
6034
6038static pm_multi_write_node_t *
6039pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6040 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6041
6042 *node = (pm_multi_write_node_t) {
6043 {
6044 .type = PM_MULTI_WRITE_NODE,
6045 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6046 .node_id = PM_NODE_IDENTIFY(parser),
6047 .location = {
6048 .start = target->base.location.start,
6049 .end = value->location.end
6050 }
6051 },
6052 .lefts = target->lefts,
6053 .rest = target->rest,
6054 .rights = target->rights,
6055 .lparen_loc = target->lparen_loc,
6056 .rparen_loc = target->rparen_loc,
6057 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6058 .value = value
6059 };
6060
6061 // Explicitly do not call pm_node_destroy here because we want to keep
6062 // around all of the information within the MultiWriteNode node.
6063 xfree(target);
6064
6065 return node;
6066}
6067
6071static pm_next_node_t *
6072pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6073 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6074 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6075
6076 *node = (pm_next_node_t) {
6077 {
6078 .type = PM_NEXT_NODE,
6079 .node_id = PM_NODE_IDENTIFY(parser),
6080 .location = {
6081 .start = keyword->start,
6082 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6083 }
6084 },
6085 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6086 .arguments = arguments
6087 };
6088
6089 return node;
6090}
6091
6095static pm_nil_node_t *
6096pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6097 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6098 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6099
6100 *node = (pm_nil_node_t) {{
6101 .type = PM_NIL_NODE,
6102 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6103 .node_id = PM_NODE_IDENTIFY(parser),
6104 .location = PM_LOCATION_TOKEN_VALUE(token)
6105 }};
6106
6107 return node;
6108}
6109
6113static pm_no_keywords_parameter_node_t *
6114pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6115 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6116 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6117 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6118
6119 *node = (pm_no_keywords_parameter_node_t) {
6120 {
6121 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6122 .node_id = PM_NODE_IDENTIFY(parser),
6123 .location = {
6124 .start = operator->start,
6125 .end = keyword->end
6126 }
6127 },
6128 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6129 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6130 };
6131
6132 return node;
6133}
6134
6138static pm_numbered_parameters_node_t *
6139pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6140 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6141
6142 *node = (pm_numbered_parameters_node_t) {
6143 {
6144 .type = PM_NUMBERED_PARAMETERS_NODE,
6145 .node_id = PM_NODE_IDENTIFY(parser),
6146 .location = *location
6147 },
6148 .maximum = maximum
6149 };
6150
6151 return node;
6152}
6153
6158#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6159
6166static uint32_t
6167pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6168 const uint8_t *start = token->start + 1;
6169 const uint8_t *end = token->end;
6170
6171 ptrdiff_t diff = end - start;
6172 assert(diff > 0);
6173#if PTRDIFF_MAX > SIZE_MAX
6174 assert(diff < (ptrdiff_t) SIZE_MAX);
6175#endif
6176 size_t length = (size_t) diff;
6177
6178 char *digits = xcalloc(length + 1, sizeof(char));
6179 memcpy(digits, start, length);
6180 digits[length] = '\0';
6181
6182 char *endptr;
6183 errno = 0;
6184 unsigned long value = strtoul(digits, &endptr, 10);
6185
6186 if ((digits == endptr) || (*endptr != '\0')) {
6187 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6188 value = 0;
6189 }
6190
6191 xfree(digits);
6192
6193 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6194 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6195 value = 0;
6196 }
6197
6198 return (uint32_t) value;
6199}
6200
6201#undef NTH_REF_MAX
6202
6206static pm_numbered_reference_read_node_t *
6207pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6208 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6209 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6210
6211 *node = (pm_numbered_reference_read_node_t) {
6212 {
6213 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6214 .node_id = PM_NODE_IDENTIFY(parser),
6215 .location = PM_LOCATION_TOKEN_VALUE(name),
6216 },
6217 .number = pm_numbered_reference_read_node_number(parser, name)
6218 };
6219
6220 return node;
6221}
6222
6226static pm_optional_parameter_node_t *
6227pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6228 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6229
6230 *node = (pm_optional_parameter_node_t) {
6231 {
6232 .type = PM_OPTIONAL_PARAMETER_NODE,
6233 .node_id = PM_NODE_IDENTIFY(parser),
6234 .location = {
6235 .start = name->start,
6236 .end = value->location.end
6237 }
6238 },
6239 .name = pm_parser_constant_id_token(parser, name),
6240 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6241 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6242 .value = value
6243 };
6244
6245 return node;
6246}
6247
6251static pm_or_node_t *
6252pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6253 pm_assert_value_expression(parser, left);
6254
6255 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6256
6257 *node = (pm_or_node_t) {
6258 {
6259 .type = PM_OR_NODE,
6260 .node_id = PM_NODE_IDENTIFY(parser),
6261 .location = {
6262 .start = left->location.start,
6263 .end = right->location.end
6264 }
6265 },
6266 .left = left,
6267 .right = right,
6268 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6269 };
6270
6271 return node;
6272}
6273
6277static pm_parameters_node_t *
6278pm_parameters_node_create(pm_parser_t *parser) {
6279 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6280
6281 *node = (pm_parameters_node_t) {
6282 {
6283 .type = PM_PARAMETERS_NODE,
6284 .node_id = PM_NODE_IDENTIFY(parser),
6285 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6286 },
6287 .rest = NULL,
6288 .keyword_rest = NULL,
6289 .block = NULL,
6290 .requireds = { 0 },
6291 .optionals = { 0 },
6292 .posts = { 0 },
6293 .keywords = { 0 }
6294 };
6295
6296 return node;
6297}
6298
6302static void
6303pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6304 if (params->base.location.start == NULL) {
6305 params->base.location.start = param->location.start;
6306 } else {
6307 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6308 }
6309
6310 if (params->base.location.end == NULL) {
6311 params->base.location.end = param->location.end;
6312 } else {
6313 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6314 }
6315}
6316
6320static void
6321pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6322 pm_parameters_node_location_set(params, param);
6323 pm_node_list_append(&params->requireds, param);
6324}
6325
6329static void
6330pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6331 pm_parameters_node_location_set(params, (pm_node_t *) param);
6332 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6333}
6334
6338static void
6339pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6340 pm_parameters_node_location_set(params, param);
6341 pm_node_list_append(&params->posts, param);
6342}
6343
6347static void
6348pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6349 pm_parameters_node_location_set(params, param);
6350 params->rest = param;
6351}
6352
6356static void
6357pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6358 pm_parameters_node_location_set(params, param);
6359 pm_node_list_append(&params->keywords, param);
6360}
6361
6365static void
6366pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6367 assert(params->keyword_rest == NULL);
6368 pm_parameters_node_location_set(params, param);
6369 params->keyword_rest = param;
6370}
6371
6375static void
6376pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6377 assert(params->block == NULL);
6378 pm_parameters_node_location_set(params, (pm_node_t *) param);
6379 params->block = param;
6380}
6381
6385static pm_program_node_t *
6386pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6387 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6388
6389 *node = (pm_program_node_t) {
6390 {
6391 .type = PM_PROGRAM_NODE,
6392 .node_id = PM_NODE_IDENTIFY(parser),
6393 .location = {
6394 .start = statements == NULL ? parser->start : statements->base.location.start,
6395 .end = statements == NULL ? parser->end : statements->base.location.end
6396 }
6397 },
6398 .locals = *locals,
6399 .statements = statements
6400 };
6401
6402 return node;
6403}
6404
6408static pm_parentheses_node_t *
6409pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
6410 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6411
6412 *node = (pm_parentheses_node_t) {
6413 {
6414 .type = PM_PARENTHESES_NODE,
6415 .node_id = PM_NODE_IDENTIFY(parser),
6416 .location = {
6417 .start = opening->start,
6418 .end = closing->end
6419 }
6420 },
6421 .body = body,
6422 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6423 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6424 };
6425
6426 return node;
6427}
6428
6432static pm_pinned_expression_node_t *
6433pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6434 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6435
6436 *node = (pm_pinned_expression_node_t) {
6437 {
6438 .type = PM_PINNED_EXPRESSION_NODE,
6439 .node_id = PM_NODE_IDENTIFY(parser),
6440 .location = {
6441 .start = operator->start,
6442 .end = rparen->end
6443 }
6444 },
6445 .expression = expression,
6446 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6447 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6448 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6449 };
6450
6451 return node;
6452}
6453
6457static pm_pinned_variable_node_t *
6458pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6459 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6460
6461 *node = (pm_pinned_variable_node_t) {
6462 {
6463 .type = PM_PINNED_VARIABLE_NODE,
6464 .node_id = PM_NODE_IDENTIFY(parser),
6465 .location = {
6466 .start = operator->start,
6467 .end = variable->location.end
6468 }
6469 },
6470 .variable = variable,
6471 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6472 };
6473
6474 return node;
6475}
6476
6480static pm_post_execution_node_t *
6481pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6482 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6483
6484 *node = (pm_post_execution_node_t) {
6485 {
6486 .type = PM_POST_EXECUTION_NODE,
6487 .node_id = PM_NODE_IDENTIFY(parser),
6488 .location = {
6489 .start = keyword->start,
6490 .end = closing->end
6491 }
6492 },
6493 .statements = statements,
6494 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6495 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6496 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6497 };
6498
6499 return node;
6500}
6501
6505static pm_pre_execution_node_t *
6506pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6507 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6508
6509 *node = (pm_pre_execution_node_t) {
6510 {
6511 .type = PM_PRE_EXECUTION_NODE,
6512 .node_id = PM_NODE_IDENTIFY(parser),
6513 .location = {
6514 .start = keyword->start,
6515 .end = closing->end
6516 }
6517 },
6518 .statements = statements,
6519 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6520 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6521 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6522 };
6523
6524 return node;
6525}
6526
6530static pm_range_node_t *
6531pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6532 pm_assert_value_expression(parser, left);
6533 pm_assert_value_expression(parser, right);
6534
6535 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6536 pm_node_flags_t flags = 0;
6537
6538 // Indicate that this node is an exclusive range if the operator is `...`.
6539 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6540 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6541 }
6542
6543 // Indicate that this node is a static literal (i.e., can be compiled with
6544 // a putobject in CRuby) if the left and right are implicit nil, explicit
6545 // nil, or integers.
6546 if (
6547 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6548 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6549 ) {
6550 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6551 }
6552
6553 *node = (pm_range_node_t) {
6554 {
6555 .type = PM_RANGE_NODE,
6556 .flags = flags,
6557 .node_id = PM_NODE_IDENTIFY(parser),
6558 .location = {
6559 .start = (left == NULL ? operator->start : left->location.start),
6560 .end = (right == NULL ? operator->end : right->location.end)
6561 }
6562 },
6563 .left = left,
6564 .right = right,
6565 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6566 };
6567
6568 return node;
6569}
6570
6574static pm_redo_node_t *
6575pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6576 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6577 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6578
6579 *node = (pm_redo_node_t) {{
6580 .type = PM_REDO_NODE,
6581 .node_id = PM_NODE_IDENTIFY(parser),
6582 .location = PM_LOCATION_TOKEN_VALUE(token)
6583 }};
6584
6585 return node;
6586}
6587
6592static pm_regular_expression_node_t *
6593pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6594 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6595
6596 *node = (pm_regular_expression_node_t) {
6597 {
6598 .type = PM_REGULAR_EXPRESSION_NODE,
6599 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6600 .node_id = PM_NODE_IDENTIFY(parser),
6601 .location = {
6602 .start = MIN(opening->start, closing->start),
6603 .end = MAX(opening->end, closing->end)
6604 }
6605 },
6606 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6607 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6608 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6609 .unescaped = *unescaped
6610 };
6611
6612 return node;
6613}
6614
6618static inline pm_regular_expression_node_t *
6619pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6620 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6621}
6622
6626static pm_required_parameter_node_t *
6627pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6628 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6629
6630 *node = (pm_required_parameter_node_t) {
6631 {
6632 .type = PM_REQUIRED_PARAMETER_NODE,
6633 .node_id = PM_NODE_IDENTIFY(parser),
6634 .location = PM_LOCATION_TOKEN_VALUE(token)
6635 },
6636 .name = pm_parser_constant_id_token(parser, token)
6637 };
6638
6639 return node;
6640}
6641
6645static pm_rescue_modifier_node_t *
6646pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6647 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6648
6649 *node = (pm_rescue_modifier_node_t) {
6650 {
6651 .type = PM_RESCUE_MODIFIER_NODE,
6652 .node_id = PM_NODE_IDENTIFY(parser),
6653 .location = {
6654 .start = expression->location.start,
6655 .end = rescue_expression->location.end
6656 }
6657 },
6658 .expression = expression,
6659 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6660 .rescue_expression = rescue_expression
6661 };
6662
6663 return node;
6664}
6665
6669static pm_rescue_node_t *
6670pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6671 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6672
6673 *node = (pm_rescue_node_t) {
6674 {
6675 .type = PM_RESCUE_NODE,
6676 .node_id = PM_NODE_IDENTIFY(parser),
6677 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6678 },
6679 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6680 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6681 .reference = NULL,
6682 .statements = NULL,
6683 .subsequent = NULL,
6684 .exceptions = { 0 }
6685 };
6686
6687 return node;
6688}
6689
6690static inline void
6691pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6692 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6693}
6694
6698static void
6699pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6700 node->reference = reference;
6701 node->base.location.end = reference->location.end;
6702}
6703
6707static void
6708pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6709 node->statements = statements;
6710 if (pm_statements_node_body_length(statements) > 0) {
6711 node->base.location.end = statements->base.location.end;
6712 }
6713}
6714
6718static void
6719pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6720 node->subsequent = subsequent;
6721 node->base.location.end = subsequent->base.location.end;
6722}
6723
6727static void
6728pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6729 pm_node_list_append(&node->exceptions, exception);
6730 node->base.location.end = exception->location.end;
6731}
6732
6736static pm_rest_parameter_node_t *
6737pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6738 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6739
6740 *node = (pm_rest_parameter_node_t) {
6741 {
6742 .type = PM_REST_PARAMETER_NODE,
6743 .node_id = PM_NODE_IDENTIFY(parser),
6744 .location = {
6745 .start = operator->start,
6746 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6747 }
6748 },
6749 .name = pm_parser_optional_constant_id_token(parser, name),
6750 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6751 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6752 };
6753
6754 return node;
6755}
6756
6760static pm_retry_node_t *
6761pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6762 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6763 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6764
6765 *node = (pm_retry_node_t) {{
6766 .type = PM_RETRY_NODE,
6767 .node_id = PM_NODE_IDENTIFY(parser),
6768 .location = PM_LOCATION_TOKEN_VALUE(token)
6769 }};
6770
6771 return node;
6772}
6773
6777static pm_return_node_t *
6778pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6779 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6780
6781 *node = (pm_return_node_t) {
6782 {
6783 .type = PM_RETURN_NODE,
6784 .node_id = PM_NODE_IDENTIFY(parser),
6785 .location = {
6786 .start = keyword->start,
6787 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6788 }
6789 },
6790 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6791 .arguments = arguments
6792 };
6793
6794 return node;
6795}
6796
6800static pm_self_node_t *
6801pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6802 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6803 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6804
6805 *node = (pm_self_node_t) {{
6806 .type = PM_SELF_NODE,
6807 .node_id = PM_NODE_IDENTIFY(parser),
6808 .location = PM_LOCATION_TOKEN_VALUE(token)
6809 }};
6810
6811 return node;
6812}
6813
6817static pm_shareable_constant_node_t *
6818pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6819 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6820
6821 *node = (pm_shareable_constant_node_t) {
6822 {
6823 .type = PM_SHAREABLE_CONSTANT_NODE,
6824 .flags = (pm_node_flags_t) value,
6825 .node_id = PM_NODE_IDENTIFY(parser),
6826 .location = PM_LOCATION_NODE_VALUE(write)
6827 },
6828 .write = write
6829 };
6830
6831 return node;
6832}
6833
6837static pm_singleton_class_node_t *
6838pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6839 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6840
6841 *node = (pm_singleton_class_node_t) {
6842 {
6843 .type = PM_SINGLETON_CLASS_NODE,
6844 .node_id = PM_NODE_IDENTIFY(parser),
6845 .location = {
6846 .start = class_keyword->start,
6847 .end = end_keyword->end
6848 }
6849 },
6850 .locals = *locals,
6851 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6852 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6853 .expression = expression,
6854 .body = body,
6855 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6856 };
6857
6858 return node;
6859}
6860
6864static pm_source_encoding_node_t *
6865pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6866 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6867 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6868
6869 *node = (pm_source_encoding_node_t) {{
6870 .type = PM_SOURCE_ENCODING_NODE,
6871 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6872 .node_id = PM_NODE_IDENTIFY(parser),
6873 .location = PM_LOCATION_TOKEN_VALUE(token)
6874 }};
6875
6876 return node;
6877}
6878
6882static pm_source_file_node_t*
6883pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6884 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6885 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6886
6887 pm_node_flags_t flags = 0;
6888
6889 switch (parser->frozen_string_literal) {
6890 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6891 flags |= PM_STRING_FLAGS_MUTABLE;
6892 break;
6893 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6894 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6895 break;
6896 }
6897
6898 *node = (pm_source_file_node_t) {
6899 {
6900 .type = PM_SOURCE_FILE_NODE,
6901 .flags = flags,
6902 .node_id = PM_NODE_IDENTIFY(parser),
6903 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6904 },
6905 .filepath = parser->filepath
6906 };
6907
6908 return node;
6909}
6910
6914static pm_source_line_node_t *
6915pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6916 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6917 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6918
6919 *node = (pm_source_line_node_t) {{
6920 .type = PM_SOURCE_LINE_NODE,
6921 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6922 .node_id = PM_NODE_IDENTIFY(parser),
6923 .location = PM_LOCATION_TOKEN_VALUE(token)
6924 }};
6925
6926 return node;
6927}
6928
6932static pm_splat_node_t *
6933pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6934 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6935
6936 *node = (pm_splat_node_t) {
6937 {
6938 .type = PM_SPLAT_NODE,
6939 .node_id = PM_NODE_IDENTIFY(parser),
6940 .location = {
6941 .start = operator->start,
6942 .end = (expression == NULL ? operator->end : expression->location.end)
6943 }
6944 },
6945 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6946 .expression = expression
6947 };
6948
6949 return node;
6950}
6951
6955static pm_statements_node_t *
6956pm_statements_node_create(pm_parser_t *parser) {
6957 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6958
6959 *node = (pm_statements_node_t) {
6960 {
6961 .type = PM_STATEMENTS_NODE,
6962 .node_id = PM_NODE_IDENTIFY(parser),
6963 .location = PM_LOCATION_NULL_VALUE(parser)
6964 },
6965 .body = { 0 }
6966 };
6967
6968 return node;
6969}
6970
6974static size_t
6975pm_statements_node_body_length(pm_statements_node_t *node) {
6976 return node && node->body.size;
6977}
6978
6982static void
6983pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6984 node->base.location = (pm_location_t) { .start = start, .end = end };
6985}
6986
6991static inline void
6992pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6993 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6994 node->base.location.start = statement->location.start;
6995 }
6996
6997 if (statement->location.end > node->base.location.end) {
6998 node->base.location.end = statement->location.end;
6999 }
7000}
7001
7005static void
7006pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
7007 pm_statements_node_body_update(node, statement);
7008
7009 if (node->body.size > 0) {
7010 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
7011
7012 switch (PM_NODE_TYPE(previous)) {
7013 case PM_BREAK_NODE:
7014 case PM_NEXT_NODE:
7015 case PM_REDO_NODE:
7016 case PM_RETRY_NODE:
7017 case PM_RETURN_NODE:
7018 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7019 break;
7020 default:
7021 break;
7022 }
7023 }
7024
7025 pm_node_list_append(&node->body, statement);
7026 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7027}
7028
7032static void
7033pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7034 pm_statements_node_body_update(node, statement);
7035 pm_node_list_prepend(&node->body, statement);
7036 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7037}
7038
7042static inline pm_string_node_t *
7043pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7044 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7045 pm_node_flags_t flags = 0;
7046
7047 switch (parser->frozen_string_literal) {
7048 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7049 flags = PM_STRING_FLAGS_MUTABLE;
7050 break;
7051 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7052 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7053 break;
7054 }
7055
7056 *node = (pm_string_node_t) {
7057 {
7058 .type = PM_STRING_NODE,
7059 .flags = flags,
7060 .node_id = PM_NODE_IDENTIFY(parser),
7061 .location = {
7062 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7063 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7064 }
7065 },
7066 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7067 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7068 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7069 .unescaped = *string
7070 };
7071
7072 return node;
7073}
7074
7078static pm_string_node_t *
7079pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7080 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7081}
7082
7087static pm_string_node_t *
7088pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7089 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7090 parser->current_string = PM_STRING_EMPTY;
7091 return node;
7092}
7093
7097static pm_super_node_t *
7098pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7099 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7100 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7101
7102 const uint8_t *end = pm_arguments_end(arguments);
7103 if (end == NULL) {
7104 assert(false && "unreachable");
7105 }
7106
7107 *node = (pm_super_node_t) {
7108 {
7109 .type = PM_SUPER_NODE,
7110 .node_id = PM_NODE_IDENTIFY(parser),
7111 .location = {
7112 .start = keyword->start,
7113 .end = end,
7114 }
7115 },
7116 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7117 .lparen_loc = arguments->opening_loc,
7118 .arguments = arguments->arguments,
7119 .rparen_loc = arguments->closing_loc,
7120 .block = arguments->block
7121 };
7122
7123 return node;
7124}
7125
7130static bool
7131pm_ascii_only_p(const pm_string_t *contents) {
7132 const size_t length = pm_string_length(contents);
7133 const uint8_t *source = pm_string_source(contents);
7134
7135 for (size_t index = 0; index < length; index++) {
7136 if (source[index] & 0x80) return false;
7137 }
7138
7139 return true;
7140}
7141
7145static void
7146parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7147 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7148 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7149
7150 if (width == 0) {
7151 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7152 break;
7153 }
7154
7155 cursor += width;
7156 }
7157}
7158
7163static void
7164parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7165 const pm_encoding_t *encoding = parser->encoding;
7166
7167 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7168 size_t width = encoding->char_width(cursor, end - cursor);
7169
7170 if (width == 0) {
7171 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7172 break;
7173 }
7174
7175 cursor += width;
7176 }
7177}
7178
7188static inline pm_node_flags_t
7189parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7190 if (parser->explicit_encoding != NULL) {
7191 // A Symbol may optionally have its encoding explicitly set. This will
7192 // happen if an escape sequence results in a non-ASCII code point.
7193 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7194 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7195 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7196 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7197 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7198 } else if (validate) {
7199 parse_symbol_encoding_validate_other(parser, location, contents);
7200 }
7201 } else if (pm_ascii_only_p(contents)) {
7202 // Ruby stipulates that all source files must use an ASCII-compatible
7203 // encoding. Thus, all symbols appearing in source are eligible for
7204 // "downgrading" to US-ASCII.
7205 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7206 } else if (validate) {
7207 parse_symbol_encoding_validate_other(parser, location, contents);
7208 }
7209
7210 return 0;
7211}
7212
7213static pm_node_flags_t
7214parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7215 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7216 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7217 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7218 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7219
7220 // There's special validation logic used if a string does not contain any character escape sequences.
7221 if (parser->explicit_encoding == NULL) {
7222 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7223 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7224 // the US-ASCII encoding.
7225 if (ascii_only) {
7226 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7227 }
7228
7229 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7230 if (!ascii_only) {
7231 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7232 }
7233 } else if (parser->encoding != modifier_encoding) {
7234 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7235
7236 if (modifier == 'n' && !ascii_only) {
7237 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7238 }
7239 }
7240
7241 return flags;
7242 }
7243
7244 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7245 bool mixed_encoding = false;
7246
7247 if (mixed_encoding) {
7248 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7249 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7250 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7251 bool valid_string_in_modifier_encoding = true;
7252
7253 if (!valid_string_in_modifier_encoding) {
7254 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7255 }
7256 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7257 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7258 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7259 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7260 }
7261 }
7262
7263 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7264 return flags;
7265}
7266
7273static pm_node_flags_t
7274parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7275 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7276 bool valid_unicode_range = true;
7277 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7278 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7279 return flags;
7280 }
7281
7282 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7283 // to multi-byte characters are allowed.
7284 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7285 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7286 // following error message appearing twice. We do the same for compatibility.
7287 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7288 }
7289
7298 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7299 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7300 }
7301
7302 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7303 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7304 }
7305
7306 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7307 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7308 }
7309
7310 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7311 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7312 }
7313
7314 // At this point no encoding modifiers will be present on the regular expression as they would have already
7315 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7316 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7317 if (ascii_only) {
7318 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7319 }
7320
7321 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7322 // or by specifying a modifier.
7323 //
7324 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7325 if (parser->explicit_encoding != NULL) {
7326 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7327 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7328 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7329 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7330 }
7331 }
7332
7333 return 0;
7334}
7335
7340static pm_symbol_node_t *
7341pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7342 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7343
7344 *node = (pm_symbol_node_t) {
7345 {
7346 .type = PM_SYMBOL_NODE,
7347 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7348 .node_id = PM_NODE_IDENTIFY(parser),
7349 .location = {
7350 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7351 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7352 }
7353 },
7354 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7355 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7356 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7357 .unescaped = *unescaped
7358 };
7359
7360 return node;
7361}
7362
7366static inline pm_symbol_node_t *
7367pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7368 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7369}
7370
7374static pm_symbol_node_t *
7375pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7376 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7377 parser->current_string = PM_STRING_EMPTY;
7378 return node;
7379}
7380
7384static pm_symbol_node_t *
7385pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7386 pm_symbol_node_t *node;
7387
7388 switch (token->type) {
7389 case PM_TOKEN_LABEL: {
7390 pm_token_t opening = not_provided(parser);
7391 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7392
7393 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7394 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7395
7396 assert((label.end - label.start) >= 0);
7397 pm_string_shared_init(&node->unescaped, label.start, label.end);
7398 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7399
7400 break;
7401 }
7402 case PM_TOKEN_MISSING: {
7403 pm_token_t opening = not_provided(parser);
7404 pm_token_t closing = not_provided(parser);
7405
7406 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7407 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7408 break;
7409 }
7410 default:
7411 assert(false && "unreachable");
7412 node = NULL;
7413 break;
7414 }
7415
7416 return node;
7417}
7418
7422static pm_symbol_node_t *
7423pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7424 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7425
7426 *node = (pm_symbol_node_t) {
7427 {
7428 .type = PM_SYMBOL_NODE,
7429 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7430 .node_id = PM_NODE_IDENTIFY(parser),
7431 .location = PM_LOCATION_NULL_VALUE(parser)
7432 },
7433 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7434 .unescaped = { 0 }
7435 };
7436
7437 pm_string_constant_init(&node->unescaped, content, strlen(content));
7438 return node;
7439}
7440
7444static bool
7445pm_symbol_node_label_p(pm_node_t *node) {
7446 const uint8_t *end = NULL;
7447
7448 switch (PM_NODE_TYPE(node)) {
7449 case PM_SYMBOL_NODE:
7450 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7451 break;
7452 case PM_INTERPOLATED_SYMBOL_NODE:
7453 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7454 break;
7455 default:
7456 return false;
7457 }
7458
7459 return (end != NULL) && (end[-1] == ':');
7460}
7461
7465static pm_symbol_node_t *
7466pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7467 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7468
7469 *new_node = (pm_symbol_node_t) {
7470 {
7471 .type = PM_SYMBOL_NODE,
7472 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7473 .node_id = PM_NODE_IDENTIFY(parser),
7474 .location = {
7475 .start = opening->start,
7476 .end = closing->end
7477 }
7478 },
7479 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7480 .value_loc = node->content_loc,
7481 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7482 .unescaped = node->unescaped
7483 };
7484
7485 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7486 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7487
7488 // We are explicitly _not_ using pm_node_destroy here because we don't want
7489 // to trash the unescaped string. We could instead copy the string if we
7490 // know that it is owned, but we're taking the fast path for now.
7491 xfree(node);
7492
7493 return new_node;
7494}
7495
7499static pm_string_node_t *
7500pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7501 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7502 pm_node_flags_t flags = 0;
7503
7504 switch (parser->frozen_string_literal) {
7505 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7506 flags = PM_STRING_FLAGS_MUTABLE;
7507 break;
7508 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7509 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7510 break;
7511 }
7512
7513 *new_node = (pm_string_node_t) {
7514 {
7515 .type = PM_STRING_NODE,
7516 .flags = flags,
7517 .node_id = PM_NODE_IDENTIFY(parser),
7518 .location = node->base.location
7519 },
7520 .opening_loc = node->opening_loc,
7521 .content_loc = node->value_loc,
7522 .closing_loc = node->closing_loc,
7523 .unescaped = node->unescaped
7524 };
7525
7526 // We are explicitly _not_ using pm_node_destroy here because we don't want
7527 // to trash the unescaped string. We could instead copy the string if we
7528 // know that it is owned, but we're taking the fast path for now.
7529 xfree(node);
7530
7531 return new_node;
7532}
7533
7537static pm_true_node_t *
7538pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7539 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7540 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7541
7542 *node = (pm_true_node_t) {{
7543 .type = PM_TRUE_NODE,
7544 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7545 .node_id = PM_NODE_IDENTIFY(parser),
7546 .location = PM_LOCATION_TOKEN_VALUE(token)
7547 }};
7548
7549 return node;
7550}
7551
7555static pm_true_node_t *
7556pm_true_node_synthesized_create(pm_parser_t *parser) {
7557 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7558
7559 *node = (pm_true_node_t) {{
7560 .type = PM_TRUE_NODE,
7561 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7562 .node_id = PM_NODE_IDENTIFY(parser),
7563 .location = { .start = parser->start, .end = parser->end }
7564 }};
7565
7566 return node;
7567}
7568
7572static pm_undef_node_t *
7573pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7574 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7575 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7576
7577 *node = (pm_undef_node_t) {
7578 {
7579 .type = PM_UNDEF_NODE,
7580 .node_id = PM_NODE_IDENTIFY(parser),
7581 .location = PM_LOCATION_TOKEN_VALUE(token),
7582 },
7583 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7584 .names = { 0 }
7585 };
7586
7587 return node;
7588}
7589
7593static void
7594pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7595 node->base.location.end = name->location.end;
7596 pm_node_list_append(&node->names, name);
7597}
7598
7602static pm_unless_node_t *
7603pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7604 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7605 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7606
7607 const uint8_t *end;
7608 if (statements != NULL) {
7609 end = statements->base.location.end;
7610 } else {
7611 end = predicate->location.end;
7612 }
7613
7614 *node = (pm_unless_node_t) {
7615 {
7616 .type = PM_UNLESS_NODE,
7617 .flags = PM_NODE_FLAG_NEWLINE,
7618 .node_id = PM_NODE_IDENTIFY(parser),
7619 .location = {
7620 .start = keyword->start,
7621 .end = end
7622 },
7623 },
7624 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7625 .predicate = predicate,
7626 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7627 .statements = statements,
7628 .else_clause = NULL,
7629 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7630 };
7631
7632 return node;
7633}
7634
7638static pm_unless_node_t *
7639pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7640 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7641 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7642
7643 pm_statements_node_t *statements = pm_statements_node_create(parser);
7644 pm_statements_node_body_append(parser, statements, statement, true);
7645
7646 *node = (pm_unless_node_t) {
7647 {
7648 .type = PM_UNLESS_NODE,
7649 .flags = PM_NODE_FLAG_NEWLINE,
7650 .node_id = PM_NODE_IDENTIFY(parser),
7651 .location = {
7652 .start = statement->location.start,
7653 .end = predicate->location.end
7654 },
7655 },
7656 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7657 .predicate = predicate,
7658 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7659 .statements = statements,
7660 .else_clause = NULL,
7661 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7662 };
7663
7664 return node;
7665}
7666
7667static inline void
7668pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7669 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7670 node->base.location.end = end_keyword->end;
7671}
7672
7678static void
7679pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7680 assert(parser->current_block_exits != NULL);
7681
7682 // All of the block exits that we want to remove should be within the
7683 // statements, and since we are modifying the statements, we shouldn't have
7684 // to check the end location.
7685 const uint8_t *start = statements->base.location.start;
7686
7687 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7688 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7689 if (block_exit->location.start < start) break;
7690
7691 // Implicitly remove from the list by lowering the size.
7692 parser->current_block_exits->size--;
7693 }
7694}
7695
7699static pm_until_node_t *
7700pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7701 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7702 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7703
7704 *node = (pm_until_node_t) {
7705 {
7706 .type = PM_UNTIL_NODE,
7707 .flags = flags,
7708 .node_id = PM_NODE_IDENTIFY(parser),
7709 .location = {
7710 .start = keyword->start,
7711 .end = closing->end,
7712 },
7713 },
7714 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7715 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7716 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7717 .predicate = predicate,
7718 .statements = statements
7719 };
7720
7721 return node;
7722}
7723
7727static pm_until_node_t *
7728pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7729 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7730 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7731 pm_loop_modifier_block_exits(parser, statements);
7732
7733 *node = (pm_until_node_t) {
7734 {
7735 .type = PM_UNTIL_NODE,
7736 .flags = flags,
7737 .node_id = PM_NODE_IDENTIFY(parser),
7738 .location = {
7739 .start = statements->base.location.start,
7740 .end = predicate->location.end,
7741 },
7742 },
7743 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7744 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7745 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7746 .predicate = predicate,
7747 .statements = statements
7748 };
7749
7750 return node;
7751}
7752
7756static pm_when_node_t *
7757pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7758 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7759
7760 *node = (pm_when_node_t) {
7761 {
7762 .type = PM_WHEN_NODE,
7763 .node_id = PM_NODE_IDENTIFY(parser),
7764 .location = {
7765 .start = keyword->start,
7766 .end = NULL
7767 }
7768 },
7769 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7770 .statements = NULL,
7771 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7772 .conditions = { 0 }
7773 };
7774
7775 return node;
7776}
7777
7781static void
7782pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7783 node->base.location.end = condition->location.end;
7784 pm_node_list_append(&node->conditions, condition);
7785}
7786
7790static inline void
7791pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7792 node->base.location.end = then_keyword->end;
7793 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7794}
7795
7799static void
7800pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7801 if (statements->base.location.end > node->base.location.end) {
7802 node->base.location.end = statements->base.location.end;
7803 }
7804
7805 node->statements = statements;
7806}
7807
7811static pm_while_node_t *
7812pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7813 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7814 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7815
7816 *node = (pm_while_node_t) {
7817 {
7818 .type = PM_WHILE_NODE,
7819 .flags = flags,
7820 .node_id = PM_NODE_IDENTIFY(parser),
7821 .location = {
7822 .start = keyword->start,
7823 .end = closing->end
7824 },
7825 },
7826 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7827 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7828 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7829 .predicate = predicate,
7830 .statements = statements
7831 };
7832
7833 return node;
7834}
7835
7839static pm_while_node_t *
7840pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7841 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7842 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7843 pm_loop_modifier_block_exits(parser, statements);
7844
7845 *node = (pm_while_node_t) {
7846 {
7847 .type = PM_WHILE_NODE,
7848 .flags = flags,
7849 .node_id = PM_NODE_IDENTIFY(parser),
7850 .location = {
7851 .start = statements->base.location.start,
7852 .end = predicate->location.end
7853 },
7854 },
7855 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7856 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7857 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7858 .predicate = predicate,
7859 .statements = statements
7860 };
7861
7862 return node;
7863}
7864
7868static pm_while_node_t *
7869pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7870 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7871
7872 *node = (pm_while_node_t) {
7873 {
7874 .type = PM_WHILE_NODE,
7875 .node_id = PM_NODE_IDENTIFY(parser),
7876 .location = PM_LOCATION_NULL_VALUE(parser)
7877 },
7878 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7879 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7880 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7881 .predicate = predicate,
7882 .statements = statements
7883 };
7884
7885 return node;
7886}
7887
7892static pm_x_string_node_t *
7893pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7894 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7895
7896 *node = (pm_x_string_node_t) {
7897 {
7898 .type = PM_X_STRING_NODE,
7899 .flags = PM_STRING_FLAGS_FROZEN,
7900 .node_id = PM_NODE_IDENTIFY(parser),
7901 .location = {
7902 .start = opening->start,
7903 .end = closing->end
7904 },
7905 },
7906 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7907 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7908 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7909 .unescaped = *unescaped
7910 };
7911
7912 return node;
7913}
7914
7918static inline pm_x_string_node_t *
7919pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7920 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7921}
7922
7926static pm_yield_node_t *
7927pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7928 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7929
7930 const uint8_t *end;
7931 if (rparen_loc->start != NULL) {
7932 end = rparen_loc->end;
7933 } else if (arguments != NULL) {
7934 end = arguments->base.location.end;
7935 } else if (lparen_loc->start != NULL) {
7936 end = lparen_loc->end;
7937 } else {
7938 end = keyword->end;
7939 }
7940
7941 *node = (pm_yield_node_t) {
7942 {
7943 .type = PM_YIELD_NODE,
7944 .node_id = PM_NODE_IDENTIFY(parser),
7945 .location = {
7946 .start = keyword->start,
7947 .end = end
7948 },
7949 },
7950 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7951 .lparen_loc = *lparen_loc,
7952 .arguments = arguments,
7953 .rparen_loc = *rparen_loc
7954 };
7955
7956 return node;
7957}
7958
7959#undef PM_NODE_ALLOC
7960#undef PM_NODE_IDENTIFY
7961
7966static int
7967pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7968 pm_scope_t *scope = parser->current_scope;
7969 int depth = 0;
7970
7971 while (scope != NULL) {
7972 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7973 if (scope->closed) break;
7974
7975 scope = scope->previous;
7976 depth++;
7977 }
7978
7979 return -1;
7980}
7981
7987static inline int
7988pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7989 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7990}
7991
7995static inline void
7996pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7997 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7998}
7999
8003static pm_constant_id_t
8004pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8005 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
8006 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
8007 return constant_id;
8008}
8009
8013static inline pm_constant_id_t
8014pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8015 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8016}
8017
8021static pm_constant_id_t
8022pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8023 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8024 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8025 return constant_id;
8026}
8027
8031static pm_constant_id_t
8032pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8033 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8034 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8035 return constant_id;
8036}
8037
8045static bool
8046pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8047 // We want to check whether the parameter name is a numbered parameter or
8048 // not.
8049 pm_refute_numbered_parameter(parser, name->start, name->end);
8050
8051 // Otherwise we'll fetch the constant id for the parameter name and check
8052 // whether it's already in the current scope.
8053 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8054
8055 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8056 // Add an error if the parameter doesn't start with _ and has been seen before
8057 if ((name->start < name->end) && (*name->start != '_')) {
8058 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8059 }
8060 return true;
8061 }
8062 return false;
8063}
8064
8068static void
8069pm_parser_scope_pop(pm_parser_t *parser) {
8070 pm_scope_t *scope = parser->current_scope;
8071 parser->current_scope = scope->previous;
8072 pm_locals_free(&scope->locals);
8073 pm_node_list_free(&scope->implicit_parameters);
8074 xfree(scope);
8075}
8076
8077/******************************************************************************/
8078/* Stack helpers */
8079/******************************************************************************/
8080
8084static inline void
8085pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8086 *stack = (*stack << 1) | (value & 1);
8087}
8088
8092static inline void
8093pm_state_stack_pop(pm_state_stack_t *stack) {
8094 *stack >>= 1;
8095}
8096
8100static inline bool
8101pm_state_stack_p(const pm_state_stack_t *stack) {
8102 return *stack & 1;
8103}
8104
8105static inline void
8106pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8107 // Use the negation of the value to prevent stack overflow.
8108 pm_state_stack_push(&parser->accepts_block_stack, !value);
8109}
8110
8111static inline void
8112pm_accepts_block_stack_pop(pm_parser_t *parser) {
8113 pm_state_stack_pop(&parser->accepts_block_stack);
8114}
8115
8116static inline bool
8117pm_accepts_block_stack_p(pm_parser_t *parser) {
8118 return !pm_state_stack_p(&parser->accepts_block_stack);
8119}
8120
8121static inline void
8122pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8123 pm_state_stack_push(&parser->do_loop_stack, value);
8124}
8125
8126static inline void
8127pm_do_loop_stack_pop(pm_parser_t *parser) {
8128 pm_state_stack_pop(&parser->do_loop_stack);
8129}
8130
8131static inline bool
8132pm_do_loop_stack_p(pm_parser_t *parser) {
8133 return pm_state_stack_p(&parser->do_loop_stack);
8134}
8135
8136/******************************************************************************/
8137/* Lexer check helpers */
8138/******************************************************************************/
8139
8144static inline uint8_t
8145peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8146 if (cursor < parser->end) {
8147 return *cursor;
8148 } else {
8149 return '\0';
8150 }
8151}
8152
8158static inline uint8_t
8159peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8160 return peek_at(parser, parser->current.end + offset);
8161}
8162
8167static inline uint8_t
8168peek(const pm_parser_t *parser) {
8169 return peek_at(parser, parser->current.end);
8170}
8171
8176static inline bool
8177match(pm_parser_t *parser, uint8_t value) {
8178 if (peek(parser) == value) {
8179 parser->current.end++;
8180 return true;
8181 }
8182 return false;
8183}
8184
8189static inline size_t
8190match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8191 if (peek_at(parser, cursor) == '\n') {
8192 return 1;
8193 }
8194 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8195 return 2;
8196 }
8197 return 0;
8198}
8199
8205static inline size_t
8206match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8207 return match_eol_at(parser, parser->current.end + offset);
8208}
8209
8215static inline size_t
8216match_eol(pm_parser_t *parser) {
8217 return match_eol_at(parser, parser->current.end);
8218}
8219
/**
 * Find the first "\n" within `length` bytes starting at `cursor`. Returns a
 * pointer to it, or NULL when there is none in that range. `length` must not
 * be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // A plain byte search is used here on the basis that none of the
    // supported encodings have \n as a component of a multi-byte character.
    const void *found = memchr(cursor, '\n', (size_t) length);
    return (const uint8_t *) found;
}
8232
8236static inline bool
8237ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8238 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8239}
8240
8245static bool
8246parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8247 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8248
8249 if (encoding != NULL) {
8250 if (parser->encoding != encoding) {
8251 parser->encoding = encoding;
8252 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8253 }
8254
8255 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8256 return true;
8257 }
8258
8259 return false;
8260}
8261
8266static void
8267parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8268 const uint8_t *cursor = parser->current.start + 1;
8269 const uint8_t *end = parser->current.end;
8270
8271 bool separator = false;
8272 while (true) {
8273 if (end - cursor <= 6) return;
8274 switch (cursor[6]) {
8275 case 'C': case 'c': cursor += 6; continue;
8276 case 'O': case 'o': cursor += 5; continue;
8277 case 'D': case 'd': cursor += 4; continue;
8278 case 'I': case 'i': cursor += 3; continue;
8279 case 'N': case 'n': cursor += 2; continue;
8280 case 'G': case 'g': cursor += 1; continue;
8281 case '=': case ':':
8282 separator = true;
8283 cursor += 6;
8284 break;
8285 default:
8286 cursor += 6;
8287 if (pm_char_is_whitespace(*cursor)) break;
8288 continue;
8289 }
8290 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8291 separator = false;
8292 }
8293
8294 while (true) {
8295 do {
8296 if (++cursor >= end) return;
8297 } while (pm_char_is_whitespace(*cursor));
8298
8299 if (separator) break;
8300 if (*cursor != '=' && *cursor != ':') return;
8301
8302 separator = true;
8303 cursor++;
8304 }
8305
8306 const uint8_t *value_start = cursor;
8307 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8308
8309 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8310 // If we were unable to parse the encoding value, then we've got an
8311 // issue because we didn't understand the encoding that the user was
8312 // trying to use. In this case we'll keep using the default encoding but
8313 // add an error to the parser to indicate an unsuccessful parse.
8314 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8315 }
8316}
8317
/**
 * The possible outcomes of reading a boolean magic comment value.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was anything else. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
8323
8328static pm_magic_comment_boolean_value_t
8329parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8330 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8331 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8332 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8333 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8334 } else {
8335 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8336 }
8337}
8338
/**
 * Return true when the byte is one of the characters that delimits a magic
 * comment key: a single quote, a double quote, a colon or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8343
8349static inline const uint8_t *
8350parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8351 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8352 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8353 return cursor;
8354 }
8355 cursor++;
8356 }
8357 return NULL;
8358}
8359
8370static inline bool
8371parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8372 bool result = true;
8373
8374 const uint8_t *start = parser->current.start + 1;
8375 const uint8_t *end = parser->current.end;
8376 if (end - start <= 7) return false;
8377
8378 const uint8_t *cursor;
8379 bool indicator = false;
8380
8381 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8382 start = cursor + 3;
8383
8384 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8385 end = cursor;
8386 indicator = true;
8387 } else {
8388 // If we have a start marker but not an end marker, then we cannot
8389 // have a magic comment.
8390 return false;
8391 }
8392 }
8393
8394 cursor = start;
8395 while (cursor < end) {
8396 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8397
8398 const uint8_t *key_start = cursor;
8399 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8400
8401 const uint8_t *key_end = cursor;
8402 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8403 if (cursor == end) break;
8404
8405 if (*cursor == ':') {
8406 cursor++;
8407 } else {
8408 if (!indicator) return false;
8409 continue;
8410 }
8411
8412 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8413 if (cursor == end) break;
8414
8415 const uint8_t *value_start;
8416 const uint8_t *value_end;
8417
8418 if (*cursor == '"') {
8419 value_start = ++cursor;
8420 for (; cursor < end && *cursor != '"'; cursor++) {
8421 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8422 }
8423 value_end = cursor;
8424 if (*cursor == '"') cursor++;
8425 } else {
8426 value_start = cursor;
8427 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8428 value_end = cursor;
8429 }
8430
8431 if (indicator) {
8432 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8433 } else {
8434 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8435 if (cursor != end) return false;
8436 }
8437
8438 // Here, we need to do some processing on the key to swap out dashes for
8439 // underscores. We only need to do this if there _is_ a dash in the key.
8440 pm_string_t key;
8441 const size_t key_length = (size_t) (key_end - key_start);
8442 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8443
8444 if (dash == NULL) {
8445 pm_string_shared_init(&key, key_start, key_end);
8446 } else {
8447 uint8_t *buffer = xmalloc(key_length);
8448 if (buffer == NULL) break;
8449
8450 memcpy(buffer, key_start, key_length);
8451 buffer[dash - key_start] = '_';
8452
8453 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8454 buffer[dash - key_start] = '_';
8455 }
8456
8457 pm_string_owned_init(&key, buffer, key_length);
8458 }
8459
8460 // Finally, we can start checking the key against the list of known
8461 // magic comment keys, and potentially change state based on that.
8462 const uint8_t *key_source = pm_string_source(&key);
8463 uint32_t value_length = (uint32_t) (value_end - value_start);
8464
8465 // We only want to attempt to compare against encoding comments if it's
8466 // the first line in the file (or the second in the case of a shebang).
8467 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8468 if (
8469 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8470 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8471 ) {
8472 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8473 }
8474 }
8475
8476 if (key_length == 11) {
8477 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8478 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8479 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8480 PM_PARSER_WARN_TOKEN_FORMAT(
8481 parser,
8482 parser->current,
8483 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8484 (int) key_length,
8485 (const char *) key_source,
8486 (int) value_length,
8487 (const char *) value_start
8488 );
8489 break;
8490 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8491 parser->warn_mismatched_indentation = false;
8492 break;
8493 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8494 parser->warn_mismatched_indentation = true;
8495 break;
8496 }
8497 }
8498 } else if (key_length == 21) {
8499 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8500 // We only want to handle frozen string literal comments if it's
8501 // before any semantic tokens have been seen.
8502 if (semantic_token_seen) {
8503 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8504 } else {
8505 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8506 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8507 PM_PARSER_WARN_TOKEN_FORMAT(
8508 parser,
8509 parser->current,
8510 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8511 (int) key_length,
8512 (const char *) key_source,
8513 (int) value_length,
8514 (const char *) value_start
8515 );
8516 break;
8517 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8519 break;
8520 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8522 break;
8523 }
8524 }
8525 }
8526 } else if (key_length == 24) {
8527 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8528 const uint8_t *cursor = parser->current.start;
8529 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8530
8531 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8532 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8533 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8534 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8535 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8536 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8537 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8538 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8539 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8540 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8541 } else {
8542 PM_PARSER_WARN_TOKEN_FORMAT(
8543 parser,
8544 parser->current,
8545 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8546 (int) key_length,
8547 (const char *) key_source,
8548 (int) value_length,
8549 (const char *) value_start
8550 );
8551 }
8552 }
8553 }
8554
8555 // When we're done, we want to free the string in case we had to
8556 // allocate memory for it.
8557 pm_string_free(&key);
8558
8559 // Allocate a new magic comment node to append to the parser's list.
8561 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8562 magic_comment->key_start = key_start;
8563 magic_comment->value_start = value_start;
8564 magic_comment->key_length = (uint32_t) key_length;
8565 magic_comment->value_length = value_length;
8566 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8567 }
8568 }
8569
8570 return result;
8571}
8572
8573/******************************************************************************/
8574/* Context manipulations */
8575/******************************************************************************/
8576
/**
 * Return whether the given token is one of the token types that terminates
 * the given context. PM_CONTEXT_NONE is never terminated, and any context
 * value not handled by the switch also yields false.
 *
 * NOTE(review): in this copy several `return` statements directly follow
 * another `return` with no `case` label between them, which makes them
 * unreachable as written. The embedded line numbering skips at exactly those
 * points, so case labels appear to be missing here. Confirm against the
 * upstream file before relying on this function.
 */
8577static bool
8578context_terminator(pm_context_t context, pm_token_t *token) {
8579 switch (context) {
8580 case PM_CONTEXT_MAIN:
8582 case PM_CONTEXT_DEFINED:
8584 case PM_CONTEXT_TERNARY:
// These contexts only end at the end of the input.
8586 return token->type == PM_TOKEN_EOF;
// NOTE(review): no case label precedes this return in this copy.
8588 return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8589 case PM_CONTEXT_PREEXE:
8590 case PM_CONTEXT_POSTEXE:
8591 return token->type == PM_TOKEN_BRACE_RIGHT;
8592 case PM_CONTEXT_MODULE:
8593 case PM_CONTEXT_CLASS:
8594 case PM_CONTEXT_SCLASS:
8596 case PM_CONTEXT_DEF:
// These bodies end at `end`, or at a `rescue` or `ensure` clause.
8598 return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
8599 case PM_CONTEXT_WHILE:
8600 case PM_CONTEXT_UNTIL:
8601 case PM_CONTEXT_ELSE:
8602 case PM_CONTEXT_FOR:
8610 return token->type == PM_TOKEN_KEYWORD_END;
// NOTE(review): no case labels precede the next three returns in this copy.
8612 return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
8614 return token->type == PM_TOKEN_KEYWORD_IN;
8616 return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8617 case PM_CONTEXT_CASE_IN:
8618 return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8619 case PM_CONTEXT_IF:
8620 case PM_CONTEXT_ELSIF:
8621 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
8622 case PM_CONTEXT_UNLESS:
8623 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8624 case PM_CONTEXT_EMBEXPR:
8625 return token->type == PM_TOKEN_EMBEXPR_END;
// NOTE(review): no case label precedes this return in this copy.
8627 return token->type == PM_TOKEN_BRACE_RIGHT;
8628 case PM_CONTEXT_PARENS:
8629 return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8630 case PM_CONTEXT_BEGIN:
8638 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
// NOTE(review): no case labels precede the next three returns in this copy.
8646 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
8648 return token->type == PM_TOKEN_BRACE_RIGHT;
8650 return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
8651 case PM_CONTEXT_NONE:
8652 return false;
8653 }
8654
// Reached only when `context` matches none of the labels above.
8655 return false;
8656}
8657
8662static pm_context_t
8663context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8664 pm_context_node_t *context_node = parser->current_context;
8665
8666 while (context_node != NULL) {
8667 if (context_terminator(context_node->context, token)) return context_node->context;
8668 context_node = context_node->prev;
8669 }
8670
8671 return PM_CONTEXT_NONE;
8672}
8673
8674static bool
8675context_push(pm_parser_t *parser, pm_context_t context) {
8676 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8677 if (context_node == NULL) return false;
8678
8679 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8680
8681 if (parser->current_context == NULL) {
8682 parser->current_context = context_node;
8683 } else {
8684 context_node->prev = parser->current_context;
8685 parser->current_context = context_node;
8686 }
8687
8688 return true;
8689}
8690
8691static void
8692context_pop(pm_parser_t *parser) {
8693 pm_context_node_t *prev = parser->current_context->prev;
8694 xfree(parser->current_context);
8695 parser->current_context = prev;
8696}
8697
8698static bool
8699context_p(const pm_parser_t *parser, pm_context_t context) {
8700 pm_context_node_t *context_node = parser->current_context;
8701
8702 while (context_node != NULL) {
8703 if (context_node->context == context) return true;
8704 context_node = context_node->prev;
8705 }
8706
8707 return false;
8708}
8709
/**
 * Walk the context stack from the innermost context outward and report
 * whether a method-definition context is reached before a class, module, or
 * singleton-class context.
 *
 * Returns true when the first decisive context found is one of the labels
 * preceding `return true`, false when it is one of the labels preceding
 * `return false`, and false when the stack is exhausted without finding
 * either.
 *
 * NOTE(review): the embedded listing numbers skip between the case labels
 * below, so further labels may be missing from this copy -- confirm against
 * the full source.
 */
8710static bool
8711context_def_p(const pm_parser_t *parser) {
8712 pm_context_node_t *context_node = parser->current_context;
8713
8714 while (context_node != NULL) {
8715 switch (context_node->context) {
8716 case PM_CONTEXT_DEF:
8721 return true;
8722 case PM_CONTEXT_CLASS:
8726 case PM_CONTEXT_MODULE:
8730 case PM_CONTEXT_SCLASS:
8734 return false;
8735 default:
 // Any other context is not decisive; keep looking further out.
8736 context_node = context_node->prev;
8737 }
8738 }
8739
8740 return false;
8741}
8742
/**
 * Return a human-readable, statically allocated description of the given
 * context (for example "if statement" or "'do'..'end' block").
 *
 * PM_CONTEXT_NONE is treated as unreachable: it trips an assertion and, when
 * assertions are compiled out, yields an empty string. The same applies to
 * any value that falls out of the switch.
 *
 * NOTE(review): the embedded listing numbers skip before the "'else'",
 * "'ensure'" and "'rescue'" entries, so additional case labels sharing those
 * strings appear to be missing from this copy -- confirm against the full
 * source.
 */
8747static const char *
8748context_human(pm_context_t context) {
8749 switch (context) {
8750 case PM_CONTEXT_NONE:
8751 assert(false && "unreachable");
8752 return "";
8753 case PM_CONTEXT_BEGIN: return "begin statement";
8754 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8755 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8756 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8757 case PM_CONTEXT_CASE_IN: return "'in' clause";
8758 case PM_CONTEXT_CLASS: return "class definition";
8759 case PM_CONTEXT_DEF: return "method definition";
8760 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8761 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8762 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8763 case PM_CONTEXT_ELSE:
8770 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8771 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8772 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8779 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8780 case PM_CONTEXT_FOR: return "for loop";
8781 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8782 case PM_CONTEXT_IF: return "if statement";
8783 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8784 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8785 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8786 case PM_CONTEXT_MAIN: return "top level context";
8787 case PM_CONTEXT_MODULE: return "module definition";
8788 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8789 case PM_CONTEXT_PARENS: return "parentheses";
8790 case PM_CONTEXT_POSTEXE: return "'END' block";
8791 case PM_CONTEXT_PREDICATE: return "predicate";
8792 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8800 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8801 case PM_CONTEXT_SCLASS: return "singleton class definition";
8802 case PM_CONTEXT_TERNARY: return "ternary expression";
8803 case PM_CONTEXT_UNLESS: return "unless statement";
8804 case PM_CONTEXT_UNTIL: return "until statement";
8805 case PM_CONTEXT_WHILE: return "while statement";
8806 }
8807
 // Reached only if `context` holds a value with no case above.
8808 assert(false && "unreachable");
8809 return "";
8810}
8811
8812/******************************************************************************/
8813/* Specific token lexers */
8814/******************************************************************************/
8815
8816static inline void
8817pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8818 if (invalid != NULL) {
8819 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8820 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8821 }
8822}
8823
8824static size_t
8825pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8826 const uint8_t *invalid = NULL;
8827 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8828 pm_strspn_number_validate(parser, string, length, invalid);
8829 return length;
8830}
8831
8832static size_t
8833pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8834 const uint8_t *invalid = NULL;
8835 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8836 pm_strspn_number_validate(parser, string, length, invalid);
8837 return length;
8838}
8839
8840static size_t
8841pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8842 const uint8_t *invalid = NULL;
8843 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8844 pm_strspn_number_validate(parser, string, length, invalid);
8845 return length;
8846}
8847
8848static size_t
8849pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8850 const uint8_t *invalid = NULL;
8851 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8852 pm_strspn_number_validate(parser, string, length, invalid);
8853 return length;
8854}
8855
/**
 * Lex the optional fractional part (".digits") and the optional exponent
 * part ("e"/"E", optional sign, digits) that may follow the integer part of a
 * number, advancing parser->current.end past whatever is consumed.
 *
 * `*seen_e` is set to true when an exponent part was consumed; it is left
 * untouched otherwise. An exponent with a sign but no digit after it records
 * PM_ERR_INVALID_FLOAT_EXPONENT.
 *
 * NOTE(review): `type` is returned below but neither its declaration nor any
 * assignment to it is visible in this listing (the embedded listing numbers
 * skip at the top of the body, after the fractional digits are consumed, and
 * after `*seen_e = true`). Confirm its initial value and updates against the
 * full source.
 */
8856static pm_token_type_t
8857lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8859
8860 // Here we're going to attempt to parse the optional decimal portion of a
8861 // float. If it's not there, then it's okay and we'll just continue on.
8862 if (peek(parser) == '.') {
8863 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
 // Step over the '.' and the first digit, then the remaining digits.
8864 parser->current.end += 2;
8865 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8867 } else {
8868 // If we had a . and then something else, then it's not a float
8869 // suffix on a number it's a method call or something else.
8870 return type;
8871 }
8872 }
8873
8874 // Here we're going to attempt to parse the optional exponent portion of a
8875 // float. If it's not there, it's okay and we'll just continue on.
8876 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8877 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
 // Step over the exponent marker and its sign.
8878 parser->current.end += 2;
8879
8880 if (pm_char_is_decimal_digit(peek(parser))) {
8881 parser->current.end++;
8882 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8883 } else {
8884 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8885 }
8886 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
 // Unsigned exponent: step over the marker, then consume the digits.
8887 parser->current.end++;
8888 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8889 } else {
 // An 'e'/'E' not followed by a sign or digit is not an exponent.
8890 return type;
8891 }
8892
8893 *seen_e = true;
8895 }
8896
8897 return type;
8898}
8899
/**
 * Lex the body of a numeric literal whose first digit has already been
 * consumed, advancing parser->current.end as it goes.
 *
 * If the consumed digit was '0', the next byte selects the form: 0d (decimal),
 * 0b (binary), 0o or a bare leading 0 (octal), 0x (hexadecimal), or a float
 * via '.', 'e' or 'E'. A base prefix that is not followed by a valid digit
 * records the matching PM_ERR_INVALID_NUMBER_* error. Otherwise a plain
 * decimal number and its optional float suffix are lexed.
 *
 * `*seen_e` is reset to false on entry and is only changed afterwards by
 * lex_optional_float_suffix.
 *
 * NOTE(review): `type` is assigned and returned below but its declaration is
 * not visible in this listing, and the embedded listing numbers skip just
 * before the `break` of the binary, octal and hexadecimal cases. Statements
 * appear to be missing from this copy -- confirm against the full source.
 */
8900static pm_token_type_t
8901lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8903 *seen_e = false;
8904
8905 if (peek_offset(parser, -1) == '0') {
8906 switch (*parser->current.end) {
8907 // 0d1111 is a decimal number
8908 case 'd':
8909 case 'D':
8910 parser->current.end++;
8911 if (pm_char_is_decimal_digit(peek(parser))) {
8912 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8913 } else {
 // presumably consumes a '_' if one is present, so that the error
 // reported on the current token covers it -- TODO confirm
8914 match(parser, '_');
8915 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8916 }
8917
8918 break;
8919
8920 // 0b1111 is a binary number
8921 case 'b':
8922 case 'B':
8923 parser->current.end++;
8924 if (pm_char_is_binary_digit(peek(parser))) {
8925 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8926 } else {
8927 match(parser, '_');
8928 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8929 }
8930
8932 break;
8933
8934 // 0o1111 is an octal number
8935 case 'o':
8936 case 'O':
8937 parser->current.end++;
8938 if (pm_char_is_octal_digit(peek(parser))) {
8939 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8940 } else {
8941 match(parser, '_');
8942 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8943 }
8944
8946 break;
8947
8948 // 01111 is an octal number
8949 case '_':
8950 case '0':
8951 case '1':
8952 case '2':
8953 case '3':
8954 case '4':
8955 case '5':
8956 case '6':
8957 case '7':
8958 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8960 break;
8961
8962 // 0x1111 is a hexadecimal number
8963 case 'x':
8964 case 'X':
8965 parser->current.end++;
8966 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8967 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8968 } else {
8969 match(parser, '_');
8970 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8971 }
8972
8974 break;
8975
8976 // 0.xxx is a float
8977 case '.': {
8978 type = lex_optional_float_suffix(parser, seen_e);
8979 break;
8980 }
8981
8982 // 0exxx is a float
8983 case 'e':
8984 case 'E': {
8985 type = lex_optional_float_suffix(parser, seen_e);
8986 break;
8987 }
8988 }
8989 } else {
8990 // If it didn't start with a 0, then we'll lex as far as we can into a
8991 // decimal number.
8992 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8993
8994 // Afterward, we'll lex as far as we can into an optional float suffix.
8995 type = lex_optional_float_suffix(parser, seen_e);
8996 }
8997
8998 // At this point we have a completed number, but we want to provide the user
8999 // with a good experience if they put an additional .xxx fractional
9000 // component on the end, so we'll check for that here.
9001 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
 // The error spans the '.' and every digit after it; current.end itself
 // is not advanced here.
9002 const uint8_t *fraction_start = parser->current.end;
9003 const uint8_t *fraction_end = parser->current.end + 2;
9004 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
9005 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
9006 }
9007
9008 return type;
9009}
9010
/**
 * Lex a numeric literal and return its token type. When more input is
 * available, the numeric body is lexed by lex_numeric_prefix and then an
 * optional 'r' (rational) and/or 'i' (imaginary) suffix is tried.
 *
 * The suffix is accepted only if it is not immediately followed by a byte
 * that is >= 0x80, an ASCII letter, or '_'. Otherwise parser->current.end is
 * rewound to the end of the numeric body and the un-suffixed type is kept.
 * For non-integer literals the 'r' suffix is not tried when an exponent was
 * seen.
 *
 * NOTE(review): `type` is used below but its declaration is not visible in
 * this listing, and the two inner `if (match(parser, 'i'))` bodies are empty
 * here while the embedded listing numbers skip at those points. Statements
 * appear to be missing from this copy -- confirm against the full source.
 */
9011static pm_token_type_t
9012lex_numeric(pm_parser_t *parser) {
9015
9016 if (parser->current.end < parser->end) {
9017 bool seen_e = false;
9018 type = lex_numeric_prefix(parser, &seen_e);
9019
 // Remember where the numeric body ends so a rejected suffix can be
 // rolled back.
9020 const uint8_t *end = parser->current.end;
9021 pm_token_type_t suffix_type = type;
9022
9023 if (type == PM_TOKEN_INTEGER) {
9024 if (match(parser, 'r')) {
9025 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9026
9027 if (match(parser, 'i')) {
9029 }
9030 } else if (match(parser, 'i')) {
9031 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9032 }
9033 } else {
9034 if (!seen_e && match(parser, 'r')) {
9035 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9036
9037 if (match(parser, 'i')) {
9039 }
9040 } else if (match(parser, 'i')) {
9041 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9042 }
9043 }
9044
 // If the suffix runs straight into more identifier-like bytes, it was
 // not a numeric suffix after all: undo it and keep the plain type.
9045 const uint8_t b = peek(parser);
9046 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9047 parser->current.end = end;
9048 } else {
9049 type = suffix_type;
9050 }
9051 }
9052
9053 return type;
9054}
9055
/**
 * Lex the name of a global variable, starting at the byte in
 * parser->current.end, and advance parser->current.end past the name.
 *
 * The byte at parser->current.end selects the form:
 *   - a punctuation name such as $~ or $!
 *   - a back reference ($&, $`, $', $+), returned as PM_TOKEN_BACK_REFERENCE
 *     unless the lexer is in the FNAME state
 *   - $0, which records an error if identifier characters follow it
 *   - $1..$9 followed by further digits, returned as
 *     PM_TOKEN_NUMBERED_REFERENCE unless the lexer is in the FNAME state
 *   - $- followed by a single identifier character
 *   - otherwise a run of identifier characters
 * A '$' followed by whitespace records PM_ERR_GLOBAL_VARIABLE_BARE; a '$'
 * followed by anything else unrecognised records an invalid-global error
 * spanning the '$' and the following character.
 *
 * NOTE(review): in this listing several paths have no visible `return` (after
 * the end-of-input error, after the punctuation group, at the end of the '0'
 * case, and at the end of the default case), and the embedded listing numbers
 * skip at exactly those points. Statements appear to be missing from this
 * copy -- confirm the returned token types against the full source.
 */
9056static pm_token_type_t
9057lex_global_variable(pm_parser_t *parser) {
9058 if (parser->current.end >= parser->end) {
9059 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9061 }
9062
9063 // True if multiple characters are allowed after the declaration of the
9064 // global variable. Not true when it starts with "$-".
9065 bool allow_multiple = true;
9066
9067 switch (*parser->current.end) {
9068 case '~': // $~: match-data
9069 case '*': // $*: argv
9070 case '$': // $$: pid
9071 case '?': // $?: last status
9072 case '!': // $!: error string
9073 case '@': // $@: error position
9074 case '/': // $/: input record separator
9075 case '\\': // $\: output record separator
9076 case ';': // $;: field separator
9077 case ',': // $,: output field separator
9078 case '.': // $.: last read line number
9079 case '=': // $=: ignorecase
9080 case ':': // $:: load path
9081 case '<': // $<: reading filename
9082 case '>': // $>: default output handle
9083 case '\"': // $": already loaded files
9084 parser->current.end++;
9086
9087 case '&': // $&: last match
9088 case '`': // $`: string before last match
9089 case '\'': // $': string after last match
9090 case '+': // $+: string matches last paren.
9091 parser->current.end++;
9092 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9093
9094 case '0': {
9095 parser->current.end++;
9096 size_t width;
9097
9098 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
 // Consume the whole trailing identifier so the error below covers it.
9099 do {
9100 parser->current.end += width;
9101 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9102
9103 // $0 isn't allowed to be followed by anything.
9104 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9105 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9106 }
9107
9109 }
9110
9111 case '1':
9112 case '2':
9113 case '3':
9114 case '4':
9115 case '5':
9116 case '6':
9117 case '7':
9118 case '8':
9119 case '9':
9120 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9121 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9122
9123 case '-':
 // Step over the '-' and allow only one identifier character after
 // it; control then continues into the default case below.
9124 parser->current.end++;
9125 allow_multiple = false;
9127 default: {
9128 size_t width;
9129
9130 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9131 do {
9132 parser->current.end += width;
9133 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9134 } else if (pm_char_is_whitespace(peek(parser))) {
9135 // If we get here, then we have a $ followed by whitespace,
9136 // which is not allowed.
9137 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9138 } else {
9139 // If we get here, then we have a $ followed by something that
9140 // isn't recognized as a global variable.
9141 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9142 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9143 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9144 }
9145
9147 }
9148 }
9149}
9150
9163static inline pm_token_type_t
9164lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9165 if (memcmp(current_start, value, vlen) == 0) {
9166 pm_lex_state_t last_state = parser->lex_state;
9167
9168 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9169 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9170 } else {
9171 lex_state_set(parser, state);
9172 if (state == PM_LEX_STATE_BEG) {
9173 parser->command_start = true;
9174 }
9175
9176 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9177 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9178 return modifier_type;
9179 }
9180 }
9181
9182 return type;
9183 }
9184
9185 return PM_TOKEN_EOF;
9186}
9187
/**
 * Lex an identifier starting at parser->current.start and return the token
 * type it represents: a label, a method name ending in '!' or '?', an
 * identifier ending in '=' (in the FNAME state), a keyword, a constant, or a
 * plain identifier. parser->current.end is advanced past the token and the
 * lex state is updated where a label or keyword is recognised.
 *
 * `previous_command_start` suppresses label recognition in the LABEL/ENDFN
 * states: there a trailing ':' only forms a label when it is false. Label
 * recognition in an "arg" state (lex_state_arg_p) is unaffected by it.
 *
 * Keywords are not recognised when the lex state is exactly
 * PM_LEX_STATE_DOT. Candidates are grouped by identifier length so only
 * keywords of the same length are compared.
 *
 * NOTE(review): `type` is assigned in the keyword switch but its declaration
 * is not visible in this listing, and the bodies of the "defined?" check and
 * of the `pm_do_loop_stack_p` check are empty here while the embedded listing
 * numbers skip at those points. Statements appear to be missing from this
 * copy -- confirm against the full source.
 */
9188static pm_token_type_t
9189lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9190 // Lex as far as we can into the current identifier.
9191 size_t width;
9192 const uint8_t *end = parser->end;
9193 const uint8_t *current_start = parser->current.start;
9194 const uint8_t *current_end = parser->current.end;
9195 bool encoding_changed = parser->encoding_changed;
9196
 // Use the parser's encoding only when it has been changed; otherwise take
 // the UTF-8 specific path.
9197 if (encoding_changed) {
9198 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
9199 current_end += width;
9200 }
9201 } else {
9202 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
9203 current_end += width;
9204 }
9205 }
9206 parser->current.end = current_end;
9207
9208 // Now cache the length of the identifier so that we can quickly compare it
9209 // against known keywords.
9210 width = (size_t) (current_end - current_start);
9211
9212 if (current_end < end) {
 // A trailing '!' or '?' is only taken when it is not followed by '=',
 // so that `a!=b` and `a?=b` are not read as method names.
9213 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9214 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9215 // check if we're returning the defined? keyword or just an identifier.
9216 width++;
9217
9218 if (
9219 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9220 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9221 ) {
9222 // If we're in a position where we can accept a : at the end of an
9223 // identifier, then we'll optionally accept it.
9224 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9225 (void) match(parser, ':');
9226 return PM_TOKEN_LABEL;
9227 }
9228
9229 if (parser->lex_state != PM_LEX_STATE_DOT) {
9230 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9232 }
9233 }
9234
9235 return PM_TOKEN_METHOD_NAME;
9236 }
9237
9238 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9239 // If we're in a position where we can accept a = at the end of an
9240 // identifier, then we'll optionally accept it.
9241 return PM_TOKEN_IDENTIFIER;
9242 }
9243
9244 if (
9245 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9246 peek(parser) == ':' && peek_offset(parser, 1) != ':'
9247 ) {
9248 // If we're in a position where we can accept a : at the end of an
9249 // identifier, then we'll optionally accept it.
9250 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9251 (void) match(parser, ':');
9252 return PM_TOKEN_LABEL;
9253 }
9254 }
9255
9256 if (parser->lex_state != PM_LEX_STATE_DOT) {
9258 switch (width) {
9259 case 2:
9260 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9261 if (pm_do_loop_stack_p(parser)) {
9263 }
9264 return PM_TOKEN_KEYWORD_DO;
9265 }
9266
9267 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9268 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9269 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9270 break;
9271 case 3:
9272 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9273 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9274 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9277 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9279 break;
9280 case 4:
9281 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9284 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9287 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9288 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9289 break;
9290 case 5:
9291 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9292 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9293 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9294 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9295 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9296 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9298 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9299 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9300 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9301 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9302 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9303 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9304 break;
9305 case 6:
9306 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9307 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9308 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9309 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9310 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9311 break;
9312 case 8:
9313 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9314 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9315 break;
9316 case 12:
9317 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9318 break;
9319 }
9320 }
9321
 // Not a keyword: an uppercase first character makes it a constant,
 // anything else a plain identifier.
9322 if (encoding_changed) {
9323 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9324 }
9325 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9326}
9327
9332static bool
9333current_token_starts_line(pm_parser_t *parser) {
9334 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9335}
9336
/**
 * Handle a '#' found while lexing string-like content. `pound` points at the
 * '#'; the byte after it decides what happens:
 *   - '@'  possible embedded instance or class variable ("#@foo", "#@@foo")
 *   - '$'  possible embedded global variable ("#$foo", "#$-w", "#$1", ...)
 *   - '{'  start of an embedded expression
 *   - anything else is not interpolation
 *
 * When an embedded variable is recognised and no content precedes the '#',
 * the PM_LEX_EMBVAR lex mode is pushed, parser->current.end is set just past
 * the '#', and PM_TOKEN_EMBVAR is returned. When the '#' does not start an
 * interpolation, parser->current.end is set just past the '#' and
 * PM_TOKEN_NOT_PROVIDED is returned so the caller keeps lexing.
 *
 * NOTE(review): in this listing the branches that set parser->current.end
 * when there is not enough input, or when content precedes the '#', have no
 * visible `return`, and the '{' case has no visible `return` before the
 * default label; the embedded listing numbers skip at exactly those points.
 * Statements appear to be missing from this copy -- confirm the returned
 * token types against the full source.
 */
9351static pm_token_type_t
9352lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9353 // If there is no content following this #, then we're at the end of
9354 // the string and we can safely return string content.
9355 if (pound + 1 >= parser->end) {
9356 parser->current.end = pound + 1;
9358 }
9359
9360 // Now we'll check against the character that follows the #. If it constitutes
9361 // valid interpolation, we'll handle that, otherwise we'll return
9362 // PM_TOKEN_NOT_PROVIDED.
9363 switch (pound[1]) {
9364 case '@': {
9365 // In this case we may have hit an embedded instance or class variable.
9366 if (pound + 2 >= parser->end) {
9367 parser->current.end = pound + 1;
9369 }
9370
9371 // If we're looking at a @ and there's another @, then we'll skip past the
9372 // second @.
9373 const uint8_t *variable = pound + 2;
9374 if (*variable == '@' && pound + 3 < parser->end) variable++;
9375
9376 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
9377 // At this point we're sure that we've either hit an embedded instance
9378 // or class variable. In this case we'll first need to check if we've
9379 // already consumed content.
9380 if (pound > parser->current.start) {
9381 parser->current.end = pound;
9383 }
9384
9385 // Otherwise we need to return the embedded variable token
9386 // and then switch to the embedded variable lex mode.
9387 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9388 parser->current.end = pound + 1;
9389 return PM_TOKEN_EMBVAR;
9390 }
9391
9392 // If we didn't get a valid interpolation, then this is just regular
9393 // string content. This is like if we get "#@-". In this case the caller
9394 // should keep lexing.
9395 parser->current.end = pound + 1;
9396 return PM_TOKEN_NOT_PROVIDED;
9397 }
9398 case '$':
9399 // In this case we may have hit an embedded global variable. If there's
9400 // not enough room, then we'll just return string content.
9401 if (pound + 2 >= parser->end) {
9402 parser->current.end = pound + 1;
9404 }
9405
9406 // This is the character that we're going to check to see if it is the
9407 // start of an identifier that would indicate that this is a global
9408 // variable.
9409 const uint8_t *check = pound + 2;
9410
9411 if (pound[2] == '-') {
9412 if (pound + 3 >= parser->end) {
9413 parser->current.end = pound + 2;
9415 }
9416
9417 check++;
9418 }
9419
9420 // If the character that we're going to check is the start of an
9421 // identifier, or we don't have a - and the character is a decimal number
9422 // or a global name punctuation character, then we've hit an embedded
9423 // global variable.
9424 if (
9425 char_is_identifier_start(parser, check, parser->end - check) ||
9426 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9427 ) {
9428 // In this case we've hit an embedded global variable. First check to
9429 // see if we've already consumed content. If we have, then we need to
9430 // return that content as string content first.
9431 if (pound > parser->current.start) {
9432 parser->current.end = pound;
9434 }
9435
9436 // Otherwise, we need to return the embedded variable token and switch
9437 // to the embedded variable lex mode.
9438 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9439 parser->current.end = pound + 1;
9440 return PM_TOKEN_EMBVAR;
9441 }
9442
9443 // In this case we've hit a #$ that does not indicate a global variable.
9444 // In this case we'll continue lexing past it.
9445 parser->current.end = pound + 1;
9446 return PM_TOKEN_NOT_PROVIDED;
9447 case '{':
9448 // In this case it's the start of an embedded expression. If we have
9449 // already consumed content, then we need to return that content as string
9450 // content first.
9451 if (pound > parser->current.start) {
9452 parser->current.end = pound;
9454 }
9455
9456 parser->enclosure_nesting++;
9457
9458 // Otherwise we'll skip past the #{ and begin lexing the embedded
9459 // expression.
9460 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9461 parser->current.end = pound + 2;
9462 parser->command_start = true;
9463 pm_do_loop_stack_push(parser, false);
9465 default:
9466 // In this case we've hit a # that doesn't constitute interpolation. We'll
9467 // mark that by returning the not provided token type. This tells the
9468 // consumer to keep lexing forward.
9469 parser->current.end = pound + 1;
9470 return PM_TOKEN_NOT_PROVIDED;
9471 }
9472}
9473
/*
 * Bit flags that are threaded through escape_read and its helpers to describe
 * the context in which an escape sequence is being decoded. They are combined
 * with bitwise OR.
 */

/** No modifier applies to the escape sequence. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;

/** The escape is nested inside a control escape; escape_byte masks with 0x9f. */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;

/** The escape is nested inside a meta escape; escape_byte sets the 0x80 bit. */
static const uint8_t PM_ESCAPE_FLAG_META = 0x2;

/** The escape is being read for a single character literal (e.g. ?a). */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;

/** The escape is inside a regular expression, so its source is also kept. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
9479
/**
 * Lookup table, indexed by 7-bit ASCII value, of the characters that may
 * follow a control or meta escape without being rejected. Note that this is
 * not the same set as isprint(): tab, line feed, vertical tab, form feed, and
 * carriage return are accepted, while backslash (0x5C) and DEL (0x7F) are not.
 * The table has exactly 128 entries so that it matches the bounds check in
 * char_is_ascii_printable.
 */
static const bool ascii_printable_chars[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the given byte is a 7-bit ASCII value that is marked in the
 * ascii_printable_chars table. Any byte with the high bit set returns false
 * without consulting the table.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    return (b < 0x80) && ascii_printable_chars[b];
}
9498
/**
 * Convert a single hexadecimal digit character into its numeric value.
 *
 * The caller must have already verified that the byte is one of 0-9, a-f, or
 * A-F; no validation happens here. For letters, the low three bits of the
 * ASCII code are 1 through 6 for both cases, so adding 9 yields 10 through 15.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
}
9507
9513static inline uint32_t
9514escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9515 uint32_t value = 0;
9516 for (size_t index = 0; index < length; index++) {
9517 if (index != 0) value <<= 4;
9518 value |= escape_hexadecimal_digit(string[index]);
9519 }
9520
9521 // Here we're going to verify that the value is actually a valid Unicode
9522 // codepoint and not a surrogate pair.
9523 if (value >= 0xD800 && value <= 0xDFFF) {
9524 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9525 return 0xFFFD;
9526 }
9527
9528 return value;
9529}
9530
9534static inline uint8_t
9535escape_byte(uint8_t value, const uint8_t flags) {
9536 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9537 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9538 return value;
9539}
9540
9544static inline void
9545escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9546 // \u escape sequences in string-like structures implicitly change the
9547 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9548 // literal.
9549 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9550 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9551 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9552 }
9553
9555 }
9556
9557 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
9558 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9559 pm_buffer_append_byte(buffer, 0xEF);
9560 pm_buffer_append_byte(buffer, 0xBF);
9561 pm_buffer_append_byte(buffer, 0xBD);
9562 }
9563}
9564
9569static inline void
9570escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9571 if (byte >= 0x80) {
9572 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9573 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9574 }
9575
9576 parser->explicit_encoding = parser->encoding;
9577 }
9578
9579 pm_buffer_append_byte(buffer, byte);
9580}
9581
9597static inline void
9598escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9599 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9600 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9601 }
9602
9603 escape_write_byte_encoded(parser, buffer, byte);
9604}
9605
9609static inline void
9610escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9611 size_t width;
9612 if (parser->encoding_changed) {
9613 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9614 } else {
9615 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9616 }
9617
9618 if (width == 1) {
9619 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9620 } else if (width > 1) {
9621 // Valid multibyte character. Just ignore escape.
9622 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
9623 pm_buffer_append_bytes(b, parser->current.end, width);
9624 parser->current.end += width;
9625 } else {
9626 // Assume the next character wasn't meant to be part of this escape
9627 // sequence since it is invalid. Add an error and move on.
9628 parser->current.end++;
9629 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9630 }
9631}
9632
9638static void
9639escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9640#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9641
9642 PM_PARSER_WARN_TOKEN_FORMAT(
9643 parser,
9644 parser->current,
9645 PM_WARN_INVALID_CHARACTER,
9646 FLAG(flags),
9647 FLAG(flag),
9648 type
9649 );
9650
9651#undef FLAG
9652}
9653
9657static void
9658escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9659 uint8_t peeked = peek(parser);
9660 switch (peeked) {
9661 case '\\': {
9662 parser->current.end++;
9663 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9664 return;
9665 }
9666 case '\'': {
9667 parser->current.end++;
9668 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9669 return;
9670 }
9671 case 'a': {
9672 parser->current.end++;
9673 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9674 return;
9675 }
9676 case 'b': {
9677 parser->current.end++;
9678 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9679 return;
9680 }
9681 case 'e': {
9682 parser->current.end++;
9683 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9684 return;
9685 }
9686 case 'f': {
9687 parser->current.end++;
9688 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9689 return;
9690 }
9691 case 'n': {
9692 parser->current.end++;
9693 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9694 return;
9695 }
9696 case 'r': {
9697 parser->current.end++;
9698 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9699 return;
9700 }
9701 case 's': {
9702 parser->current.end++;
9703 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9704 return;
9705 }
9706 case 't': {
9707 parser->current.end++;
9708 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9709 return;
9710 }
9711 case 'v': {
9712 parser->current.end++;
9713 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9714 return;
9715 }
9716 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9717 uint8_t value = (uint8_t) (*parser->current.end - '0');
9718 parser->current.end++;
9719
9720 if (pm_char_is_octal_digit(peek(parser))) {
9721 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9722 parser->current.end++;
9723
9724 if (pm_char_is_octal_digit(peek(parser))) {
9725 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9726 parser->current.end++;
9727 }
9728 }
9729
9730 value = escape_byte(value, flags);
9731 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9732 return;
9733 }
9734 case 'x': {
9735 const uint8_t *start = parser->current.end - 1;
9736
9737 parser->current.end++;
9738 uint8_t byte = peek(parser);
9739
9740 if (pm_char_is_hexadecimal_digit(byte)) {
9741 uint8_t value = escape_hexadecimal_digit(byte);
9742 parser->current.end++;
9743
9744 byte = peek(parser);
9745 if (pm_char_is_hexadecimal_digit(byte)) {
9746 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9747 parser->current.end++;
9748 }
9749
9750 value = escape_byte(value, flags);
9751 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9752 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9753 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9754 } else {
9755 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9756 }
9757 }
9758
9759 escape_write_byte_encoded(parser, buffer, value);
9760 } else {
9761 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9762 }
9763
9764 return;
9765 }
9766 case 'u': {
9767 const uint8_t *start = parser->current.end - 1;
9768 parser->current.end++;
9769
9770 if (parser->current.end == parser->end) {
9771 const uint8_t *start = parser->current.end - 2;
9772 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9773 } else if (peek(parser) == '{') {
9774 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9775 parser->current.end++;
9776
9777 size_t whitespace;
9778 while (true) {
9779 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9780 parser->current.end += whitespace;
9781 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9782 // This is super hacky, but it gets us nicer error
9783 // messages because we can still pass it off to the
9784 // regular expression engine even if we hit an
9785 // unterminated regular expression.
9786 parser->current.end += 2;
9787 } else {
9788 break;
9789 }
9790 }
9791
9792 const uint8_t *extra_codepoints_start = NULL;
9793 int codepoints_count = 0;
9794
9795 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9796 const uint8_t *unicode_start = parser->current.end;
9797 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9798
9799 if (hexadecimal_length > 6) {
9800 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9801 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9802 } else if (hexadecimal_length == 0) {
9803 // there are not hexadecimal characters
9804
9805 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9806 // If this is a regular expression, we are going to
9807 // let the regular expression engine handle this
9808 // error instead of us.
9809 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9810 } else {
9811 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9812 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9813 }
9814
9815 return;
9816 }
9817
9818 parser->current.end += hexadecimal_length;
9819 codepoints_count++;
9820 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9821 extra_codepoints_start = unicode_start;
9822 }
9823
9824 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9825 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9826
9827 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9828 }
9829
9830 // ?\u{nnnn} character literal should contain only one codepoint
9831 // and cannot be like ?\u{nnnn mmmm}.
9832 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9833 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9834 }
9835
9836 if (parser->current.end == parser->end) {
9837 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9838 } else if (peek(parser) == '}') {
9839 parser->current.end++;
9840 } else {
9841 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9842 // If this is a regular expression, we are going to let
9843 // the regular expression engine handle this error
9844 // instead of us.
9845 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9846 } else {
9847 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9848 }
9849 }
9850
9851 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9852 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9853 }
9854 } else {
9855 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9856
9857 if (length == 0) {
9858 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9859 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9860 } else {
9861 const uint8_t *start = parser->current.end - 2;
9862 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9863 }
9864 } else if (length == 4) {
9865 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9866
9867 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9868 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9869 }
9870
9871 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9872 parser->current.end += 4;
9873 } else {
9874 parser->current.end += length;
9875
9876 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9877 // If this is a regular expression, we are going to let
9878 // the regular expression engine handle this error
9879 // instead of us.
9880 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9881 } else {
9882 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9883 }
9884 }
9885 }
9886
9887 return;
9888 }
9889 case 'c': {
9890 parser->current.end++;
9891 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9892 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9893 }
9894
9895 if (parser->current.end == parser->end) {
9896 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9897 return;
9898 }
9899
9900 uint8_t peeked = peek(parser);
9901 switch (peeked) {
9902 case '?': {
9903 parser->current.end++;
9904 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9905 return;
9906 }
9907 case '\\':
9908 parser->current.end++;
9909
9910 if (match(parser, 'u') || match(parser, 'U')) {
9911 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9912 return;
9913 }
9914
9915 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9916 return;
9917 case ' ':
9918 parser->current.end++;
9919 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9920 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9921 return;
9922 case '\t':
9923 parser->current.end++;
9924 escape_read_warn(parser, flags, 0, "\\t");
9925 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9926 return;
9927 default: {
9928 if (!char_is_ascii_printable(peeked)) {
9929 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9930 return;
9931 }
9932
9933 parser->current.end++;
9934 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9935 return;
9936 }
9937 }
9938 }
9939 case 'C': {
9940 parser->current.end++;
9941 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9942 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9943 }
9944
9945 if (peek(parser) != '-') {
9946 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9947 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9948 return;
9949 }
9950
9951 parser->current.end++;
9952 if (parser->current.end == parser->end) {
9953 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9954 return;
9955 }
9956
9957 uint8_t peeked = peek(parser);
9958 switch (peeked) {
9959 case '?': {
9960 parser->current.end++;
9961 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9962 return;
9963 }
9964 case '\\':
9965 parser->current.end++;
9966
9967 if (match(parser, 'u') || match(parser, 'U')) {
9968 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9969 return;
9970 }
9971
9972 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9973 return;
9974 case ' ':
9975 parser->current.end++;
9976 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9977 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9978 return;
9979 case '\t':
9980 parser->current.end++;
9981 escape_read_warn(parser, flags, 0, "\\t");
9982 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9983 return;
9984 default: {
9985 if (!char_is_ascii_printable(peeked)) {
9986 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9987 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9988 return;
9989 }
9990
9991 parser->current.end++;
9992 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9993 return;
9994 }
9995 }
9996 }
9997 case 'M': {
9998 parser->current.end++;
9999 if (flags & PM_ESCAPE_FLAG_META) {
10000 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
10001 }
10002
10003 if (peek(parser) != '-') {
10004 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10005 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10006 return;
10007 }
10008
10009 parser->current.end++;
10010 if (parser->current.end == parser->end) {
10011 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10012 return;
10013 }
10014
10015 uint8_t peeked = peek(parser);
10016 switch (peeked) {
10017 case '\\':
10018 parser->current.end++;
10019
10020 if (match(parser, 'u') || match(parser, 'U')) {
10021 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10022 return;
10023 }
10024
10025 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10026 return;
10027 case ' ':
10028 parser->current.end++;
10029 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10030 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10031 return;
10032 case '\t':
10033 parser->current.end++;
10034 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10035 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10036 return;
10037 default:
10038 if (!char_is_ascii_printable(peeked)) {
10039 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10040 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10041 return;
10042 }
10043
10044 parser->current.end++;
10045 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10046 return;
10047 }
10048 }
10049 case '\r': {
10050 if (peek_offset(parser, 1) == '\n') {
10051 parser->current.end += 2;
10052 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10053 return;
10054 }
10056 }
10057 default: {
10058 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
10059 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10060 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10061 return;
10062 }
10063 if (parser->current.end < parser->end) {
10064 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
10065 } else {
10066 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10067 }
10068 return;
10069 }
10070 }
10071}
10072
10098static pm_token_type_t
10099lex_question_mark(pm_parser_t *parser) {
10100 if (lex_state_end_p(parser)) {
10101 lex_state_set(parser, PM_LEX_STATE_BEG);
10103 }
10104
10105 if (parser->current.end >= parser->end) {
10106 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10107 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10109 }
10110
10111 if (pm_char_is_whitespace(*parser->current.end)) {
10112 lex_state_set(parser, PM_LEX_STATE_BEG);
10114 }
10115
10116 lex_state_set(parser, PM_LEX_STATE_BEG);
10117
10118 if (match(parser, '\\')) {
10119 lex_state_set(parser, PM_LEX_STATE_END);
10120
10121 pm_buffer_t buffer;
10122 pm_buffer_init_capacity(&buffer, 3);
10123
10124 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10125 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10126
10128 } else {
10129 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10130
10131 // Ternary operators can have a ? immediately followed by an identifier
10132 // which starts with an underscore. We check for this case here.
10133 if (
10134 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10135 (
10136 (parser->current.end + encoding_width >= parser->end) ||
10137 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
10138 )
10139 ) {
10140 lex_state_set(parser, PM_LEX_STATE_END);
10141 parser->current.end += encoding_width;
10142 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10144 }
10145 }
10146
10148}
10149
10154static pm_token_type_t
10155lex_at_variable(pm_parser_t *parser) {
10157 const uint8_t *end = parser->end;
10158
10159 size_t width;
10160 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
10161 parser->current.end += width;
10162
10163 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
10164 parser->current.end += width;
10165 }
10166 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10167 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10168 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10169 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10170 }
10171
10172 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
10173 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10174 } else {
10175 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10176 pm_parser_err_token(parser, &parser->current, diag_id);
10177 }
10178
10179 // If we're lexing an embedded variable, then we need to pop back into the
10180 // parent lex context.
10181 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10182 lex_mode_pop(parser);
10183 }
10184
10185 return type;
10186}
10187
10191static inline void
10192parser_lex_callback(pm_parser_t *parser) {
10193 if (parser->lex_callback) {
10194 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10195 }
10196}
10197
10201static inline pm_comment_t *
10202parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10203 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10204 if (comment == NULL) return NULL;
10205
10206 *comment = (pm_comment_t) {
10207 .type = type,
10208 .location = { parser->current.start, parser->current.end }
10209 };
10210
10211 return comment;
10212}
10213
10219static pm_token_type_t
10220lex_embdoc(pm_parser_t *parser) {
10221 // First, lex out the EMBDOC_BEGIN token.
10222 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10223
10224 if (newline == NULL) {
10225 parser->current.end = parser->end;
10226 } else {
10227 pm_newline_list_append(&parser->newline_list, newline);
10228 parser->current.end = newline + 1;
10229 }
10230
10231 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10232 parser_lex_callback(parser);
10233
10234 // Now, create a comment that is going to be attached to the parser.
10235 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10236 if (comment == NULL) return PM_TOKEN_EOF;
10237
10238 // Now, loop until we find the end of the embedded documentation or the end
10239 // of the file.
10240 while (parser->current.end + 4 <= parser->end) {
10241 parser->current.start = parser->current.end;
10242
10243 // If we've hit the end of the embedded documentation then we'll return
10244 // that token here.
10245 if (
10246 (memcmp(parser->current.end, "=end", 4) == 0) &&
10247 (
10248 (parser->current.end + 4 == parser->end) || // end of file
10249 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10250 (parser->current.end[4] == '\0') || // NUL or end of script
10251 (parser->current.end[4] == '\004') || // ^D
10252 (parser->current.end[4] == '\032') // ^Z
10253 )
10254 ) {
10255 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10256
10257 if (newline == NULL) {
10258 parser->current.end = parser->end;
10259 } else {
10260 pm_newline_list_append(&parser->newline_list, newline);
10261 parser->current.end = newline + 1;
10262 }
10263
10264 parser->current.type = PM_TOKEN_EMBDOC_END;
10265 parser_lex_callback(parser);
10266
10267 comment->location.end = parser->current.end;
10268 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10269
10270 return PM_TOKEN_EMBDOC_END;
10271 }
10272
10273 // Otherwise, we'll parse until the end of the line and return a line of
10274 // embedded documentation.
10275 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10276
10277 if (newline == NULL) {
10278 parser->current.end = parser->end;
10279 } else {
10280 pm_newline_list_append(&parser->newline_list, newline);
10281 parser->current.end = newline + 1;
10282 }
10283
10284 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10285 parser_lex_callback(parser);
10286 }
10287
10288 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10289
10290 comment->location.end = parser->current.end;
10291 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10292
10293 return PM_TOKEN_EOF;
10294}
10295
10301static inline void
10302parser_lex_ignored_newline(pm_parser_t *parser) {
10303 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10304 parser_lex_callback(parser);
10305}
10306
10316static inline void
10317parser_flush_heredoc_end(pm_parser_t *parser) {
10318 assert(parser->heredoc_end <= parser->end);
10319 parser->next_start = parser->heredoc_end;
10320 parser->heredoc_end = NULL;
10321}
10322
10326static bool
10327parser_end_of_line_p(const pm_parser_t *parser) {
10328 const uint8_t *cursor = parser->current.end;
10329
10330 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10331 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10332 }
10333
10334 return true;
10335}
10336
10355typedef struct {
10361
10366 const uint8_t *cursor;
10368
10388
10392static inline void
10393pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10394 pm_buffer_append_byte(&token_buffer->buffer, byte);
10395}
10396
10397static inline void
10398pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10399 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10400}
10401
10405static inline size_t
10406parser_char_width(const pm_parser_t *parser) {
10407 size_t width;
10408 if (parser->encoding_changed) {
10409 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10410 } else {
10411 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10412 }
10413
10414 // TODO: If the character is invalid in the given encoding, then we'll just
10415 // push one byte into the buffer. This should actually be an error.
10416 return (width == 0 ? 1 : width);
10417}
10418
10422static void
10423pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10424 size_t width = parser_char_width(parser);
10425 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10426 parser->current.end += width;
10427}
10428
10429static void
10430pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10431 size_t width = parser_char_width(parser);
10432 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10433 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10434 parser->current.end += width;
10435}
10436
/**
 * Returns true if none of the first `length` bytes of `value` has its high
 * bit set, i.e. every byte is 7-bit ASCII. An empty slice is ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    for (size_t index = 0; index < length; index++) {
        if (value[index] & 0x80) return false;
    }

    return true;
}
10445
10452static inline void
10453pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10454 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10455}
10456
10457static inline void
10458pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10459 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10460 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10461 pm_buffer_free(&token_buffer->regexp_buffer);
10462}
10463
10473static void
10474pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10475 if (token_buffer->cursor == NULL) {
10476 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10477 } else {
10478 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10479 pm_token_buffer_copy(parser, token_buffer);
10480 }
10481}
10482
10483static void
10484pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10485 if (token_buffer->base.cursor == NULL) {
10486 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10487 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10488 } else {
10489 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10490 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10491 pm_regexp_token_buffer_copy(parser, token_buffer);
10492 }
10493}
10494
10495#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10496
10505static void
10506pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10507 const uint8_t *start;
10508 if (token_buffer->cursor == NULL) {
10509 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10510 start = parser->current.start;
10511 } else {
10512 start = token_buffer->cursor;
10513 }
10514
10515 const uint8_t *end = parser->current.end - 1;
10516 assert(end >= start);
10517 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10518
10519 token_buffer->cursor = end;
10520}
10521
10522static void
10523pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10524 const uint8_t *start;
10525 if (token_buffer->base.cursor == NULL) {
10526 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10527 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10528 start = parser->current.start;
10529 } else {
10530 start = token_buffer->base.cursor;
10531 }
10532
10533 const uint8_t *end = parser->current.end - 1;
10534 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10535 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10536
10537 token_buffer->base.cursor = end;
10538}
10539
10540#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10541
10546static inline size_t
10547pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10548 size_t whitespace = 0;
10549
10550 switch (indent) {
10551 case PM_HEREDOC_INDENT_NONE:
10552 // Do nothing, we can't match a terminator with
10553 // indentation and there's no need to calculate common
10554 // whitespace.
10555 break;
10556 case PM_HEREDOC_INDENT_DASH:
10557 // Skip past inline whitespace.
10558 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10559 break;
10560 case PM_HEREDOC_INDENT_TILDE:
10561 // Skip past inline whitespace and calculate common
10562 // whitespace.
10563 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10564 if (**cursor == '\t') {
10565 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10566 } else {
10567 whitespace++;
10568 }
10569 (*cursor)++;
10570 }
10571
10572 break;
10573 }
10574
10575 return whitespace;
10576}
10577
10582static uint8_t
10583pm_lex_percent_delimiter(pm_parser_t *parser) {
10584 size_t eol_length = match_eol(parser);
10585
10586 if (eol_length) {
10587 if (parser->heredoc_end) {
10588 // If we have already lexed a heredoc, then the newline has already
10589 // been added to the list. In this case we want to just flush the
10590 // heredoc end.
10591 parser_flush_heredoc_end(parser);
10592 } else {
10593 // Otherwise, we'll add the newline to the list of newlines.
10594 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10595 }
10596
10597 uint8_t delimiter = *parser->current.end;
10598
10599 // If our delimiter is \r\n, we want to treat it as if it's \n.
10600 // For example, %\r\nfoo\r\n should be "foo"
10601 if (eol_length == 2) {
10602 delimiter = *(parser->current.end + 1);
10603 }
10604
10605 parser->current.end += eol_length;
10606 return delimiter;
10607 }
10608
10609 return *parser->current.end++;
10610}
10611
/**
 * Finish lexing the current token: store the given type on the parser's
 * current token, invoke parser_lex_callback, and return from the enclosing
 * function.
 *
 * This relies on a variable named `parser` being in scope at the call site,
 * and because it executes a bare `return` it can only be used inside functions
 * that return void. Use it as a statement: `LEX(type);`.
 *
 * The body is wrapped in do { ... } while (0) so the three statements always
 * act as a single statement, including under an unbraced `if`.
 */
#define LEX(token_type) do { parser->current.type = (token_type); parser_lex_callback(parser); return; } while (0)
10617
10624static void
10625parser_lex(pm_parser_t *parser) {
10626 assert(parser->current.end <= parser->end);
10627 parser->previous = parser->current;
10628
10629 // This value mirrors cmd_state from CRuby.
10630 bool previous_command_start = parser->command_start;
10631 parser->command_start = false;
10632
10633 // This is used to communicate to the newline lexing function that we've
10634 // already seen a comment.
10635 bool lexed_comment = false;
10636
10637 // Here we cache the current value of the semantic token seen flag. This is
10638 // used to reset it in case we find a token that shouldn't flip this flag.
10639 unsigned int semantic_token_seen = parser->semantic_token_seen;
10640 parser->semantic_token_seen = true;
10641
10642 switch (parser->lex_modes.current->mode) {
10643 case PM_LEX_DEFAULT:
10644 case PM_LEX_EMBEXPR:
10645 case PM_LEX_EMBVAR:
10646
10647 // We have a specific named label here because we are going to jump back to
10648 // this location in the event that we have lexed a token that should not be
10649 // returned to the parser. This includes comments, ignored newlines, and
10650 // invalid tokens of some form.
10651 lex_next_token: {
10652 // If we have the special next_start pointer set, then we're going to jump
10653 // to that location and start lexing from there.
10654 if (parser->next_start != NULL) {
10655 parser->current.end = parser->next_start;
10656 parser->next_start = NULL;
10657 }
10658
10659 // This value mirrors space_seen from CRuby. It tracks whether or not
10660 // space has been eaten before the start of the next token.
10661 bool space_seen = false;
10662
10663 // First, we're going to skip past any whitespace at the front of the next
10664 // token.
10665 bool chomping = true;
10666 while (parser->current.end < parser->end && chomping) {
10667 switch (*parser->current.end) {
10668 case ' ':
10669 case '\t':
10670 case '\f':
10671 case '\v':
10672 parser->current.end++;
10673 space_seen = true;
10674 break;
10675 case '\r':
10676 if (match_eol_offset(parser, 1)) {
10677 chomping = false;
10678 } else {
10679 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10680 parser->current.end++;
10681 space_seen = true;
10682 }
10683 break;
10684 case '\\': {
10685 size_t eol_length = match_eol_offset(parser, 1);
10686 if (eol_length) {
10687 if (parser->heredoc_end) {
10688 parser->current.end = parser->heredoc_end;
10689 parser->heredoc_end = NULL;
10690 } else {
10691 parser->current.end += eol_length + 1;
10692 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10693 space_seen = true;
10694 }
10695 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10696 parser->current.end += 2;
10697 } else {
10698 chomping = false;
10699 }
10700
10701 break;
10702 }
10703 default:
10704 chomping = false;
10705 break;
10706 }
10707 }
10708
10709 // Next, we'll set to start of this token to be the current end.
10710 parser->current.start = parser->current.end;
10711
10712 // We'll check if we're at the end of the file. If we are, then we
10713 // need to return the EOF token.
10714 if (parser->current.end >= parser->end) {
10715 // If we hit EOF, but the EOF came immediately after a newline,
10716 // set the start of the token to the newline. This way any EOF
10717 // errors will be reported as happening on that line rather than
10718 // a line after. For example "foo(\n" should report an error
10719 // on line 1 even though EOF technically occurs on line 2.
10720 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10721 parser->current.start -= 1;
10722 }
10723 LEX(PM_TOKEN_EOF);
10724 }
10725
10726 // Finally, we'll check the current character to determine the next
10727 // token.
10728 switch (*parser->current.end++) {
10729 case '\0': // NUL or end of script
10730 case '\004': // ^D
10731 case '\032': // ^Z
10732 parser->current.end--;
10733 LEX(PM_TOKEN_EOF);
10734
10735 case '#': { // comments
10736 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10737 parser->current.end = ending == NULL ? parser->end : ending;
10738
10739 // If we found a comment while lexing, then we're going to
10740 // add it to the list of comments in the file and keep
10741 // lexing.
10742 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10743 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10744
10745 if (ending) parser->current.end++;
10746 parser->current.type = PM_TOKEN_COMMENT;
10747 parser_lex_callback(parser);
10748
10749 // Here, parse the comment to see if it's a magic comment
10750 // and potentially change state on the parser.
10751 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10752 ptrdiff_t length = parser->current.end - parser->current.start;
10753
10754 // If we didn't find a magic comment within the first
10755 // pass and we're at the start of the file, then we need
10756 // to do another pass to potentially find other patterns
10757 // for encoding comments.
10758 if (length >= 10 && !parser->encoding_locked) {
10759 parser_lex_magic_comment_encoding(parser);
10760 }
10761 }
10762
10763 lexed_comment = true;
10764 }
10766 case '\r':
10767 case '\n': {
10768 parser->semantic_token_seen = semantic_token_seen & 0x1;
10769 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10770
10771 if (eol_length) {
10772 // The only way you can have carriage returns in this
10773 // particular loop is if you have a carriage return
10774 // followed by a newline. In that case we'll just skip
10775 // over the carriage return and continue lexing, in
10776 // order to make it so that the newline token
10777 // encapsulates both the carriage return and the
10778 // newline. Note that we need to check that we haven't
10779 // already lexed a comment here because that falls
10780 // through into here as well.
10781 if (!lexed_comment) {
10782 parser->current.end += eol_length - 1; // skip CR
10783 }
10784
10785 if (parser->heredoc_end == NULL) {
10786 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10787 }
10788 }
10789
10790 if (parser->heredoc_end) {
10791 parser_flush_heredoc_end(parser);
10792 }
10793
10794 // If this is an ignored newline, then we can continue lexing after
10795 // calling the callback with the ignored newline token.
10796 switch (lex_state_ignored_p(parser)) {
10797 case PM_IGNORED_NEWLINE_NONE:
10798 break;
10799 case PM_IGNORED_NEWLINE_PATTERN:
10800 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10801 if (!lexed_comment) parser_lex_ignored_newline(parser);
10802 lex_state_set(parser, PM_LEX_STATE_BEG);
10803 parser->command_start = true;
10804 parser->current.type = PM_TOKEN_NEWLINE;
10805 return;
10806 }
10808 case PM_IGNORED_NEWLINE_ALL:
10809 if (!lexed_comment) parser_lex_ignored_newline(parser);
10810 lexed_comment = false;
10811 goto lex_next_token;
10812 }
10813
10814 // Here we need to look ahead and see if there is a call operator
10815 // (either . or &.) that starts the next line. If there is, then this
10816 // is going to become an ignored newline and we're going to instead
10817 // return the call operator.
10818 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10819 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10820
10821 if (next_content < parser->end) {
10822 // If we hit a comment after a newline, then we're going to check
10823 // if it's ignored or if it's followed by a method call ('.').
10824 // If it is, then we're going to call the
10825 // callback with an ignored newline and then continue lexing.
10826 // Otherwise we'll return a regular newline.
10827 if (next_content[0] == '#') {
10828 // Here we look for a "." or "&." following a "\n".
10829 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10830
10831 while (following && (following + 1 < parser->end)) {
10832 following++;
10833 following += pm_strspn_inline_whitespace(following, parser->end - following);
10834
10835 // If this is not followed by a comment, then we can break out
10836 // of this loop.
10837 if (peek_at(parser, following) != '#') break;
10838
10839 // If there is a comment, then we need to find the end of the
10840 // comment and continue searching from there.
10841 following = next_newline(following, parser->end - following);
10842 }
10843
10844 // If the lex state was ignored, or we hit a '.' or a '&.',
10845 // we will lex the ignored newline
10846 if (
10847 lex_state_ignored_p(parser) ||
10848 (following && (
10849 (peek_at(parser, following) == '.') ||
10850 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10851 ))
10852 ) {
10853 if (!lexed_comment) parser_lex_ignored_newline(parser);
10854 lexed_comment = false;
10855 goto lex_next_token;
10856 }
10857 }
10858
10859 // If we hit a . after a newline, then we're in a call chain and
10860 // we need to return the call operator.
10861 if (next_content[0] == '.') {
10862 // To match ripper, we need to emit an ignored newline even though
10863 // it's a real newline in the case that we have a beginless range
10864 // on a subsequent line.
10865 if (peek_at(parser, next_content + 1) == '.') {
10866 if (!lexed_comment) parser_lex_ignored_newline(parser);
10867 lex_state_set(parser, PM_LEX_STATE_BEG);
10868 parser->command_start = true;
10869 parser->current.type = PM_TOKEN_NEWLINE;
10870 return;
10871 }
10872
10873 if (!lexed_comment) parser_lex_ignored_newline(parser);
10874 lex_state_set(parser, PM_LEX_STATE_DOT);
10875 parser->current.start = next_content;
10876 parser->current.end = next_content + 1;
10877 parser->next_start = NULL;
10878 LEX(PM_TOKEN_DOT);
10879 }
10880
10881 // If we hit a &. after a newline, then we're in a call chain and
10882 // we need to return the call operator.
10883 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10884 if (!lexed_comment) parser_lex_ignored_newline(parser);
10885 lex_state_set(parser, PM_LEX_STATE_DOT);
10886 parser->current.start = next_content;
10887 parser->current.end = next_content + 2;
10888 parser->next_start = NULL;
10890 }
10891 }
10892
10893 // At this point we know this is a regular newline, and we can set the
10894 // necessary state and return the token.
10895 lex_state_set(parser, PM_LEX_STATE_BEG);
10896 parser->command_start = true;
10897 parser->current.type = PM_TOKEN_NEWLINE;
10898 if (!lexed_comment) parser_lex_callback(parser);
10899 return;
10900 }
10901
10902 // ,
10903 case ',':
10904 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10905 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10906 }
10907
10908 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10909 LEX(PM_TOKEN_COMMA);
10910
10911 // (
10912 case '(': {
10914
10915 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10917 }
10918
10919 parser->enclosure_nesting++;
10920 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10921 pm_do_loop_stack_push(parser, false);
10922 LEX(type);
10923 }
10924
10925 // )
10926 case ')':
10927 parser->enclosure_nesting--;
10928 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10929 pm_do_loop_stack_pop(parser);
10931
10932 // ;
10933 case ';':
10934 lex_state_set(parser, PM_LEX_STATE_BEG);
10935 parser->command_start = true;
10936 LEX(PM_TOKEN_SEMICOLON);
10937
10938 // [ [] []=
10939 case '[':
10940 parser->enclosure_nesting++;
10942
10943 if (lex_state_operator_p(parser)) {
10944 if (match(parser, ']')) {
10945 parser->enclosure_nesting--;
10946 lex_state_set(parser, PM_LEX_STATE_ARG);
10948 }
10949
10950 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10951 LEX(type);
10952 }
10953
10954 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10956 }
10957
10958 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10959 pm_do_loop_stack_push(parser, false);
10960 LEX(type);
10961
10962 // ]
10963 case ']':
10964 parser->enclosure_nesting--;
10965 lex_state_set(parser, PM_LEX_STATE_END);
10966 pm_do_loop_stack_pop(parser);
10968
10969 // {
10970 case '{': {
10972
10973 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10974 // This { begins a lambda
10975 parser->command_start = true;
10976 lex_state_set(parser, PM_LEX_STATE_BEG);
10978 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10979 // This { begins a hash literal
10980 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10981 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10982 // This { begins a block
10983 parser->command_start = true;
10984 lex_state_set(parser, PM_LEX_STATE_BEG);
10985 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10986 // This { begins a block on a command
10987 parser->command_start = true;
10988 lex_state_set(parser, PM_LEX_STATE_BEG);
10989 } else {
10990 // This { begins a hash literal
10991 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10992 }
10993
10994 parser->enclosure_nesting++;
10995 parser->brace_nesting++;
10996 pm_do_loop_stack_push(parser, false);
10997
10998 LEX(type);
10999 }
11000
11001 // }
11002 case '}':
11003 parser->enclosure_nesting--;
11004 pm_do_loop_stack_pop(parser);
11005
11006 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
11007 lex_mode_pop(parser);
11009 }
11010
11011 parser->brace_nesting--;
11012 lex_state_set(parser, PM_LEX_STATE_END);
11014
11015 // * ** **= *=
11016 case '*': {
11017 if (match(parser, '*')) {
11018 if (match(parser, '=')) {
11019 lex_state_set(parser, PM_LEX_STATE_BEG);
11021 }
11022
11024
11025 if (lex_state_spcarg_p(parser, space_seen)) {
11026 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11028 } else if (lex_state_beg_p(parser)) {
11030 } else if (ambiguous_operator_p(parser, space_seen)) {
11031 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11032 }
11033
11034 if (lex_state_operator_p(parser)) {
11035 lex_state_set(parser, PM_LEX_STATE_ARG);
11036 } else {
11037 lex_state_set(parser, PM_LEX_STATE_BEG);
11038 }
11039
11040 LEX(type);
11041 }
11042
11043 if (match(parser, '=')) {
11044 lex_state_set(parser, PM_LEX_STATE_BEG);
11046 }
11047
11049
11050 if (lex_state_spcarg_p(parser, space_seen)) {
11051 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11053 } else if (lex_state_beg_p(parser)) {
11055 } else if (ambiguous_operator_p(parser, space_seen)) {
11056 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11057 }
11058
11059 if (lex_state_operator_p(parser)) {
11060 lex_state_set(parser, PM_LEX_STATE_ARG);
11061 } else {
11062 lex_state_set(parser, PM_LEX_STATE_BEG);
11063 }
11064
11065 LEX(type);
11066 }
11067
11068 // ! != !~ !@
11069 case '!':
11070 if (lex_state_operator_p(parser)) {
11071 lex_state_set(parser, PM_LEX_STATE_ARG);
11072 if (match(parser, '@')) {
11073 LEX(PM_TOKEN_BANG);
11074 }
11075 } else {
11076 lex_state_set(parser, PM_LEX_STATE_BEG);
11077 }
11078
11079 if (match(parser, '=')) {
11081 }
11082
11083 if (match(parser, '~')) {
11085 }
11086
11087 LEX(PM_TOKEN_BANG);
11088
11089 // = => =~ == === =begin
11090 case '=':
11091 if (
11092 current_token_starts_line(parser) &&
11093 (parser->current.end + 5 <= parser->end) &&
11094 memcmp(parser->current.end, "begin", 5) == 0 &&
11095 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11096 ) {
11097 pm_token_type_t type = lex_embdoc(parser);
11098 if (type == PM_TOKEN_EOF) {
11099 LEX(type);
11100 }
11101
11102 goto lex_next_token;
11103 }
11104
11105 if (lex_state_operator_p(parser)) {
11106 lex_state_set(parser, PM_LEX_STATE_ARG);
11107 } else {
11108 lex_state_set(parser, PM_LEX_STATE_BEG);
11109 }
11110
11111 if (match(parser, '>')) {
11113 }
11114
11115 if (match(parser, '~')) {
11117 }
11118
11119 if (match(parser, '=')) {
11120 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11121 }
11122
11123 LEX(PM_TOKEN_EQUAL);
11124
11125 // < << <<= <= <=>
11126 case '<':
11127 if (match(parser, '<')) {
11128 if (
11129 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11130 !lex_state_end_p(parser) &&
11131 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11132 ) {
11133 const uint8_t *end = parser->current.end;
11134
11135 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11136 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11137
11138 if (match(parser, '-')) {
11139 indent = PM_HEREDOC_INDENT_DASH;
11140 }
11141 else if (match(parser, '~')) {
11142 indent = PM_HEREDOC_INDENT_TILDE;
11143 }
11144
11145 if (match(parser, '`')) {
11146 quote = PM_HEREDOC_QUOTE_BACKTICK;
11147 }
11148 else if (match(parser, '"')) {
11149 quote = PM_HEREDOC_QUOTE_DOUBLE;
11150 }
11151 else if (match(parser, '\'')) {
11152 quote = PM_HEREDOC_QUOTE_SINGLE;
11153 }
11154
11155 const uint8_t *ident_start = parser->current.end;
11156 size_t width = 0;
11157
11158 if (parser->current.end >= parser->end) {
11159 parser->current.end = end;
11160 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
11161 parser->current.end = end;
11162 } else {
11163 if (quote == PM_HEREDOC_QUOTE_NONE) {
11164 parser->current.end += width;
11165
11166 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
11167 parser->current.end += width;
11168 }
11169 } else {
11170 // If we have quotes, then we're going to go until we find the
11171 // end quote.
11172 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11173 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11174 parser->current.end++;
11175 }
11176 }
11177
11178 size_t ident_length = (size_t) (parser->current.end - ident_start);
11179 bool ident_error = false;
11180
11181 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11182 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11183 ident_error = true;
11184 }
11185
11186 parser->explicit_encoding = NULL;
11187 lex_mode_push(parser, (pm_lex_mode_t) {
11188 .mode = PM_LEX_HEREDOC,
11189 .as.heredoc = {
11190 .base = {
11191 .ident_start = ident_start,
11192 .ident_length = ident_length,
11193 .quote = quote,
11194 .indent = indent
11195 },
11196 .next_start = parser->current.end,
11197 .common_whitespace = NULL,
11198 .line_continuation = false
11199 }
11200 });
11201
11202 if (parser->heredoc_end == NULL) {
11203 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11204
11205 if (body_start == NULL) {
11206 // If there is no newline after the heredoc identifier, then
11207 // this is not a valid heredoc declaration. In this case we
11208 // will add an error, but we will still return a heredoc
11209 // start.
11210 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11211 body_start = parser->end;
11212 } else {
11213 // Otherwise, we want to indicate that the body of the
11214 // heredoc starts on the character after the next newline.
11215 pm_newline_list_append(&parser->newline_list, body_start);
11216 body_start++;
11217 }
11218
11219 parser->next_start = body_start;
11220 } else {
11221 parser->next_start = parser->heredoc_end;
11222 }
11223
11225 }
11226 }
11227
11228 if (match(parser, '=')) {
11229 lex_state_set(parser, PM_LEX_STATE_BEG);
11231 }
11232
11233 if (ambiguous_operator_p(parser, space_seen)) {
11234 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11235 }
11236
11237 if (lex_state_operator_p(parser)) {
11238 lex_state_set(parser, PM_LEX_STATE_ARG);
11239 } else {
11240 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11241 lex_state_set(parser, PM_LEX_STATE_BEG);
11242 }
11243
11244 LEX(PM_TOKEN_LESS_LESS);
11245 }
11246
11247 if (lex_state_operator_p(parser)) {
11248 lex_state_set(parser, PM_LEX_STATE_ARG);
11249 } else {
11250 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11251 lex_state_set(parser, PM_LEX_STATE_BEG);
11252 }
11253
11254 if (match(parser, '=')) {
11255 if (match(parser, '>')) {
11257 }
11258
11260 }
11261
11262 LEX(PM_TOKEN_LESS);
11263
11264 // > >> >>= >=
11265 case '>':
11266 if (match(parser, '>')) {
11267 if (lex_state_operator_p(parser)) {
11268 lex_state_set(parser, PM_LEX_STATE_ARG);
11269 } else {
11270 lex_state_set(parser, PM_LEX_STATE_BEG);
11271 }
11272 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11273 }
11274
11275 if (lex_state_operator_p(parser)) {
11276 lex_state_set(parser, PM_LEX_STATE_ARG);
11277 } else {
11278 lex_state_set(parser, PM_LEX_STATE_BEG);
11279 }
11280
11281 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11282
11283 // double-quoted string literal
11284 case '"': {
11285 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11286 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11288 }
11289
11290 // xstring literal
11291 case '`': {
11292 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11293 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11294 LEX(PM_TOKEN_BACKTICK);
11295 }
11296
11297 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11298 if (previous_command_start) {
11299 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11300 } else {
11301 lex_state_set(parser, PM_LEX_STATE_ARG);
11302 }
11303
11304 LEX(PM_TOKEN_BACKTICK);
11305 }
11306
11307 lex_mode_push_string(parser, true, false, '\0', '`');
11308 LEX(PM_TOKEN_BACKTICK);
11309 }
11310
11311 // single-quoted string literal
11312 case '\'': {
11313 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11314 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11316 }
11317
11318 // ? character literal
11319 case '?':
11320 LEX(lex_question_mark(parser));
11321
11322 // & && &&= &=
11323 case '&': {
11324 if (match(parser, '&')) {
11325 lex_state_set(parser, PM_LEX_STATE_BEG);
11326
11327 if (match(parser, '=')) {
11329 }
11330
11332 }
11333
11334 if (match(parser, '=')) {
11335 lex_state_set(parser, PM_LEX_STATE_BEG);
11337 }
11338
11339 if (match(parser, '.')) {
11340 lex_state_set(parser, PM_LEX_STATE_DOT);
11342 }
11343
11345 if (lex_state_spcarg_p(parser, space_seen)) {
11346 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11347 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11348 } else {
11349 const uint8_t delim = peek_offset(parser, 1);
11350
11351 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
11352 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11353 }
11354 }
11355
11357 } else if (lex_state_beg_p(parser)) {
11359 } else if (ambiguous_operator_p(parser, space_seen)) {
11360 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11361 }
11362
11363 if (lex_state_operator_p(parser)) {
11364 lex_state_set(parser, PM_LEX_STATE_ARG);
11365 } else {
11366 lex_state_set(parser, PM_LEX_STATE_BEG);
11367 }
11368
11369 LEX(type);
11370 }
11371
11372 // | || ||= |=
11373 case '|':
11374 if (match(parser, '|')) {
11375 if (match(parser, '=')) {
11376 lex_state_set(parser, PM_LEX_STATE_BEG);
11378 }
11379
11380 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11381 parser->current.end--;
11382 LEX(PM_TOKEN_PIPE);
11383 }
11384
11385 lex_state_set(parser, PM_LEX_STATE_BEG);
11386 LEX(PM_TOKEN_PIPE_PIPE);
11387 }
11388
11389 if (match(parser, '=')) {
11390 lex_state_set(parser, PM_LEX_STATE_BEG);
11392 }
11393
11394 if (lex_state_operator_p(parser)) {
11395 lex_state_set(parser, PM_LEX_STATE_ARG);
11396 } else {
11397 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11398 }
11399
11400 LEX(PM_TOKEN_PIPE);
11401
11402 // + += +@
11403 case '+': {
11404 if (lex_state_operator_p(parser)) {
11405 lex_state_set(parser, PM_LEX_STATE_ARG);
11406
11407 if (match(parser, '@')) {
11408 LEX(PM_TOKEN_UPLUS);
11409 }
11410
11411 LEX(PM_TOKEN_PLUS);
11412 }
11413
11414 if (match(parser, '=')) {
11415 lex_state_set(parser, PM_LEX_STATE_BEG);
11417 }
11418
11419 if (
11420 lex_state_beg_p(parser) ||
11421 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11422 ) {
11423 lex_state_set(parser, PM_LEX_STATE_BEG);
11424
11425 if (pm_char_is_decimal_digit(peek(parser))) {
11426 parser->current.end++;
11427 pm_token_type_t type = lex_numeric(parser);
11428 lex_state_set(parser, PM_LEX_STATE_END);
11429 LEX(type);
11430 }
11431
11432 LEX(PM_TOKEN_UPLUS);
11433 }
11434
11435 if (ambiguous_operator_p(parser, space_seen)) {
11436 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11437 }
11438
11439 lex_state_set(parser, PM_LEX_STATE_BEG);
11440 LEX(PM_TOKEN_PLUS);
11441 }
11442
11443 // - -= -@
11444 case '-': {
11445 if (lex_state_operator_p(parser)) {
11446 lex_state_set(parser, PM_LEX_STATE_ARG);
11447
11448 if (match(parser, '@')) {
11449 LEX(PM_TOKEN_UMINUS);
11450 }
11451
11452 LEX(PM_TOKEN_MINUS);
11453 }
11454
11455 if (match(parser, '=')) {
11456 lex_state_set(parser, PM_LEX_STATE_BEG);
11458 }
11459
11460 if (match(parser, '>')) {
11461 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11463 }
11464
11465 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11466 bool is_beg = lex_state_beg_p(parser);
11467 if (!is_beg && spcarg) {
11468 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11469 }
11470
11471 if (is_beg || spcarg) {
11472 lex_state_set(parser, PM_LEX_STATE_BEG);
11473 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11474 }
11475
11476 if (ambiguous_operator_p(parser, space_seen)) {
11477 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11478 }
11479
11480 lex_state_set(parser, PM_LEX_STATE_BEG);
11481 LEX(PM_TOKEN_MINUS);
11482 }
11483
11484 // . .. ...
11485 case '.': {
11486 bool beg_p = lex_state_beg_p(parser);
11487
11488 if (match(parser, '.')) {
11489 if (match(parser, '.')) {
11490 // If we're _not_ inside a range within default parameters
11491 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11492 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11493 lex_state_set(parser, PM_LEX_STATE_BEG);
11494 } else {
11495 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11496 }
11498 }
11499
11500 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11501 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11502 }
11503
11504 lex_state_set(parser, PM_LEX_STATE_BEG);
11506 }
11507
11508 lex_state_set(parser, PM_LEX_STATE_BEG);
11509 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11510 }
11511
11512 lex_state_set(parser, PM_LEX_STATE_DOT);
11513 LEX(PM_TOKEN_DOT);
11514 }
11515
11516 // integer
11517 case '0':
11518 case '1':
11519 case '2':
11520 case '3':
11521 case '4':
11522 case '5':
11523 case '6':
11524 case '7':
11525 case '8':
11526 case '9': {
11527 pm_token_type_t type = lex_numeric(parser);
11528 lex_state_set(parser, PM_LEX_STATE_END);
11529 LEX(type);
11530 }
11531
11532 // :: symbol
11533 case ':':
11534 if (match(parser, ':')) {
11535 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11536 lex_state_set(parser, PM_LEX_STATE_BEG);
11538 }
11539
11540 lex_state_set(parser, PM_LEX_STATE_DOT);
11542 }
11543
11544 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11545 lex_state_set(parser, PM_LEX_STATE_BEG);
11546 LEX(PM_TOKEN_COLON);
11547 }
11548
11549 if (peek(parser) == '"' || peek(parser) == '\'') {
11550 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11551 parser->current.end++;
11552 }
11553
11554 lex_state_set(parser, PM_LEX_STATE_FNAME);
11556
11557 // / /=
11558 case '/':
11559 if (lex_state_beg_p(parser)) {
11560 lex_mode_push_regexp(parser, '\0', '/');
11562 }
11563
11564 if (match(parser, '=')) {
11565 lex_state_set(parser, PM_LEX_STATE_BEG);
11567 }
11568
11569 if (lex_state_spcarg_p(parser, space_seen)) {
11570 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11571 lex_mode_push_regexp(parser, '\0', '/');
11573 }
11574
11575 if (ambiguous_operator_p(parser, space_seen)) {
11576 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11577 }
11578
11579 if (lex_state_operator_p(parser)) {
11580 lex_state_set(parser, PM_LEX_STATE_ARG);
11581 } else {
11582 lex_state_set(parser, PM_LEX_STATE_BEG);
11583 }
11584
11585 LEX(PM_TOKEN_SLASH);
11586
11587 // ^ ^=
11588 case '^':
11589 if (lex_state_operator_p(parser)) {
11590 lex_state_set(parser, PM_LEX_STATE_ARG);
11591 } else {
11592 lex_state_set(parser, PM_LEX_STATE_BEG);
11593 }
11594 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11595
11596 // ~ ~@
11597 case '~':
11598 if (lex_state_operator_p(parser)) {
11599 (void) match(parser, '@');
11600 lex_state_set(parser, PM_LEX_STATE_ARG);
11601 } else {
11602 lex_state_set(parser, PM_LEX_STATE_BEG);
11603 }
11604
11605 LEX(PM_TOKEN_TILDE);
11606
11607 // % %= %i %I %q %Q %w %W
11608 case '%': {
11609 // If there is no subsequent character then we have an
11610 // invalid token. We're going to say it's the percent
11611 // operator because we don't want to move into the string
11612 // lex mode unnecessarily.
11613 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11614 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11615 LEX(PM_TOKEN_PERCENT);
11616 }
11617
11618 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11619 lex_state_set(parser, PM_LEX_STATE_BEG);
11621 } else if (
11622 lex_state_beg_p(parser) ||
11623 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11624 lex_state_spcarg_p(parser, space_seen)
11625 ) {
11626 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11627 if (*parser->current.end >= 0x80) {
11628 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11629 }
11630
11631 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11632 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11634 }
11635
11636 // Delimiters for %-literals cannot be alphanumeric. We
11637 // validate that here.
11638 uint8_t delimiter = peek_offset(parser, 1);
11639 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11640 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11641 goto lex_next_token;
11642 }
11643
11644 switch (peek(parser)) {
11645 case 'i': {
11646 parser->current.end++;
11647
11648 if (parser->current.end < parser->end) {
11649 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11650 } else {
11651 lex_mode_push_list_eof(parser);
11652 }
11653
11655 }
11656 case 'I': {
11657 parser->current.end++;
11658
11659 if (parser->current.end < parser->end) {
11660 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11661 } else {
11662 lex_mode_push_list_eof(parser);
11663 }
11664
11666 }
11667 case 'r': {
11668 parser->current.end++;
11669
11670 if (parser->current.end < parser->end) {
11671 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11672 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11673 } else {
11674 lex_mode_push_regexp(parser, '\0', '\0');
11675 }
11676
11678 }
11679 case 'q': {
11680 parser->current.end++;
11681
11682 if (parser->current.end < parser->end) {
11683 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11684 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11685 } else {
11686 lex_mode_push_string_eof(parser);
11687 }
11688
11690 }
11691 case 'Q': {
11692 parser->current.end++;
11693
11694 if (parser->current.end < parser->end) {
11695 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11696 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11697 } else {
11698 lex_mode_push_string_eof(parser);
11699 }
11700
11702 }
11703 case 's': {
11704 parser->current.end++;
11705
11706 if (parser->current.end < parser->end) {
11707 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11708 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11709 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11710 } else {
11711 lex_mode_push_string_eof(parser);
11712 }
11713
11715 }
11716 case 'w': {
11717 parser->current.end++;
11718
11719 if (parser->current.end < parser->end) {
11720 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11721 } else {
11722 lex_mode_push_list_eof(parser);
11723 }
11724
11726 }
11727 case 'W': {
11728 parser->current.end++;
11729
11730 if (parser->current.end < parser->end) {
11731 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11732 } else {
11733 lex_mode_push_list_eof(parser);
11734 }
11735
11737 }
11738 case 'x': {
11739 parser->current.end++;
11740
11741 if (parser->current.end < parser->end) {
11742 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11743 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11744 } else {
11745 lex_mode_push_string_eof(parser);
11746 }
11747
11749 }
11750 default:
11751 // If we get to this point, then we have a % that is completely
11752 // unparsable. In this case we'll just drop it from the parser
11753 // and skip past it and hope that the next token is something
11754 // that we can parse.
11755 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11756 goto lex_next_token;
11757 }
11758 }
11759
11760 if (ambiguous_operator_p(parser, space_seen)) {
11761 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11762 }
11763
11764 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11765 LEX(PM_TOKEN_PERCENT);
11766 }
11767
11768 // global variable
11769 case '$': {
11770 pm_token_type_t type = lex_global_variable(parser);
11771
11772 // If we're lexing an embedded variable, then we need to pop back into
11773 // the parent lex context.
11774 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11775 lex_mode_pop(parser);
11776 }
11777
11778 lex_state_set(parser, PM_LEX_STATE_END);
11779 LEX(type);
11780 }
11781
11782 // instance variable, class variable
11783 case '@':
11784 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11785 LEX(lex_at_variable(parser));
11786
11787 default: {
11788 if (*parser->current.start != '_') {
11789 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11790
11791 // If this isn't the beginning of an identifier, then
11792 // it's an invalid token as we've exhausted all of the
11793 // other options. We'll skip past it and return the next
11794 // token after adding an appropriate error message.
11795 if (!width) {
11796 if (*parser->current.start >= 0x80) {
11797 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11798 } else if (*parser->current.start == '\\') {
11799 switch (peek_at(parser, parser->current.start + 1)) {
11800 case ' ':
11801 parser->current.end++;
11802 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11803 break;
11804 case '\f':
11805 parser->current.end++;
11806 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11807 break;
11808 case '\t':
11809 parser->current.end++;
11810 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11811 break;
11812 case '\v':
11813 parser->current.end++;
11814 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11815 break;
11816 case '\r':
11817 if (peek_at(parser, parser->current.start + 2) != '\n') {
11818 parser->current.end++;
11819 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11820 break;
11821 }
11823 default:
11824 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11825 break;
11826 }
11827 } else if (char_is_ascii_printable(*parser->current.start)) {
11828 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11829 } else {
11830 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11831 }
11832
11833 goto lex_next_token;
11834 }
11835
11836 parser->current.end = parser->current.start + width;
11837 }
11838
11839 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11840
11841 // If we've hit a __END__ and it was at the start of the
11842 // line or the start of the file and it is followed by
11843 // either a \n or a \r\n, then this is the last token of the
11844 // file.
11845 if (
11846 ((parser->current.end - parser->current.start) == 7) &&
11847 current_token_starts_line(parser) &&
11848 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11849 (parser->current.end == parser->end || match_eol(parser))
11850 ) {
11851 // Since we know we're about to add an __END__ comment,
11852 // we know we need to add all of the newlines to get the
11853 // correct column information for it.
11854 const uint8_t *cursor = parser->current.end;
11855 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11856 pm_newline_list_append(&parser->newline_list, cursor++);
11857 }
11858
11859 parser->current.end = parser->end;
11860 parser->current.type = PM_TOKEN___END__;
11861 parser_lex_callback(parser);
11862
11863 parser->data_loc.start = parser->current.start;
11864 parser->data_loc.end = parser->current.end;
11865
11866 LEX(PM_TOKEN_EOF);
11867 }
11868
11869 pm_lex_state_t last_state = parser->lex_state;
11870
11872 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11873 if (previous_command_start) {
11874 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11875 } else {
11876 lex_state_set(parser, PM_LEX_STATE_ARG);
11877 }
11878 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11879 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11880 } else {
11881 lex_state_set(parser, PM_LEX_STATE_END);
11882 }
11883 }
11884
11885 if (
11886 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11888 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11889 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11890 ) {
11891 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11892 }
11893
11894 LEX(type);
11895 }
11896 }
11897 }
11898 case PM_LEX_LIST: {
11899 if (parser->next_start != NULL) {
11900 parser->current.end = parser->next_start;
11901 parser->next_start = NULL;
11902 }
11903
11904 // First we'll set the beginning of the token.
11905 parser->current.start = parser->current.end;
11906
11907 // If there's any whitespace at the start of the list, then we're
11908 // going to trim it off the beginning and create a new token.
11909 size_t whitespace;
11910
11911 if (parser->heredoc_end) {
11912 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11913 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11914 whitespace += 1;
11915 }
11916 } else {
11917 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11918 }
11919
11920 if (whitespace > 0) {
11921 parser->current.end += whitespace;
11922 if (peek_offset(parser, -1) == '\n') {
11923 // mutates next_start
11924 parser_flush_heredoc_end(parser);
11925 }
11926 LEX(PM_TOKEN_WORDS_SEP);
11927 }
11928
11929 // We'll check if we're at the end of the file. If we are, then we
11930 // need to return the EOF token.
11931 if (parser->current.end >= parser->end) {
11932 LEX(PM_TOKEN_EOF);
11933 }
11934
11935 // Here we'll get a list of the places where strpbrk should break,
11936 // and then find the first one.
11937 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11938 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11939 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11940
11941 // If we haven't found an escape yet, then this buffer will be
11942 // unallocated since we can refer directly to the source string.
11943 pm_token_buffer_t token_buffer = { 0 };
11944
11945 while (breakpoint != NULL) {
11946 // If we hit whitespace, then we must have received content by
11947 // now, so we can return an element of the list.
11948 if (pm_char_is_whitespace(*breakpoint)) {
11949 parser->current.end = breakpoint;
11950 pm_token_buffer_flush(parser, &token_buffer);
11952 }
11953
11954 // If we hit the terminator, we need to check which token to
11955 // return.
11956 if (*breakpoint == lex_mode->as.list.terminator) {
11957 // If this terminator doesn't actually close the list, then
11958 // we need to continue on past it.
11959 if (lex_mode->as.list.nesting > 0) {
11960 parser->current.end = breakpoint + 1;
11961 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11962 lex_mode->as.list.nesting--;
11963 continue;
11964 }
11965
11966 // If we've hit the terminator and we've already skipped
11967 // past content, then we can return a list node.
11968 if (breakpoint > parser->current.start) {
11969 parser->current.end = breakpoint;
11970 pm_token_buffer_flush(parser, &token_buffer);
11972 }
11973
11974 // Otherwise, switch back to the default state and return
11975 // the end of the list.
11976 parser->current.end = breakpoint + 1;
11977 lex_mode_pop(parser);
11978 lex_state_set(parser, PM_LEX_STATE_END);
11980 }
11981
11982 // If we hit a null byte, skip directly past it.
11983 if (*breakpoint == '\0') {
11984 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11985 continue;
11986 }
11987
11988 // If we hit escapes, then we need to treat the next token
11989 // literally. In this case we'll skip past the next character
11990 // and find the next breakpoint.
11991 if (*breakpoint == '\\') {
11992 parser->current.end = breakpoint + 1;
11993
11994 // If we've hit the end of the file, then break out of the
11995 // loop by setting the breakpoint to NULL.
11996 if (parser->current.end == parser->end) {
11997 breakpoint = NULL;
11998 continue;
11999 }
12000
12001 pm_token_buffer_escape(parser, &token_buffer);
12002 uint8_t peeked = peek(parser);
12003
12004 switch (peeked) {
12005 case ' ':
12006 case '\f':
12007 case '\t':
12008 case '\v':
12009 case '\\':
12010 pm_token_buffer_push_byte(&token_buffer, peeked);
12011 parser->current.end++;
12012 break;
12013 case '\r':
12014 parser->current.end++;
12015 if (peek(parser) != '\n') {
12016 pm_token_buffer_push_byte(&token_buffer, '\r');
12017 break;
12018 }
12020 case '\n':
12021 pm_token_buffer_push_byte(&token_buffer, '\n');
12022
12023 if (parser->heredoc_end) {
12024 // ... if we are on the same line as a heredoc,
12025 // flush the heredoc and continue parsing after
12026 // heredoc_end.
12027 parser_flush_heredoc_end(parser);
12028 pm_token_buffer_copy(parser, &token_buffer);
12030 } else {
12031 // ... else track the newline.
12032 pm_newline_list_append(&parser->newline_list, parser->current.end);
12033 }
12034
12035 parser->current.end++;
12036 break;
12037 default:
12038 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12039 pm_token_buffer_push_byte(&token_buffer, peeked);
12040 parser->current.end++;
12041 } else if (lex_mode->as.list.interpolation) {
12042 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12043 } else {
12044 pm_token_buffer_push_byte(&token_buffer, '\\');
12045 pm_token_buffer_push_escaped(&token_buffer, parser);
12046 }
12047
12048 break;
12049 }
12050
12051 token_buffer.cursor = parser->current.end;
12052 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12053 continue;
12054 }
12055
12056 // If we hit a #, then we will attempt to lex interpolation.
12057 if (*breakpoint == '#') {
12058 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12059
12060 if (type == PM_TOKEN_NOT_PROVIDED) {
12061 // If we haven't returned at this point then we had something
12062 // that looked like an interpolated class or instance variable
12063 // like "#@" but wasn't actually. In this case we'll just skip
12064 // to the next breakpoint.
12065 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12066 continue;
12067 }
12068
12070 pm_token_buffer_flush(parser, &token_buffer);
12071 }
12072
12073 LEX(type);
12074 }
12075
12076 // If we've hit the incrementor, then we need to skip past it
12077 // and find the next breakpoint.
12078 assert(*breakpoint == lex_mode->as.list.incrementor);
12079 parser->current.end = breakpoint + 1;
12080 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12081 lex_mode->as.list.nesting++;
12082 continue;
12083 }
12084
12085 if (parser->current.end > parser->current.start) {
12086 pm_token_buffer_flush(parser, &token_buffer);
12088 }
12089
12090 // If we were unable to find a breakpoint, then this token hits the
12091 // end of the file.
12092 parser->current.end = parser->end;
12093 pm_token_buffer_flush(parser, &token_buffer);
12095 }
12096 case PM_LEX_REGEXP: {
12097 // First, we'll set the start of this token to be the current end.
12098 if (parser->next_start == NULL) {
12099 parser->current.start = parser->current.end;
12100 } else {
12101 parser->current.start = parser->next_start;
12102 parser->current.end = parser->next_start;
12103 parser->next_start = NULL;
12104 }
12105
12106 // We'll check if we're at the end of the file. If we are, then we
12107 // need to return the EOF token.
12108 if (parser->current.end >= parser->end) {
12109 LEX(PM_TOKEN_EOF);
12110 }
12111
12112 // Get a reference to the current mode.
12113 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12114
12115 // These are the places where we need to split up the content of the
12116 // regular expression. We'll use strpbrk to find the first of these
12117 // characters.
12118 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12119 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12120 pm_regexp_token_buffer_t token_buffer = { 0 };
12121
12122 while (breakpoint != NULL) {
12123 uint8_t term = lex_mode->as.regexp.terminator;
12124 bool is_terminator = (*breakpoint == term);
12125
12126 // If the terminator is newline, we need to consider \r\n _also_ a newline
12127 // For example: `%r\nfoo\r\n`
12128 // The regexp should be /foo/, not /foo\r/
12129 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12130 if (term == '\n') {
12131 is_terminator = true;
12132 }
12133
12134 // If the terminator is a CR, but we see a CRLF, we need to
12135 // treat the CRLF as a newline, meaning this is _not_ the
12136 // terminator
12137 if (term == '\r') {
12138 is_terminator = false;
12139 }
12140 }
12141
12142 // If we hit the terminator, we need to determine what kind of
12143 // token to return.
12144 if (is_terminator) {
12145 if (lex_mode->as.regexp.nesting > 0) {
12146 parser->current.end = breakpoint + 1;
12147 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12148 lex_mode->as.regexp.nesting--;
12149 continue;
12150 }
12151
12152 // Here we've hit the terminator. If we have already consumed
12153 // content then we need to return that content as string content
12154 // first.
12155 if (breakpoint > parser->current.start) {
12156 parser->current.end = breakpoint;
12157 pm_regexp_token_buffer_flush(parser, &token_buffer);
12159 }
12160
12161 // Check here if we need to track the newline.
12162 size_t eol_length = match_eol_at(parser, breakpoint);
12163 if (eol_length) {
12164 parser->current.end = breakpoint + eol_length;
12165 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12166 } else {
12167 parser->current.end = breakpoint + 1;
12168 }
12169
12170 // Since we've hit the terminator of the regular expression,
12171 // we now need to parse the options.
12172 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12173
12174 lex_mode_pop(parser);
12175 lex_state_set(parser, PM_LEX_STATE_END);
12177 }
12178
12179 // If we've hit the incrementor, then we need to skip past it
12180 // and find the next breakpoint.
12181 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12182 parser->current.end = breakpoint + 1;
12183 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12184 lex_mode->as.regexp.nesting++;
12185 continue;
12186 }
12187
12188 switch (*breakpoint) {
12189 case '\0':
12190 // If we hit a null byte, skip directly past it.
12191 parser->current.end = breakpoint + 1;
12192 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12193 break;
12194 case '\r':
12195 if (peek_at(parser, breakpoint + 1) != '\n') {
12196 parser->current.end = breakpoint + 1;
12197 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12198 break;
12199 }
12200
12201 breakpoint++;
12202 parser->current.end = breakpoint;
12203 pm_regexp_token_buffer_escape(parser, &token_buffer);
12204 token_buffer.base.cursor = breakpoint;
12205
12207 case '\n':
12208 // If we've hit a newline, then we need to track that in
12209 // the list of newlines.
12210 if (parser->heredoc_end == NULL) {
12211 pm_newline_list_append(&parser->newline_list, breakpoint);
12212 parser->current.end = breakpoint + 1;
12213 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12214 break;
12215 }
12216
12217 parser->current.end = breakpoint + 1;
12218 parser_flush_heredoc_end(parser);
12219 pm_regexp_token_buffer_flush(parser, &token_buffer);
12221 case '\\': {
12222 // If we hit escapes, then we need to treat the next
12223 // token literally. In this case we'll skip past the
12224 // next character and find the next breakpoint.
12225 parser->current.end = breakpoint + 1;
12226
12227 // If we've hit the end of the file, then break out of
12228 // the loop by setting the breakpoint to NULL.
12229 if (parser->current.end == parser->end) {
12230 breakpoint = NULL;
12231 break;
12232 }
12233
12234 pm_regexp_token_buffer_escape(parser, &token_buffer);
12235 uint8_t peeked = peek(parser);
12236
12237 switch (peeked) {
12238 case '\r':
12239 parser->current.end++;
12240 if (peek(parser) != '\n') {
12241 if (lex_mode->as.regexp.terminator != '\r') {
12242 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12243 }
12244 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12245 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12246 break;
12247 }
12249 case '\n':
12250 if (parser->heredoc_end) {
12251 // ... if we are on the same line as a heredoc,
12252 // flush the heredoc and continue parsing after
12253 // heredoc_end.
12254 parser_flush_heredoc_end(parser);
12255 pm_regexp_token_buffer_copy(parser, &token_buffer);
12257 } else {
12258 // ... else track the newline.
12259 pm_newline_list_append(&parser->newline_list, parser->current.end);
12260 }
12261
12262 parser->current.end++;
12263 break;
12264 case 'c':
12265 case 'C':
12266 case 'M':
12267 case 'u':
12268 case 'x':
12269 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12270 break;
12271 default:
12272 if (lex_mode->as.regexp.terminator == peeked) {
12273 // Some characters when they are used as the
12274 // terminator also receive an escape. They are
12275 // enumerated here.
12276 switch (peeked) {
12277 case '$': case ')': case '*': case '+':
12278 case '.': case '>': case '?': case ']':
12279 case '^': case '|': case '}':
12280 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12281 break;
12282 default:
12283 break;
12284 }
12285
12286 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12287 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12288 parser->current.end++;
12289 break;
12290 }
12291
12292 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12293 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12294 break;
12295 }
12296
12297 token_buffer.base.cursor = parser->current.end;
12298 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12299 break;
12300 }
12301 case '#': {
12302 // If we hit a #, then we will attempt to lex
12303 // interpolation.
12304 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12305
12306 if (type == PM_TOKEN_NOT_PROVIDED) {
12307 // If we haven't returned at this point then we had
12308 // something that looked like an interpolated class or
12309 // instance variable like "#@" but wasn't actually. In
12310 // this case we'll just skip to the next breakpoint.
12311 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12312 break;
12313 }
12314
12316 pm_regexp_token_buffer_flush(parser, &token_buffer);
12317 }
12318
12319 LEX(type);
12320 }
12321 default:
12322 assert(false && "unreachable");
12323 break;
12324 }
12325 }
12326
12327 if (parser->current.end > parser->current.start) {
12328 pm_regexp_token_buffer_flush(parser, &token_buffer);
12330 }
12331
12332 // If we were unable to find a breakpoint, then this token hits the
12333 // end of the file.
12334 parser->current.end = parser->end;
12335 pm_regexp_token_buffer_flush(parser, &token_buffer);
12337 }
12338 case PM_LEX_STRING: {
12339 // First, we'll set the start of this token to be the current end.
12340 if (parser->next_start == NULL) {
12341 parser->current.start = parser->current.end;
12342 } else {
12343 parser->current.start = parser->next_start;
12344 parser->current.end = parser->next_start;
12345 parser->next_start = NULL;
12346 }
12347
12348 // We'll check if we're at the end of the file. If we are, then we need to
12349 // return the EOF token.
12350 if (parser->current.end >= parser->end) {
12351 LEX(PM_TOKEN_EOF);
12352 }
12353
12354 // These are the places where we need to split up the content of the
12355 // string. We'll use strpbrk to find the first of these characters.
12356 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12357 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12358 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12359
12360 // If we haven't found an escape yet, then this buffer will be
12361 // unallocated since we can refer directly to the source string.
12362 pm_token_buffer_t token_buffer = { 0 };
12363
12364 while (breakpoint != NULL) {
12365 // If we hit the incrementor, then we'll increment the nesting and
12366 // continue lexing.
12367 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12368 lex_mode->as.string.nesting++;
12369 parser->current.end = breakpoint + 1;
12370 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12371 continue;
12372 }
12373
12374 uint8_t term = lex_mode->as.string.terminator;
12375 bool is_terminator = (*breakpoint == term);
12376
12377 // If the terminator is newline, we need to consider \r\n _also_ a newline
12378 // For example: `%\nfoo\r\n`
12379 // The string should be "foo", not "foo\r"
12380 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12381 if (term == '\n') {
12382 is_terminator = true;
12383 }
12384
12385 // If the terminator is a CR, but we see a CRLF, we need to
12386 // treat the CRLF as a newline, meaning this is _not_ the
12387 // terminator
12388 if (term == '\r') {
12389 is_terminator = false;
12390 }
12391 }
12392
12393 // Note that we have to check the terminator here first because we could
12394 // potentially be parsing a % string that has a # character as the
12395 // terminator.
12396 if (is_terminator) {
12397 // If this terminator doesn't actually close the string, then we need
12398 // to continue on past it.
12399 if (lex_mode->as.string.nesting > 0) {
12400 parser->current.end = breakpoint + 1;
12401 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12402 lex_mode->as.string.nesting--;
12403 continue;
12404 }
12405
12406 // Here we've hit the terminator. If we have already consumed content
12407 // then we need to return that content as string content first.
12408 if (breakpoint > parser->current.start) {
12409 parser->current.end = breakpoint;
12410 pm_token_buffer_flush(parser, &token_buffer);
12412 }
12413
12414 // Otherwise we need to switch back to the parent lex mode and
12415 // return the end of the string.
12416 size_t eol_length = match_eol_at(parser, breakpoint);
12417 if (eol_length) {
12418 parser->current.end = breakpoint + eol_length;
12419 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12420 } else {
12421 parser->current.end = breakpoint + 1;
12422 }
12423
12424 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12425 parser->current.end++;
12426 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12427 lex_mode_pop(parser);
12428 LEX(PM_TOKEN_LABEL_END);
12429 }
12430
12431 lex_state_set(parser, PM_LEX_STATE_END);
12432 lex_mode_pop(parser);
12434 }
12435
12436 switch (*breakpoint) {
12437 case '\0':
12438 // Skip directly past the null character.
12439 parser->current.end = breakpoint + 1;
12440 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12441 break;
12442 case '\r':
12443 if (peek_at(parser, breakpoint + 1) != '\n') {
12444 parser->current.end = breakpoint + 1;
12445 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12446 break;
12447 }
12448
12449 // If we hit a \r\n sequence, then we need to treat it
12450 // as a newline.
12451 breakpoint++;
12452 parser->current.end = breakpoint;
12453 pm_token_buffer_escape(parser, &token_buffer);
12454 token_buffer.cursor = breakpoint;
12455
12457 case '\n':
12458 // When we hit a newline, we need to flush any potential
12459 // heredocs. Note that this has to happen after we check
12460 // for the terminator in case the terminator is a
12461 // newline character.
12462 if (parser->heredoc_end == NULL) {
12463 pm_newline_list_append(&parser->newline_list, breakpoint);
12464 parser->current.end = breakpoint + 1;
12465 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12466 break;
12467 }
12468
12469 parser->current.end = breakpoint + 1;
12470 parser_flush_heredoc_end(parser);
12471 pm_token_buffer_flush(parser, &token_buffer);
12473 case '\\': {
12474 // Here we hit escapes.
12475 parser->current.end = breakpoint + 1;
12476
12477 // If we've hit the end of the file, then break out of
12478 // the loop by setting the breakpoint to NULL.
12479 if (parser->current.end == parser->end) {
12480 breakpoint = NULL;
12481 continue;
12482 }
12483
12484 pm_token_buffer_escape(parser, &token_buffer);
12485 uint8_t peeked = peek(parser);
12486
12487 switch (peeked) {
12488 case '\\':
12489 pm_token_buffer_push_byte(&token_buffer, '\\');
12490 parser->current.end++;
12491 break;
12492 case '\r':
12493 parser->current.end++;
12494 if (peek(parser) != '\n') {
12495 if (!lex_mode->as.string.interpolation) {
12496 pm_token_buffer_push_byte(&token_buffer, '\\');
12497 }
12498 pm_token_buffer_push_byte(&token_buffer, '\r');
12499 break;
12500 }
12502 case '\n':
12503 if (!lex_mode->as.string.interpolation) {
12504 pm_token_buffer_push_byte(&token_buffer, '\\');
12505 pm_token_buffer_push_byte(&token_buffer, '\n');
12506 }
12507
12508 if (parser->heredoc_end) {
12509 // ... if we are on the same line as a heredoc,
12510 // flush the heredoc and continue parsing after
12511 // heredoc_end.
12512 parser_flush_heredoc_end(parser);
12513 pm_token_buffer_copy(parser, &token_buffer);
12515 } else {
12516 // ... else track the newline.
12517 pm_newline_list_append(&parser->newline_list, parser->current.end);
12518 }
12519
12520 parser->current.end++;
12521 break;
12522 default:
12523 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12524 pm_token_buffer_push_byte(&token_buffer, peeked);
12525 parser->current.end++;
12526 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12527 pm_token_buffer_push_byte(&token_buffer, peeked);
12528 parser->current.end++;
12529 } else if (lex_mode->as.string.interpolation) {
12530 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12531 } else {
12532 pm_token_buffer_push_byte(&token_buffer, '\\');
12533 pm_token_buffer_push_escaped(&token_buffer, parser);
12534 }
12535
12536 break;
12537 }
12538
12539 token_buffer.cursor = parser->current.end;
12540 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12541 break;
12542 }
12543 case '#': {
12544 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12545
12546 if (type == PM_TOKEN_NOT_PROVIDED) {
12547 // If we haven't returned at this point then we had something that
12548 // looked like an interpolated class or instance variable like "#@"
12549 // but wasn't actually. In this case we'll just skip to the next
12550 // breakpoint.
12551 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12552 break;
12553 }
12554
12556 pm_token_buffer_flush(parser, &token_buffer);
12557 }
12558
12559 LEX(type);
12560 }
12561 default:
12562 assert(false && "unreachable");
12563 }
12564 }
12565
12566 if (parser->current.end > parser->current.start) {
12567 pm_token_buffer_flush(parser, &token_buffer);
12569 }
12570
12571 // If we've hit the end of the string, then this is an unterminated
12572 // string. In that case we'll return a string content token.
12573 parser->current.end = parser->end;
12574 pm_token_buffer_flush(parser, &token_buffer);
12576 }
12577 case PM_LEX_HEREDOC: {
12578 // First, we'll set to start of this token.
12579 if (parser->next_start == NULL) {
12580 parser->current.start = parser->current.end;
12581 } else {
12582 parser->current.start = parser->next_start;
12583 parser->current.end = parser->next_start;
12584 parser->heredoc_end = NULL;
12585 parser->next_start = NULL;
12586 }
12587
12588 // Now let's grab the information about the identifier off of the
12589 // current lex mode.
12590 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12591 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12592
12593 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12594 lex_mode->as.heredoc.line_continuation = false;
12595
12596 // We'll check if we're at the end of the file. If we are, then we
12597 // will add an error (because we weren't able to find the
12598 // terminator) but still continue parsing so that content after the
12599 // declaration of the heredoc can be parsed.
12600 if (parser->current.end >= parser->end) {
12601 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12602 parser->next_start = lex_mode->as.heredoc.next_start;
12603 parser->heredoc_end = parser->current.end;
12604 lex_state_set(parser, PM_LEX_STATE_END);
12605 lex_mode_pop(parser);
12607 }
12608
12609 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12610 size_t ident_length = heredoc_lex_mode->ident_length;
12611
12612 // If we are immediately following a newline and we have hit the
12613 // terminator, then we need to return the ending of the heredoc.
12614 if (current_token_starts_line(parser)) {
12615 const uint8_t *start = parser->current.start;
12616
12617 if (!line_continuation && (start + ident_length <= parser->end)) {
12618 const uint8_t *newline = next_newline(start, parser->end - start);
12619 const uint8_t *ident_end = newline;
12620 const uint8_t *terminator_end = newline;
12621
12622 if (newline == NULL) {
12623 terminator_end = parser->end;
12624 ident_end = parser->end;
12625 } else {
12626 terminator_end++;
12627 if (newline[-1] == '\r') {
12628 ident_end--; // Remove \r
12629 }
12630 }
12631
12632 const uint8_t *terminator_start = ident_end - ident_length;
12633 const uint8_t *cursor = start;
12634
12635 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12636 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12637 cursor++;
12638 }
12639 }
12640
12641 if (
12642 (cursor == terminator_start) &&
12643 (memcmp(terminator_start, ident_start, ident_length) == 0)
12644 ) {
12645 if (newline != NULL) {
12646 pm_newline_list_append(&parser->newline_list, newline);
12647 }
12648
12649 parser->current.end = terminator_end;
12650 if (*lex_mode->as.heredoc.next_start == '\\') {
12651 parser->next_start = NULL;
12652 } else {
12653 parser->next_start = lex_mode->as.heredoc.next_start;
12654 parser->heredoc_end = parser->current.end;
12655 }
12656
12657 lex_state_set(parser, PM_LEX_STATE_END);
12658 lex_mode_pop(parser);
12660 }
12661 }
12662
12663 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12664 if (
12665 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12666 lex_mode->as.heredoc.common_whitespace != NULL &&
12667 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12668 peek_at(parser, start) != '\n'
12669 ) {
12670 *lex_mode->as.heredoc.common_whitespace = whitespace;
12671 }
12672 }
12673
12674 // Otherwise we'll be parsing string content. These are the places
12675 // where we need to split up the content of the heredoc. We'll use
12676 // strpbrk to find the first of these characters.
12677 uint8_t breakpoints[] = "\r\n\\#";
12678
12679 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12680 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12681 breakpoints[3] = '\0';
12682 }
12683
12684 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12685 pm_token_buffer_t token_buffer = { 0 };
12686 bool was_line_continuation = false;
12687
12688 while (breakpoint != NULL) {
12689 switch (*breakpoint) {
12690 case '\0':
12691 // Skip directly past the null character.
12692 parser->current.end = breakpoint + 1;
12693 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12694 break;
12695 case '\r':
12696 parser->current.end = breakpoint + 1;
12697
12698 if (peek_at(parser, breakpoint + 1) != '\n') {
12699 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12700 break;
12701 }
12702
12703 // If we hit a \r\n sequence, then we want to replace it
12704 // with a single \n character in the final string.
12705 breakpoint++;
12706 pm_token_buffer_escape(parser, &token_buffer);
12707 token_buffer.cursor = breakpoint;
12708
12710 case '\n': {
12711 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12712 parser_flush_heredoc_end(parser);
12713 parser->current.end = breakpoint + 1;
12714 pm_token_buffer_flush(parser, &token_buffer);
12716 }
12717
12718 pm_newline_list_append(&parser->newline_list, breakpoint);
12719
12720 // If we have a - or ~ heredoc, then we can match after
12721 // some leading whitespace.
12722 const uint8_t *start = breakpoint + 1;
12723
12724 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12725 // We want to match the terminator starting from the end of the line in case
12726 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12727 const uint8_t *newline = next_newline(start, parser->end - start);
12728
12729 if (newline == NULL) {
12730 newline = parser->end;
12731 } else if (newline[-1] == '\r') {
12732 newline--; // Remove \r
12733 }
12734
12735 // Start of a possible terminator.
12736 const uint8_t *terminator_start = newline - ident_length;
12737
12738 // Cursor to check for the leading whitespace. We skip the
12739 // leading whitespace if we have a - or ~ heredoc.
12740 const uint8_t *cursor = start;
12741
12742 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12743 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12744 cursor++;
12745 }
12746 }
12747
12748 if (
12749 cursor == terminator_start &&
12750 (memcmp(terminator_start, ident_start, ident_length) == 0)
12751 ) {
12752 parser->current.end = breakpoint + 1;
12753 pm_token_buffer_flush(parser, &token_buffer);
12755 }
12756 }
12757
12758 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12759
12760 // If we have hit a newline that is followed by a valid
12761 // terminator, then we need to return the content of the
12762 // heredoc here as string content. Then, the next time a
12763 // token is lexed, it will match again and return the
12764 // end of the heredoc.
12765 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12766 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12767 *lex_mode->as.heredoc.common_whitespace = whitespace;
12768 }
12769
12770 parser->current.end = breakpoint + 1;
12771 pm_token_buffer_flush(parser, &token_buffer);
12773 }
12774
12775 // Otherwise we hit a newline and it wasn't followed by
12776 // a terminator, so we can continue parsing.
12777 parser->current.end = breakpoint + 1;
12778 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12779 break;
12780 }
12781 case '\\': {
12782 // If we hit an escape, then we need to skip past
12783 // however many characters the escape takes up. However
12784 // it's important that if \n or \r\n are escaped, we
12785 // stop looping before the newline and not after the
12786 // newline so that we can still potentially find the
12787 // terminator of the heredoc.
12788 parser->current.end = breakpoint + 1;
12789
12790 // If we've hit the end of the file, then break out of
12791 // the loop by setting the breakpoint to NULL.
12792 if (parser->current.end == parser->end) {
12793 breakpoint = NULL;
12794 continue;
12795 }
12796
12797 pm_token_buffer_escape(parser, &token_buffer);
12798 uint8_t peeked = peek(parser);
12799
12800 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12801 switch (peeked) {
12802 case '\r':
12803 parser->current.end++;
12804 if (peek(parser) != '\n') {
12805 pm_token_buffer_push_byte(&token_buffer, '\\');
12806 pm_token_buffer_push_byte(&token_buffer, '\r');
12807 break;
12808 }
12810 case '\n':
12811 pm_token_buffer_push_byte(&token_buffer, '\\');
12812 pm_token_buffer_push_byte(&token_buffer, '\n');
12813 token_buffer.cursor = parser->current.end + 1;
12814 breakpoint = parser->current.end;
12815 continue;
12816 default:
12817 pm_token_buffer_push_byte(&token_buffer, '\\');
12818 pm_token_buffer_push_escaped(&token_buffer, parser);
12819 break;
12820 }
12821 } else {
12822 switch (peeked) {
12823 case '\r':
12824 parser->current.end++;
12825 if (peek(parser) != '\n') {
12826 pm_token_buffer_push_byte(&token_buffer, '\r');
12827 break;
12828 }
12830 case '\n':
12831 // If we are in a tilde here, we should
12832 // break out of the loop and return the
12833 // string content.
12834 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12835 const uint8_t *end = parser->current.end;
12836 pm_newline_list_append(&parser->newline_list, end);
12837
12838 // Here we want the buffer to only
12839 // include up to the backslash.
12840 parser->current.end = breakpoint;
12841 pm_token_buffer_flush(parser, &token_buffer);
12842
12843 // Now we can advance the end of the
12844 // token past the newline.
12845 parser->current.end = end + 1;
12846 lex_mode->as.heredoc.line_continuation = true;
12848 }
12849
12850 was_line_continuation = true;
12851 token_buffer.cursor = parser->current.end + 1;
12852 breakpoint = parser->current.end;
12853 continue;
12854 default:
12855 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12856 break;
12857 }
12858 }
12859
12860 token_buffer.cursor = parser->current.end;
12861 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12862 break;
12863 }
12864 case '#': {
12865 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12866
12867 if (type == PM_TOKEN_NOT_PROVIDED) {
12868 // If we haven't returned at this point then we had
12869 // something that looked like an interpolated class
12870 // or instance variable like "#@" but wasn't
12871 // actually. In this case we'll just skip to the
12872 // next breakpoint.
12873 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12874 break;
12875 }
12876
12878 pm_token_buffer_flush(parser, &token_buffer);
12879 }
12880
12881 LEX(type);
12882 }
12883 default:
12884 assert(false && "unreachable");
12885 }
12886
12887 was_line_continuation = false;
12888 }
12889
12890 if (parser->current.end > parser->current.start) {
12891 parser->current.end = parser->end;
12892 pm_token_buffer_flush(parser, &token_buffer);
12894 }
12895
12896 // If we've hit the end of the string, then this is an unterminated
12897 // heredoc. In that case we'll return a string content token.
12898 parser->current.end = parser->end;
12899 pm_token_buffer_flush(parser, &token_buffer);
12901 }
12902 }
12903
12904 assert(false && "unreachable");
12905}
12906
12907#undef LEX
12908
12909/******************************************************************************/
12910/* Parse functions */
12911/******************************************************************************/
12912
/**
 * The binding power levels used when parsing expressions. Levels are listed
 * from the loosest binding (statements) to the tightest (method calls), and
 * each named level is two apart from its neighbours, which leaves the odd
 * value in between free (the associativity macros in this file use
 * `level + 1` as a right binding power).
 *
 * PM_BINDING_POWER_UNSET is deliberately 0 so that zero-initialized table
 * entries read as "no binding power".
 */
typedef enum {
    PM_BINDING_POWER_UNSET = 0,             // used to indicate this token cannot be used as an infix operator
    PM_BINDING_POWER_STATEMENT = 2,
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,   // rescue
    PM_BINDING_POWER_MODIFIER = 6,          // if unless until while
    PM_BINDING_POWER_COMPOSITION = 8,       // and or
    PM_BINDING_POWER_NOT = 10,              // not
    PM_BINDING_POWER_MATCH = 12,            // => in
    PM_BINDING_POWER_DEFINED = 14,          // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
    PM_BINDING_POWER_ASSIGNMENT = 18,       // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY = 20,          // ?:
    PM_BINDING_POWER_RANGE = 22,            // .. ...
    PM_BINDING_POWER_LOGICAL_OR = 24,       // ||
    PM_BINDING_POWER_LOGICAL_AND = 26,      // &&
    PM_BINDING_POWER_EQUALITY = 28,         // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON = 30,       // > >= < <=
    PM_BINDING_POWER_BITWISE_OR = 32,       // | ^
    PM_BINDING_POWER_BITWISE_AND = 34,      // &
    PM_BINDING_POWER_SHIFT = 36,            // << >>
    PM_BINDING_POWER_TERM = 38,             // + -
    PM_BINDING_POWER_FACTOR = 40,           // * / %
    PM_BINDING_POWER_UMINUS = 42,           // -@
    PM_BINDING_POWER_EXPONENT = 44,         // **
    PM_BINDING_POWER_UNARY = 46,            // ! ~ +@
    PM_BINDING_POWER_INDEX = 48,            // [] []=
    PM_BINDING_POWER_CALL = 50,             // :: .
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12950
12955typedef struct {
12957 pm_binding_power_t left;
12958
12960 pm_binding_power_t right;
12961
12964
12971
// Helpers for writing entries of the binding power table. Each expands to a
// four-value brace initializer: the left binding power, the right binding
// power, and two boolean flags. The third value is false only for the unary
// form, and the fourth value is true only for NON_ASSOCIATIVE. The argument is
// parenthesized so that an expression argument cannot change the meaning of
// the `+ 1`.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12977
12978pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12979 // rescue
12980 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12981
12982 // if unless until while
12983 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12984 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12985 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12986 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12987
12988 // and or
12989 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12990 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12991
12992 // => in
12993 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12994 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12995
12996 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12997 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12998 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12999 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
13000 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
13001 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
13002 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13003 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13004 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
13005 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13006 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13007 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13008 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
13009 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13010 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13011
13012 // ?:
13013 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13014
13015 // .. ...
13016 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13017 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13018 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13019 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13020
13021 // ||
13022 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13023
13024 // &&
13025 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13026
13027 // != !~ == === =~ <=>
13028 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13029 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13030 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13031 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13032 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13033 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13034
13035 // > >= < <=
13036 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13037 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13038 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13039 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13040
13041 // ^ |
13042 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13043 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13044
13045 // &
13046 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13047
13048 // >> <<
13049 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13050 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13051
13052 // - +
13053 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13054 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13055
13056 // % / *
13057 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13058 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13059 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13060 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13061
13062 // -@
13063 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13064 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13065
13066 // **
13067 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13068 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13069
13070 // ! ~ +@
13071 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13072 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13073 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13074
13075 // [
13076 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13077
13078 // :: . &.
13079 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13080 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13081 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13082};
13083
// The table-building macros are only meaningful for the binding power table,
// so all five of them are removed again here.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
13088
13092static inline bool
13093match1(const pm_parser_t *parser, pm_token_type_t type) {
13094 return parser->current.type == type;
13095}
13096
13100static inline bool
13101match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13102 return match1(parser, type1) || match1(parser, type2);
13103}
13104
13108static inline bool
13109match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13110 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13111}
13112
13116static inline bool
13117match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13118 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13119}
13120
13124static inline bool
13125match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13126 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13127}
13128
13132static inline bool
13133match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13134 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13135}
13136
13140static inline bool
13141match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13142 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13143}
13144
13151static bool
13152accept1(pm_parser_t *parser, pm_token_type_t type) {
13153 if (match1(parser, type)) {
13154 parser_lex(parser);
13155 return true;
13156 }
13157 return false;
13158}
13159
13164static inline bool
13165accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13166 if (match2(parser, type1, type2)) {
13167 parser_lex(parser);
13168 return true;
13169 }
13170 return false;
13171}
13172
13184static void
13185expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13186 if (accept1(parser, type)) return;
13187
13188 const uint8_t *location = parser->previous.end;
13189 pm_parser_err(parser, location, location, diag_id);
13190
13191 parser->previous.start = location;
13192 parser->previous.type = PM_TOKEN_MISSING;
13193}
13194
13199static void
13200expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13201 if (accept2(parser, type1, type2)) return;
13202
13203 const uint8_t *location = parser->previous.end;
13204 pm_parser_err(parser, location, location, diag_id);
13205
13206 parser->previous.start = location;
13207 parser->previous.type = PM_TOKEN_MISSING;
13208}
13209
13214static void
13215expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13216 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13217 parser_lex(parser);
13218 } else {
13219 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13220 parser->previous.start = parser->previous.end;
13221 parser->previous.type = PM_TOKEN_MISSING;
13222 }
13223}
13224
13225static pm_node_t *
13226parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13227
13232static pm_node_t *
13233parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13234 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13235 pm_assert_value_expression(parser, node);
13236 return node;
13237}
13238
13257static inline bool
13258token_begins_expression_p(pm_token_type_t type) {
13259 switch (type) {
13262 // We need to special case this because it is a binary operator that
13263 // should not be marked as beginning an expression.
13264 return false;
13267 case PM_TOKEN_COLON:
13268 case PM_TOKEN_COMMA:
13270 case PM_TOKEN_EOF:
13281 case PM_TOKEN_NEWLINE:
13283 case PM_TOKEN_SEMICOLON:
13284 // The reason we need this short-circuit is because we're using the
13285 // binding powers table to tell us if the subsequent token could
13286 // potentially be the start of an expression. If there _is_ a binding
13287 // power for one of these tokens, then we should remove it from this list
13288 // and let it be handled by the default case below.
13289 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13290 return false;
13292 // This is a special case because this unary operator cannot appear
13293 // as a general operator, it only appears in certain circumstances.
13294 return false;
13296 case PM_TOKEN_UMINUS:
13298 case PM_TOKEN_UPLUS:
13299 case PM_TOKEN_BANG:
13300 case PM_TOKEN_TILDE:
13301 case PM_TOKEN_UDOT_DOT:
13303 // These unary tokens actually do have binding power associated with them
13304 // so that we can correctly place them into the precedence order. But we
13305 // want them to be marked as beginning an expression, so we need to
13306 // special case them here.
13307 return true;
13308 default:
13309 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13310 }
13311}
13312
13317static pm_node_t *
13318parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13319 if (accept1(parser, PM_TOKEN_USTAR)) {
13320 pm_token_t operator = parser->previous;
13321 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13322 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13323 }
13324
13325 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13326}
13327
13332static void
13333parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13334 // The method name needs to change. If we previously had
13335 // foo, we now need foo=. In this case we'll allocate a new
13336 // owned string, copy the previous method name in, and
13337 // append an =.
13338 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13339 size_t length = constant->length;
13340 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13341 if (name == NULL) return;
13342
13343 memcpy(name, constant->start, length);
13344 name[length] = '=';
13345
13346 // Now switch the name to the new string.
13347 // This silences clang analyzer warning about leak of memory pointed by `name`.
13348 // NOLINTNEXTLINE(clang-analyzer-*)
13349 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13350}
13351
13358static pm_node_t *
13359parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13360 switch (PM_NODE_TYPE(target)) {
13361 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13362 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13363 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13364 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13365 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13366 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13367 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13368 default: break;
13369 }
13370
13371 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13372 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13373
13374 pm_node_destroy(parser, target);
13375 return (pm_node_t *) result;
13376}
13377
13383static void
13384parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13385 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13386
13387 for (size_t index = 0; index < implicit_parameters->size; index++) {
13388 if (implicit_parameters->nodes[index] == node) {
13389 // If the node is not the last one in the list, we need to shift the
13390 // remaining nodes down to fill the gap. This is extremely unlikely
13391 // to happen.
13392 if (index != implicit_parameters->size - 1) {
13393 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13394 }
13395
13396 implicit_parameters->size--;
13397 break;
13398 }
13399 }
13400}
13401
/**
 * Convert an already-parsed node into a node that can be used as a write
 * target. Depending on the node type, the node is returned unchanged, has its
 * type rewritten in place, or is destroyed and replaced by a newly created
 * target node. Node types that the switch does not handle get a
 * PM_ERR_WRITE_TARGET_UNEXPECTED error and are returned as-is.
 *
 * `multiple` is forwarded when recursing into a splat and is consulted when
 * rejecting safe navigation on call targets. `splat_parent` is passed as true
 * when recursing into the expression of a splat.
 *
 * NOTE(review): several `case` labels of the switch below appear to be missing
 * from this copy of the file (the embedded line numbers skip). Confirm against
 * the upstream source before relying on the visible control flow.
 */
13410static pm_node_t *
13411parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13412 switch (PM_NODE_TYPE(target)) {
13413 case PM_MISSING_NODE:
13414 return target;
13416 case PM_FALSE_NODE:
13419 case PM_NIL_NODE:
13420 case PM_SELF_NODE:
13421 case PM_TRUE_NODE: {
13422 // In these special cases, we have specific error messages and we
13423 // will replace them with local variable writes.
13424 return parse_unwriteable_target(parser, target);
13425 }
13429 return target;
13431 if (context_def_p(parser)) {
13432 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13433 }
13434
13437
13438 return target;
13440 if (context_def_p(parser)) {
13441 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13442 }
13443
 // The node is retagged in place rather than reallocated; the assert
 // checks that the two node structs have the same size.
13444 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13445 target->type = PM_CONSTANT_TARGET_NODE;
13446
13447 return target;
13450 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13451 return target;
13455 return target;
13457 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13458 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13459 parse_target_implicit_parameter(parser, target);
13460 }
13461
13462 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13463 uint32_t name = cast->name;
13464 uint32_t depth = cast->depth;
13465 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13466
13469
13470 return target;
13471 }
13473 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13474 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13475
13476 parse_target_implicit_parameter(parser, target);
13477 pm_node_destroy(parser, target);
13478
13479 return node;
13480 }
13484 return target;
13486 if (splat_parent) {
13487 // Multi target is not accepted in all positions. If this is one
13488 // of them, then we need to add an error.
13489 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13490 }
13491
13492 return target;
13493 case PM_SPLAT_NODE: {
13494 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13495
 // An anonymous splat has no expression, so there is nothing to convert.
13496 if (splat->expression != NULL) {
13497 splat->expression = parse_target(parser, splat->expression, multiple, true);
13498 }
13499
13500 return (pm_node_t *) splat;
13501 }
13502 case PM_CALL_NODE: {
13503 pm_call_node_t *call = (pm_call_node_t *) target;
13504
13505 // If we have no arguments to the call node and we need this to be a
13506 // target then this is either a method call or a local variable
13507 // write.
13508 if (
13509 (call->message_loc.start != NULL) &&
13510 (call->message_loc.end[-1] != '!') &&
13511 (call->message_loc.end[-1] != '?') &&
13512 (call->opening_loc.start == NULL) &&
13513 (call->arguments == NULL) &&
13514 (call->block == NULL)
13515 ) {
13516 if (call->receiver == NULL) {
13517 // When we get here, we have a local variable write, because it
13518 // was previously marked as a method call but now we have an =.
13519 // This looks like:
13520 //
13521 // foo = 1
13522 //
13523 // When it was parsed in the prefix position, foo was seen as a
13524 // method call with no receiver and no arguments. Now we have an
13525 // =, so we know it's a local variable write.
 // The location is copied first because the call node is destroyed
 // before the replacement node is created from that location.
13526 const pm_location_t message_loc = call->message_loc;
13527
13528 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13529 pm_node_destroy(parser, target);
13530
13531 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13532 }
13533
13534 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13535 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13536 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13537 }
13538
 // Rename the call from foo to foo= before wrapping it as a call target.
13539 parse_write_name(parser, &call->name);
13540 return (pm_node_t *) pm_call_target_node_create(parser, call);
13541 }
13542 }
13543
13544 // If there is no call operator and the message is "[]" then this is
13545 // an aref expression, and we can transform it into an aset
13546 // expression.
13547 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13548 return (pm_node_t *) pm_index_target_node_create(parser, call);
13549 }
13550 }
 // NOTE(review): a call node that matched none of the branches above falls
 // out of its case block into the default handling below; the line between
 // them is missing from this copy (presumably a fallthrough marker).
13552 default:
13553 // In this case we have a node that we don't know how to convert
13554 // into a target. We need to treat it as an error. For now, we'll
13555 // mark it as an error and just skip right past it.
13556 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13557 return target;
13558 }
13559}
13560
13565static pm_node_t *
13566parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13567 pm_node_t *result = parse_target(parser, target, multiple, false);
13568
13569 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13570 // parens after the targets.
13571 if (
13572 !match1(parser, PM_TOKEN_EQUAL) &&
13573 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13574 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13575 ) {
13576 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13577 }
13578
13579 return result;
13580}
13581
13586static pm_node_t *
13587parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13588 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13589
13590 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13591 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13592 }
13593
13594 return write;
13595}
13596
/**
 * Convert a parsed target node plus an already-parsed value into a write node.
 * `operator` is the assignment operator token and is passed to the node
 * constructors. For most node types the original target is destroyed and a
 * new write node is returned; call nodes are mutated in place into attribute
 * or index writes. When the target cannot be written to, an error is added at
 * the operator and the target is returned.
 *
 * NOTE(review): several `case` labels of the switch below appear to be missing
 * from this copy of the file (the embedded line numbers skip). Confirm against
 * the upstream source before relying on the visible control flow.
 */
13600static pm_node_t *
13601parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13602 switch (PM_NODE_TYPE(target)) {
13603 case PM_MISSING_NODE:
 // There is no target to attach the value to, so the value is released.
13604 pm_node_destroy(parser, value);
13605 return target;
13607 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13608 pm_node_destroy(parser, target);
13609 return (pm_node_t *) node;
13610 }
13611 case PM_CONSTANT_PATH_NODE: {
13612 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13613
13614 if (context_def_p(parser)) {
13615 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13616 }
13617
13618 return parse_shareable_constant_write(parser, node);
13619 }
13620 case PM_CONSTANT_READ_NODE: {
13621 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13622
13623 if (context_def_p(parser)) {
13624 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13625 }
13626
13627 pm_node_destroy(parser, target);
13628 return parse_shareable_constant_write(parser, node);
13629 }
13632 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13635 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13636 pm_node_destroy(parser, target);
13637 return (pm_node_t *) node;
13638 }
13641
 // The name, location and depth are copied out because the target node is
 // destroyed before the write node is created.
13642 pm_constant_id_t name = local_read->name;
13643 pm_location_t name_loc = target->location;
13644
13645 uint32_t depth = local_read->depth;
13646 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13647
13648 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13649 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13650 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13651 parse_target_implicit_parameter(parser, target);
13652 }
13653
13654 pm_locals_unread(&scope->locals, name);
13655 pm_node_destroy(parser, target);
13656
13657 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13658 }
13660 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13661 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13662
13663 parse_target_implicit_parameter(parser, target);
13664 pm_node_destroy(parser, target);
13665
13666 return node;
13667 }
13669 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13670 pm_node_destroy(parser, target);
13671 return write_node;
13672 }
13674 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13675 case PM_SPLAT_NODE: {
13676 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13677
 // NOTE(review): `value` is passed both to this recursive call and to
 // pm_multi_write_node_create below; confirm the intended ownership.
13678 if (splat->expression != NULL) {
13679 splat->expression = parse_write(parser, splat->expression, operator, value);
13680 }
13681
 // A lone splat is wrapped in a fresh multi target so that the result is
 // a multi write node.
13682 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13683 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13684
13685 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13686 }
13687 case PM_CALL_NODE: {
13688 pm_call_node_t *call = (pm_call_node_t *) target;
13689
13690 // If we have no arguments to the call node and we need this to be a
13691 // target then this is either a method call or a local variable
13692 // write.
13693 if (
13694 (call->message_loc.start != NULL) &&
13695 (call->message_loc.end[-1] != '!') &&
13696 (call->message_loc.end[-1] != '?') &&
13697 (call->opening_loc.start == NULL) &&
13698 (call->arguments == NULL) &&
13699 (call->block == NULL)
13700 ) {
13701 if (call->receiver == NULL) {
13702 // When we get here, we have a local variable write, because it
13703 // was previously marked as a method call but now we have an =.
13704 // This looks like:
13705 //
13706 // foo = 1
13707 //
13708 // When it was parsed in the prefix position, foo was seen as a
13709 // method call with no receiver and no arguments. Now we have an
13710 // =, so we know it's a local variable write.
 // The location is copied first because the call node is destroyed
 // before the write node is created from that location.
13711 const pm_location_t message = call->message_loc;
13712
13713 pm_parser_local_add_location(parser, message.start, message.end, 0);
13714 pm_node_destroy(parser, target);
13715
13716 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13717 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13718
13719 pm_refute_numbered_parameter(parser, message.start, message.end);
13720 return target;
13721 }
13722
13723 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13724 // When we get here, we have a method call, because it was
13725 // previously marked as a method call but now we have an =. This
13726 // looks like:
13727 //
13728 // foo.bar = 1
13729 //
13730 // When it was parsed in the prefix position, foo.bar was seen as a
13731 // method call with no arguments. Now we have an =, so we know it's
13732 // a method call with an argument. In this case we will create the
13733 // arguments node, parse the argument, and add it to the list.
13734 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13735 call->arguments = arguments;
13736
13737 pm_arguments_node_arguments_append(arguments, value);
 // Extend the call's location so that it also covers the new argument.
13738 call->base.location.end = arguments->base.location.end;
13739
13740 parse_write_name(parser, &call->name);
13741 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13742
13743 return (pm_node_t *) call;
13744 }
13745 }
13746
13747 // If there is no call operator and the message is "[]" then this is
13748 // an aref expression, and we can transform it into an aset
13749 // expression.
13750 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13751 if (call->arguments == NULL) {
13752 call->arguments = pm_arguments_node_create(parser);
13753 }
13754
13755 pm_arguments_node_arguments_append(call->arguments, value);
13756 target->location.end = value->location.end;
13757
13758 // Replace the name with "[]=".
13759 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13760
13761 // Ensure that the arguments for []= don't contain keywords
13762 pm_index_arguments_check(parser, call->arguments, call->block);
13763 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13764
13765 return target;
13766 }
13767
13768 // If there are arguments on the call node, then it can't be a method
13769 // call ending with = or a local variable write, so it must be a
13770 // syntax error. In this case we'll fall through to our default
13771 // handling. We need to free the value that we parsed because there
13772 // is no way for us to attach it to the tree at this point.
13773 pm_node_destroy(parser, value);
13774 }
13776 default:
13777 // In this case we have a node that we don't know how to convert into a
13778 // target. We need to treat it as an error. For now, we'll mark it as an
13779 // error and just skip right past it.
13780 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13781 return target;
13782 }
13783}
13784
13791static pm_node_t *
13792parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13793 switch (PM_NODE_TYPE(target)) {
13794 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13795 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13796 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13797 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13798 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13799 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13800 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13801 default: break;
13802 }
13803
13804 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13805 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13806
13807 pm_node_destroy(parser, target);
13808 return (pm_node_t *) result;
13809}
13810
13821static pm_node_t *
13822parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13823 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13824
13825 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13826 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13827
13828 while (accept1(parser, PM_TOKEN_COMMA)) {
13829 if (accept1(parser, PM_TOKEN_USTAR)) {
13830 // Here we have a splat operator. It can have a name or be
13831 // anonymous. It can be the final target or be in the middle if
13832 // there haven't been any others yet.
13833 if (has_rest) {
13834 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13835 }
13836
13837 pm_token_t star_operator = parser->previous;
13838 pm_node_t *name = NULL;
13839
13840 if (token_begins_expression_p(parser->current.type)) {
13841 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13842 name = parse_target(parser, name, true, true);
13843 }
13844
13845 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13846 pm_multi_target_node_targets_append(parser, result, splat);
13847 has_rest = true;
13848 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13849 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13850 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13851 target = parse_target(parser, target, true, false);
13852
13853 pm_multi_target_node_targets_append(parser, result, target);
13854 context_pop(parser);
13855 } else if (token_begins_expression_p(parser->current.type)) {
13856 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13857 target = parse_target(parser, target, true, false);
13858
13859 pm_multi_target_node_targets_append(parser, result, target);
13860 } else if (!match1(parser, PM_TOKEN_EOF)) {
13861 // If we get here, then we have a trailing , in a multi target node.
13862 // We'll add an implicit rest node to represent this.
13863 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13864 pm_multi_target_node_targets_append(parser, result, rest);
13865 break;
13866 }
13867 }
13868
13869 return (pm_node_t *) result;
13870}
13871
13876static pm_node_t *
13877parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13878 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13879 accept1(parser, PM_TOKEN_NEWLINE);
13880
13881 // Ensure that we have either an = or a ) after the targets.
13882 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13883 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13884 }
13885
13886 return result;
13887}
13888
/**
 * Parse a list of statements within the given context. Leading semicolons and
 * newlines are skipped first; if the current token then terminates the
 * context, NULL is returned. Otherwise the context is pushed, expressions are
 * parsed and appended to a new statements node until a context terminator is
 * reached or error recovery stops the loop, and the context is popped again
 * before the void statements check runs.
 *
 * NOTE(review): the `case` labels of the final switch appear to be missing
 * from this copy of the file (the embedded line numbers skip), so the contexts
 * for which last_value becomes false cannot be read from here.
 */
13892static pm_statements_node_t *
13893parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13894 // First, skip past any optional terminators that might be at the beginning
13895 // of the statements.
13896 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13897
13898 // If we have a terminator, then we can just return NULL.
13899 if (context_terminator(context, &parser->current)) return NULL;
13900
13901 pm_statements_node_t *statements = pm_statements_node_create(parser);
13902
13903 // At this point we know we have at least one statement, and that it
13904 // immediately follows the current token.
13905 context_push(parser, context);
13906
13907 while (true) {
13908 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13909 pm_statements_node_body_append(parser, statements, node, true);
13910
13911 // If we're recovering from a syntax error, then we need to stop parsing
13912 // the statements now.
13913 if (parser->recovering) {
13914 // If this is the level of context where the recovery has happened,
13915 // then we can mark the parser as done recovering.
13916 if (context_terminator(context, &parser->current)) parser->recovering = false;
13917 break;
13918 }
13919
13920 // If we have a terminator, then we will parse all consecutive
13921 // terminators and then continue parsing the statements list.
13922 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13923 // If we have a terminator, then we will continue parsing the
13924 // statements list.
13925 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13926 if (context_terminator(context, &parser->current)) break;
13927
13928 // Now we can continue parsing the list of statements.
13929 continue;
13930 }
13931
13932 // At this point we have a list of statements that are not terminated by
13933 // a newline or semicolon. At this point we need to check if we're at
13934 // the end of the statements list. If we are, then we should break out
13935 // of the loop.
13936 if (context_terminator(context, &parser->current)) break;
13937
13938 // At this point, we have a syntax error, because the statement was not
13939 // terminated by a newline or semicolon, and we're not at the end of the
13940 // statements list. Ideally we should scan forward to determine if we
13941 // should insert a missing terminator or break out of parsing the
13942 // statements list at this point.
13943 //
13944 // We don't have that yet, so instead we'll do a more naive approach. If
13945 // we were unable to parse an expression, then we will skip past this
13946 // token and continue parsing the statements list. Otherwise we'll add
13947 // an error and continue parsing the statements list.
13948 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13949 parser_lex(parser);
13950
13951 // If we are at the end of the file, then we need to stop parsing
13952 // the statements entirely at this point. Mark the parser as
13953 // recovering, as we know that EOF closes the top-level context, and
13954 // then break out of the loop.
13955 if (match1(parser, PM_TOKEN_EOF)) {
13956 parser->recovering = true;
13957 break;
13958 }
13959
13960 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13961 if (context_terminator(context, &parser->current)) break;
13962 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13963 // This is an inlined version of accept1 because the error that we
13964 // want to add has varargs. If this happens again, we should
13965 // probably extract a helper function.
13966 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
 // The previous token is turned into an empty missing token that sits
 // at its own end position.
13967 parser->previous.start = parser->previous.end;
13968 parser->previous.type = PM_TOKEN_MISSING;
13969 }
13970 }
13971
13972 context_pop(parser);
 // last_value is handed to the void statements check below; it stays true
 // unless one of the cases of the following switch clears it.
13973 bool last_value = true;
13974 switch (context) {
13977 last_value = false;
13978 break;
13979 default:
13980 break;
13981 }
13982 pm_void_statements_check(parser, statements, last_value);
13983
13984 return statements;
13985}
13986
13991static void
13992pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13993 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13994
13995 if (duplicated != NULL) {
13996 pm_buffer_t buffer = { 0 };
13997 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13998
13999 pm_diagnostic_list_append_format(
14000 &parser->warning_list,
14001 duplicated->location.start,
14002 duplicated->location.end,
14003 PM_WARN_DUPLICATED_HASH_KEY,
14004 (int) pm_buffer_length(&buffer),
14005 pm_buffer_value(&buffer),
14006 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
14007 );
14008
14009 pm_buffer_free(&buffer);
14010 }
14011}
14012
14017static void
14018pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14019 pm_node_t *previous;
14020
14021 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14022 pm_diagnostic_list_append_format(
14023 &parser->warning_list,
14024 node->location.start,
14025 node->location.end,
14026 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14027 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14028 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14029 );
14030 }
14031}
14032
/**
 * Parse a comma-separated list of hash elements and append each one to `node`,
 * which is treated as a hash node when its type is PM_HASH_NODE and as a
 * keyword hash node otherwise. Keys are registered in `literals` through
 * pm_hash_key_static_literals_add. Returns true if at least one element was a
 * `**` splat.
 *
 * NOTE(review): one line directly after the signature appears to be missing
 * from this copy of the file (the embedded line numbers skip); confirm against
 * the upstream source.
 */
14036static bool
14037parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14039 bool contains_keyword_splat = false;
14040
14041 while (true) {
14042 pm_node_t *element;
14043
14044 switch (parser->current.type) {
14045 case PM_TOKEN_USTAR_STAR: {
14046 parser_lex(parser);
14047 pm_token_t operator = parser->previous;
14048 pm_node_t *value = NULL;
14049
14050 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14051 // If we're about to parse a nested hash that is being
14052 // pushed into this hash directly with **, then we want the
14053 // inner hash to share the static literals with the outer
14054 // hash.
14055 parser->current_hash_keys = literals;
14056 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14057 } else if (token_begins_expression_p(parser->current.type)) {
14058 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14059 } else {
 // A bare ** with no expression: value stays NULL and the forwarding
 // check is run on the operator instead.
14060 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14061 }
14062
14063 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14064 contains_keyword_splat = true;
14065 break;
14066 }
14067 case PM_TOKEN_LABEL: {
14068 pm_token_t label = parser->current;
14069 parser_lex(parser);
14070
14071 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14072 pm_hash_key_static_literals_add(parser, literals, key);
14073
14074 pm_token_t operator = not_provided(parser);
14075 pm_node_t *value = NULL;
14076
14077 if (token_begins_expression_p(parser->current.type)) {
14078 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14079 } else {
 // No value follows the label, so the value is derived from the label
 // text itself with its final byte dropped (label.end - 1).
14080 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14081 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14082 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14083 } else {
 // NOTE(review): this local `depth` shadows the function parameter
 // of the same name for the rest of this block.
14084 int depth = -1;
14085 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14086
14087 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14088 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14089 } else {
14090 depth = pm_parser_local_depth(parser, &identifier);
14091 }
14092
 // A depth of -1 produces a variable call node; any other depth
 // produces a local variable read at that depth.
14093 if (depth == -1) {
14094 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14095 } else {
14096 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14097 }
14098 }
14099
 // Extend the value's location by the one byte that was dropped from
 // the label above, then wrap the value in an implicit node.
14100 value->location.end++;
14101 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14102 }
14103
14104 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14105 break;
14106 }
14107 default: {
14108 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14109
14110 // Hash keys that are strings are automatically frozen. We will
14111 // mark that here.
14112 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14113 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14114 }
14115
14116 pm_hash_key_static_literals_add(parser, literals, key);
14117
 // A key for which pm_symbol_node_label_p is true takes no => operator;
 // every other key must be followed by one.
14118 pm_token_t operator;
14119 if (pm_symbol_node_label_p(key)) {
14120 operator = not_provided(parser);
14121 } else {
14122 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14123 operator = parser->previous;
14124 }
14125
14126 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14127 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14128 break;
14129 }
14130 }
14131
14132 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14133 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14134 } else {
14135 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14136 }
14137
14138 // If there's no comma after the element, then we're done.
14139 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14140
14141 // If the next element starts with a label or a **, then we know we have
14142 // another element in the hash, so we'll continue parsing.
14143 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14144
14145 // Otherwise we need to check if the subsequent token begins an expression.
14146 // If it does, then we'll continue parsing.
14147 if (token_begins_expression_p(parser->current.type)) continue;
14148
14149 // Otherwise by default we will exit out of this loop.
14150 break;
14151 }
14152
14153 return contains_keyword_splat;
14154}
14155
14159static inline void
14160parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14161 if (arguments->arguments == NULL) {
14162 arguments->arguments = pm_arguments_node_create(parser);
14163 }
14164
14165 pm_arguments_node_arguments_append(arguments->arguments, argument);
14166}
14167
/**
 * Parse a comma-separated list of arguments into the given pm_arguments_t.
 *
 * The function returns immediately, without consuming anything, when the
 * current token is the terminator or EOF, when its left binding power is set
 * but lower than PM_BINDING_POWER_RANGE, or when it terminates the current
 * context. Otherwise it parses one argument per loop iteration and keeps going
 * while arguments are separated by commas.
 *
 * - accepts_forwarding: whether a `...` token is lexed here and considered for
 *   argument forwarding (it is treated as a range when an expression follows).
 * - terminator: the token type that closes the list. When it is not
 *   PM_TOKEN_EOF, a newline is accepted after each argument.
 * - depth: passed on, incremented by one, to the nested parse calls.
 *
 * NOTE(review): this text carries fused listing line numbers and some listing
 * lines are absent (see the notes inside the body). Confirm against the
 * original file before relying on the exact control flow.
 */
14171static void
14172parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14173 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14174
14175 // First we need to check if the next token is one that could be the start
14176 // of an argument. If it's not, then we can just return.
14177 if (
14178 match2(parser, terminator, PM_TOKEN_EOF) ||
14179 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14180 context_terminator(parser->current_context->context, &parser->current)
14181 ) {
14182 return;
14183 }
14184
      // State carried across loop iterations; each flag records what kind of
      // argument has already been seen so later arguments can be validated.
14185 bool parsed_first_argument = false;
14186 bool parsed_bare_hash = false;
14187 bool parsed_block_argument = false;
14188 bool parsed_forwarding_arguments = false;
14189
14190 while (!match1(parser, PM_TOKEN_EOF)) {
      // Anything that follows a forwarding `...` argument is reported as an
      // error, but parsing of that argument still proceeds below.
14191 if (parsed_forwarding_arguments) {
14192 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14193 }
14194
14195 pm_node_t *argument = NULL;
14196
14197 switch (parser->current.type) {
      // NOTE(review): listing line 14198 is absent here; another case label
      // may share this branch. Confirm against the original file.
14199 case PM_TOKEN_LABEL: {
      // A keyword-style argument: the remaining associations are collected
      // into a single keyword hash node by parse_assocs.
14200 if (parsed_bare_hash) {
14201 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14202 }
14203
14204 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14205 argument = (pm_node_t *) hash;
14206
14207 pm_static_literals_t hash_keys = { 0 };
14208 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14209
14210 parse_arguments_append(parser, arguments, argument);
14211
      // NOTE(review): `flags` is used on the next two lines but its
      // declaration is not visible (listing line 14212 is absent).
14213 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14214 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14215
14216 pm_static_literals_free(&hash_keys);
14217 parsed_bare_hash = true;
14218
14219 break;
14220 }
14221 case PM_TOKEN_UAMPERSAND: {
      // A block argument. With no expression after the `&`, the enclosing
      // scope is checked for block forwarding instead.
14222 parser_lex(parser);
14223 pm_token_t operator = parser->previous;
14224 pm_node_t *expression = NULL;
14225
14226 if (token_begins_expression_p(parser->current.type)) {
14227 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14228 } else {
14229 pm_parser_scope_forwarding_block_check(parser, &operator);
14230 }
14231
14232 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
      // Only the first block argument is stored in arguments->block; any
      // further one is appended to the regular argument list.
14233 if (parsed_block_argument) {
14234 parse_arguments_append(parser, arguments, argument);
14235 } else {
14236 arguments->block = argument;
14237 }
14238
14239 if (match1(parser, PM_TOKEN_COMMA)) {
14240 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14241 }
14242
14243 parsed_block_argument = true;
14244 break;
14245 }
14246 case PM_TOKEN_USTAR: {
14247 parser_lex(parser);
14248 pm_token_t operator = parser->previous;
14249
      // NOTE(review): the `} else {` below has no visible matching `if`
      // (listing line 14250 is absent). The first branch builds a splat with
      // no expression after a positional-forwarding check; the condition that
      // selects it cannot be determined from this text.
14251 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14252 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14253 if (parsed_bare_hash) {
14254 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14255 }
14256 } else {
14257 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14258
14259 if (parsed_bare_hash) {
14260 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14261 }
14262
14263 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14264 }
14265
14266 parse_arguments_append(parser, arguments, argument);
14267 break;
14268 }
14269 case PM_TOKEN_UDOT_DOT_DOT: {
14270 if (accepts_forwarding) {
14271 parser_lex(parser);
14272
14273 if (token_begins_expression_p(parser->current.type)) {
14274 // If the token begins an expression then this ... was
14275 // not actually argument forwarding but was instead a
14276 // range.
14277 pm_token_t operator = parser->previous;
14278 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14279
14280 // If we parse a range, we need to validate that we
14281 // didn't accidentally violate the nonassoc rules of the
14282 // ... operator.
14283 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14284 pm_range_node_t *range = (pm_range_node_t *) right;
14285 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14286 }
14287
14288 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14289 } else {
14290 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14291 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14292 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14293 }
14294
14295 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14296 parse_arguments_append(parser, arguments, argument);
14297 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14298 arguments->has_forwarding = true;
14299 parsed_forwarding_arguments = true;
14300 break;
14301 }
14302 }
14303 }
      // NOTE(review): listing line 14304 is absent. Unless the `break` above
      // was taken, control appears to continue from the `...` case into
      // `default` (which skips re-parsing when `argument` is already set).
      // Confirm whether a fallthrough marker belongs here.
14305 default: {
14306 if (argument == NULL) {
14307 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14308 }
14309
14310 bool contains_keywords = false;
14311 bool contains_keyword_splat = false;
14312
      // If the parsed argument is a label symbol or is followed by `=>`, it
      // is the first key of a bare hash; wrap it in a keyword hash node.
14313 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14314 if (parsed_bare_hash) {
14315 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14316 }
14317
14318 pm_token_t operator;
14319 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14320 operator = parser->previous;
14321 } else {
14322 operator = not_provided(parser);
14323 }
14324
14325 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14326 contains_keywords = true;
14327
14328 // Create the set of static literals for this hash.
14329 pm_static_literals_t hash_keys = { 0 };
14330 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14331
14332 // Finish parsing the one we are part way through.
14333 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14334 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14335
14336 pm_keyword_hash_node_elements_append(bare_hash, argument);
14337 argument = (pm_node_t *) bare_hash;
14338
14339 // Then parse more if we have a comma
14340 if (accept1(parser, PM_TOKEN_COMMA) && (
14341 token_begins_expression_p(parser->current.type) ||
14342 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14343 )) {
14344 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14345 }
14346
14347 pm_static_literals_free(&hash_keys);
14348 parsed_bare_hash = true;
14349 }
14350
14351 parse_arguments_append(parser, arguments, argument);
14352
14353 pm_node_flags_t flags = 0;
14354 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14355 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14356 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14357
14358 break;
14359 }
14360 }
14361
14362 parsed_first_argument = true;
14363
14364 // If parsing the argument failed, we need to stop parsing arguments.
14365 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14366
14367 // If the terminator of these arguments is not EOF, then we have a
14368 // specific token we're looking for. In that case we can accept a
14369 // newline here because it is not functioning as a statement terminator.
14370 bool accepted_newline = false;
14371 if (terminator != PM_TOKEN_EOF) {
14372 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14373 }
14374
14375 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14376 // If we previously were on a comma and we just parsed a bare hash,
14377 // then we want to continue parsing arguments. This is because the
14378 // comma was grabbed up by the hash parser.
14379 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14380 // If there was a comma, then we need to check if we also accepted a
14381 // newline. If we did, then this is a syntax error.
14382 if (accepted_newline) {
14383 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14384 }
14385 } else {
14386 // If there is no comma at the end of the argument list then we're
14387 // done parsing arguments and can break out of this loop.
14388 break;
14389 }
14390
14391 // If we hit the terminator, then that means we have a trailing comma so
14392 // we can accept that output as well.
14393 if (match1(parser, terminator)) break;
14394 }
14395}
14396
14408parse_required_destructured_parameter(pm_parser_t *parser) {
14409 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14410
14411 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14412 pm_multi_target_node_opening_set(node, &parser->previous);
14413
14414 do {
14415 pm_node_t *param;
14416
14417 // If we get here then we have a trailing comma, which isn't allowed in
14418 // the grammar. In other places, multi targets _do_ allow trailing
14419 // commas, so here we'll assume this is a mistake of the user not
14420 // knowing it's not allowed here.
14421 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14422 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14423 pm_multi_target_node_targets_append(parser, node, param);
14424 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14425 break;
14426 }
14427
14428 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14429 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14430 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14431 pm_token_t star = parser->previous;
14432 pm_node_t *value = NULL;
14433
14434 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14435 pm_token_t name = parser->previous;
14436 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14437 if (pm_parser_parameter_name_check(parser, &name)) {
14438 pm_node_flag_set_repeated_parameter(value);
14439 }
14440 pm_parser_local_add_token(parser, &name, 1);
14441 }
14442
14443 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14444 } else {
14445 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14446 pm_token_t name = parser->previous;
14447
14448 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14449 if (pm_parser_parameter_name_check(parser, &name)) {
14450 pm_node_flag_set_repeated_parameter(param);
14451 }
14452 pm_parser_local_add_token(parser, &name, 1);
14453 }
14454
14455 pm_multi_target_node_targets_append(parser, node, param);
14456 } while (accept1(parser, PM_TOKEN_COMMA));
14457
14458 accept1(parser, PM_TOKEN_NEWLINE);
14459 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14460 pm_multi_target_node_closing_set(node, &parser->previous);
14461
14462 return node;
14463}
14464
/**
 * Ordering states used to validate the order of parameters in a parameter
 * list. The numeric values matter: update_parameter_state compares them, so a
 * token whose state is greater than the current state is reported as being out
 * of order, and the current state only ever moves to smaller values.
 */
typedef enum {
    PM_PARAMETERS_NO_CHANGE = 0, // Extra state for tokens that should not change the state
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST,
    PM_PARAMETERS_ORDER_KEYWORDS,
    PM_PARAMETERS_ORDER_REST,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
    PM_PARAMETERS_ORDER_OPTIONAL,
    PM_PARAMETERS_ORDER_NAMED,
    PM_PARAMETERS_ORDER_NONE,
} pm_parameters_order_t;
14480
14484static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14485 [0] = PM_PARAMETERS_NO_CHANGE,
14486 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14487 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14488 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14489 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14490 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14491 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14492 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14493 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14494 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14495 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14496 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14497};
14498
14506static bool
14507update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14508 pm_parameters_order_t state = parameters_ordering[token->type];
14509 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14510
14511 // If we see another ordered argument after a optional argument
14512 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14513 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14514 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14515 return true;
14516 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14517 return true;
14518 }
14519
14520 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14521 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14522 return false;
14523 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14524 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14525 return false;
14526 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14527 // We know what transition we failed on, so we can provide a better error here.
14528 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14529 return false;
14530 }
14531
14532 if (state < *current) *current = state;
14533 return true;
14534}
14535
/**
 * Parse a list of parameters into a parameters node.
 *
 * Each loop iteration dispatches on the current token type, builds the
 * matching parameter node and stores it on `params`. update_parameter_state is
 * called along the way to validate parameter ordering. Parsing continues while
 * parameters are separated by commas.
 *
 * - binding_power: passed to parse_value_expression when parsing default
 *   values.
 * - uses_parentheses: when true, a newline is accepted after each parameter,
 *   and a label followed directly by a newline or semicolon stops parsing.
 * - allows_trailing_comma: when true (and the ordering state is at least
 *   PM_PARAMETERS_ORDER_NAMED), a trailing comma produces an implicit rest
 *   node instead of an error.
 * - allows_forwarding_parameters: when false, a `...` parameter is reported
 *   with PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES (it is still parsed).
 * - accepts_blocks_in_defaults: when true, `true` is pushed on the
 *   accepts-block stack around the parsing of each default value.
 * - in_block: together with uses_parentheses, decides whether
 *   parser->in_keyword_arg is set while a label parameter is parsed.
 * - depth: passed on, incremented by one, to nested parse calls.
 *
 * Returns the parameters node, or NULL (after destroying the node) when its
 * location start equals its location end.
 *
 * NOTE(review): this text carries fused listing line numbers and a number of
 * listing lines (mostly `case` labels) are absent; see the notes inside the
 * body and confirm against the original file.
 */
14539static pm_parameters_node_t *
14540parse_parameters(
14541 pm_parser_t *parser,
14542 pm_binding_power_t binding_power,
14543 bool uses_parentheses,
14544 bool allows_trailing_comma,
14545 bool allows_forwarding_parameters,
14546 bool accepts_blocks_in_defaults,
14547 bool in_block,
14548 uint16_t depth
14549) {
14550 pm_do_loop_stack_push(parser, false);
14551
14552 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14553 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14554
14555 while (true) {
      // Cleared by any branch that needs to stop the loop from inside the
      // switch below.
14556 bool parsing = true;
14557
14558 switch (parser->current.type) {
      // NOTE(review): listing line 14559 is absent; the case label (and
      // opening brace) for this destructured-parameter branch is not visible.
14560 update_parameter_state(parser, &parser->current, &order);
14561 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14562
      // While the ordering state is still above AFTER_OPTIONAL the parameter
      // is stored as a required one, otherwise as a post parameter.
14563 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14564 pm_parameters_node_requireds_append(params, param);
14565 } else {
14566 pm_parameters_node_posts_append(params, param);
14567 }
14568 break;
14569 }
      // NOTE(review): listing line 14570 is absent; another case label may
      // share this branch.
14571 case PM_TOKEN_AMPERSAND: {
14572 update_parameter_state(parser, &parser->current, &order);
14573 parser_lex(parser);
14574
14575 pm_token_t operator = parser->previous;
14576 pm_token_t name;
14577
14578 bool repeated = false;
14579 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14580 name = parser->previous;
14581 repeated = pm_parser_parameter_name_check(parser, &name);
14582 pm_parser_local_add_token(parser, &name, 1);
14583 } else {
      // An anonymous block parameter: record on the scope that block
      // forwarding is available.
14584 name = not_provided(parser);
14585 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14586 }
14587
14588 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14589 if (repeated) {
14590 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14591 }
      // A second block parameter is an error; it is kept in the posts list.
14592 if (params->block == NULL) {
14593 pm_parameters_node_block_set(params, param);
14594 } else {
14595 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14596 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14597 }
14598
14599 break;
14600 }
14601 case PM_TOKEN_UDOT_DOT_DOT: {
14602 if (!allows_forwarding_parameters) {
14603 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14604 }
14605
14606 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14607 parser_lex(parser);
14608
14609 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14610 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14611
14612 if (params->keyword_rest != NULL) {
14613 // If we already have a keyword rest parameter, then we replace it with the
14614 // forwarding parameter and move the keyword rest parameter to the posts list.
14615 pm_node_t *keyword_rest = params->keyword_rest;
14616 pm_parameters_node_posts_append(params, keyword_rest);
      // Only report this error when the ordering check did not already
      // report one.
14617 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14618 params->keyword_rest = NULL;
14619 }
14620
14621 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14622 break;
14623 }
      // NOTE(review): listing lines 14624-14625 and 14627-14628 are absent;
      // further case labels likely share this branch (the inner switch below
      // reports errors for several token kinds). Confirm against the original
      // file.
14626 case PM_TOKEN_CONSTANT:
14629 case PM_TOKEN_METHOD_NAME: {
14630 parser_lex(parser);
      // Report an error for tokens that cannot name a formal parameter; the
      // token is still processed as a parameter name below.
14631 switch (parser->previous.type) {
14632 case PM_TOKEN_CONSTANT:
14633 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14634 break;
      // NOTE(review): the case labels for the next four error branches
      // (listing lines 14635, 14638, 14641, 14644) are absent.
14636 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14637 break;
14639 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14640 break;
14642 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14643 break;
14645 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14646 break;
14647 default: break;
14648 }
14649
      // The ordering check is made with the `=` token when a default value
      // follows, and with the name token otherwise.
14650 if (parser->current.type == PM_TOKEN_EQUAL) {
14651 update_parameter_state(parser, &parser->current, &order);
14652 } else {
14653 update_parameter_state(parser, &parser->previous, &order);
14654 }
14655
14656 pm_token_t name = parser->previous;
14657 bool repeated = pm_parser_parameter_name_check(parser, &name);
14658 pm_parser_local_add_token(parser, &name, 1);
14659
14660 if (match1(parser, PM_TOKEN_EQUAL)) {
14661 pm_token_t operator = parser->current;
14662 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14663 parser_lex(parser);
14664
      // The read count is only sampled for the 3.3 version option; it is
      // compared after the default value is parsed to detect a circular
      // reference.
14665 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14666 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14667
14668 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14669 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14670 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14671
14672 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14673
14674 if (repeated) {
14675 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14676 }
14677 pm_parameters_node_optionals_append(params, param);
14678
14679 // If the value of the parameter increased the number of
14680 // reads of that parameter, then we need to warn that we
14681 // have a circular definition.
14682 if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14683 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14684 }
14685
14686 context_pop(parser);
14687
14688 // If parsing the value of the parameter resulted in error recovery,
14689 // then we can put a missing node in its place and stop parsing the
14690 // parameters entirely now.
14691 if (parser->recovering) {
14692 parsing = false;
14693 break;
14694 }
14695 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14696 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14697 if (repeated) {
14698 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14699 }
14700 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14701 } else {
14702 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14703 if (repeated) {
14704 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14705 }
14706 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14707 }
14708
14709 break;
14710 }
14711 case PM_TOKEN_LABEL: {
14712 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14713 update_parameter_state(parser, &parser->current, &order);
14714
14715 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14716 parser_lex(parser);
14717
      // `local` is the label token shortened by one byte at the end
      // (presumably dropping the trailing `:` — confirm against the lexer).
14718 pm_token_t name = parser->previous;
14719 pm_token_t local = name;
14720 local.end -= 1;
14721
14722 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14723 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14724 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14725 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14726 }
14727
14728 bool repeated = pm_parser_parameter_name_check(parser, &local);
14729 pm_parser_local_add_token(parser, &local, 1);
14730
      // What follows the label decides between a required keyword parameter
      // (no value) and an optional one (a default value expression).
14731 switch (parser->current.type) {
14732 case PM_TOKEN_COMMA:
      // NOTE(review): listing line 14733 is absent; another case label may
      // share this branch.
14734 case PM_TOKEN_PIPE: {
14735 context_pop(parser);
14736
14737 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14738 if (repeated) {
14739 pm_node_flag_set_repeated_parameter(param);
14740 }
14741
14742 pm_parameters_node_keywords_append(params, param);
14743 break;
14744 }
14745 case PM_TOKEN_SEMICOLON:
14746 case PM_TOKEN_NEWLINE: {
14747 context_pop(parser);
14748
      // With parentheses, a newline or semicolon right after the label stops
      // parameter parsing without creating a node for this label.
14749 if (uses_parentheses) {
14750 parsing = false;
14751 break;
14752 }
14753
14754 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14755 if (repeated) {
14756 pm_node_flag_set_repeated_parameter(param);
14757 }
14758
14759 pm_parameters_node_keywords_append(params, param);
14760 break;
14761 }
14762 default: {
14763 pm_node_t *param;
14764
14765 if (token_begins_expression_p(parser->current.type)) {
14766 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14767 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14768
14769 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14770 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14771 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14772
14773 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14774 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14775 }
14776
14777 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14778 }
14779 else {
14780 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14781 }
14782
14783 if (repeated) {
14784 pm_node_flag_set_repeated_parameter(param);
14785 }
14786
14787 context_pop(parser);
14788 pm_parameters_node_keywords_append(params, param);
14789
14790 // If parsing the value of the parameter resulted in error recovery,
14791 // then we can put a missing node in its place and stop parsing the
14792 // parameters entirely now.
14793 if (parser->recovering) {
14794 parsing = false;
14795 break;
14796 }
14797 }
14798 }
14799
14800 parser->in_keyword_arg = false;
14801 break;
14802 }
14803 case PM_TOKEN_USTAR:
14804 case PM_TOKEN_STAR: {
14805 update_parameter_state(parser, &parser->current, &order);
14806 parser_lex(parser);
14807
14808 pm_token_t operator = parser->previous;
14809 pm_token_t name;
14810 bool repeated = false;
14811
14812 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14813 name = parser->previous;
14814 repeated = pm_parser_parameter_name_check(parser, &name);
14815 pm_parser_local_add_token(parser, &name, 1);
14816 } else {
      // An anonymous rest parameter: record on the scope that positional
      // forwarding is available.
14817 name = not_provided(parser);
14818 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14819 }
14820
14821 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14822 if (repeated) {
14823 pm_node_flag_set_repeated_parameter(param);
14824 }
14825
      // A second rest parameter is an error; it is kept in the posts list.
14826 if (params->rest == NULL) {
14827 pm_parameters_node_rest_set(params, param);
14828 } else {
14829 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14830 pm_parameters_node_posts_append(params, param);
14831 }
14832
14833 break;
14834 }
14835 case PM_TOKEN_STAR_STAR:
14836 case PM_TOKEN_USTAR_STAR: {
      // Remember the state from before this token so the `**nil` check below
      // can tell whether keyword parameters were already seen.
14837 pm_parameters_order_t previous_order = order;
14838 update_parameter_state(parser, &parser->current, &order);
14839 parser_lex(parser);
14840
14841 pm_token_t operator = parser->previous;
14842 pm_node_t *param;
14843
14844 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14845 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14846 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14847 }
14848
14849 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14850 } else {
14851 pm_token_t name;
14852
14853 bool repeated = false;
14854 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14855 name = parser->previous;
14856 repeated = pm_parser_parameter_name_check(parser, &name);
14857 pm_parser_local_add_token(parser, &name, 1);
14858 } else {
14859 name = not_provided(parser);
14860 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14861 }
14862
14863 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14864 if (repeated) {
14865 pm_node_flag_set_repeated_parameter(param);
14866 }
14867 }
14868
      // A second keyword rest parameter is an error; it is kept in the posts
      // list.
14869 if (params->keyword_rest == NULL) {
14870 pm_parameters_node_keyword_rest_set(params, param);
14871 } else {
14872 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14873 pm_parameters_node_posts_append(params, param);
14874 }
14875
14876 break;
14877 }
14878 default:
      // The current token does not start a parameter. If the previous token
      // was a comma, this is a trailing comma.
14879 if (parser->previous.type == PM_TOKEN_COMMA) {
14880 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14881 // If we get here, then we have a trailing comma in a
14882 // block parameter list.
14883 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14884
14885 if (params->rest == NULL) {
14886 pm_parameters_node_rest_set(params, param);
14887 } else {
14888 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14889 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14890 }
14891 } else {
14892 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14893 }
14894 }
14895
14896 parsing = false;
14897 break;
14898 }
14899
14900 // If we hit some kind of issue while parsing the parameter, this would
14901 // have been set to false. In that case, we need to break out of the
14902 // loop.
14903 if (!parsing) break;
14904
14905 bool accepted_newline = false;
14906 if (uses_parentheses) {
14907 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14908 }
14909
14910 if (accept1(parser, PM_TOKEN_COMMA)) {
14911 // If there was a comma, but we also accepted a newline, then this
14912 // is a syntax error.
14913 if (accepted_newline) {
14914 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14915 }
14916 } else {
14917 // If there was no comma, then we're done parsing parameters.
14918 break;
14919 }
14920 }
14921
14922 pm_do_loop_stack_pop(parser);
14923
14924 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14925 if (params->base.location.start == params->base.location.end) {
14926 pm_node_destroy(parser, (pm_node_t *) params);
14927 return NULL;
14928 }
14929
14930 return params;
14931}
14932
14937static size_t
14938token_newline_index(const pm_parser_t *parser) {
14939 if (parser->heredoc_end == NULL) {
14940 // This is the common case. In this case we can look at the previously
14941 // recorded newline in the newline list and subtract from the current
14942 // offset.
14943 return parser->newline_list.size - 1;
14944 } else {
14945 // This is unlikely. This is the case that we have already parsed the
14946 // start of a heredoc, so we cannot rely on looking at the previous
14947 // offset of the newline list, and instead must go through the whole
14948 // process of a binary search for the line number.
14949 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14950 }
14951}
14952
14957static int64_t
14958token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14959 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14960 const uint8_t *end = token->start;
14961
14962 // Skip over the BOM if it is present.
14963 if (
14964 newline_index == 0 &&
14965 parser->start[0] == 0xef &&
14966 parser->start[1] == 0xbb &&
14967 parser->start[2] == 0xbf
14968 ) cursor += 3;
14969
14970 int64_t column = 0;
14971 for (; cursor < end; cursor++) {
14972 switch (*cursor) {
14973 case '\t':
14974 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14975 break;
14976 case ' ':
14977 column++;
14978 break;
14979 default:
14980 column++;
14981 if (break_on_non_space) return -1;
14982 break;
14983 }
14984 }
14985
14986 return column;
14987}
14988
14993static void
14994parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14995 // If these warnings are disabled (unlikely), then we can just return.
14996 if (!parser->warn_mismatched_indentation) return;
14997
14998 // If the tokens are on the same line, we do not warn.
14999 size_t closing_newline_index = token_newline_index(parser);
15000 if (opening_newline_index == closing_newline_index) return;
15001
15002 // If the opening token has anything other than spaces or tabs before it,
15003 // then we do not warn. This is unless we are matching up an `if`/`end` pair
15004 // and the `if` immediately follows an `else` keyword.
15005 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
15006 if (!if_after_else && (opening_column == -1)) return;
15007
15008 // Get a reference to the closing token off the current parser. This assumes
15009 // that the caller has placed this in the correct position.
15010 pm_token_t *closing_token = &parser->current;
15011
15012 // If the tokens are at the same indentation, we do not warn.
15013 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
15014 if ((closing_column == -1) || (opening_column == closing_column)) return;
15015
15016 // If the closing column is greater than the opening column and we are
15017 // allowing indentation, then we do not warn.
15018 if (allow_indent && (closing_column > opening_column)) return;
15019
15020 // Otherwise, add a warning.
15021 PM_PARSER_WARN_FORMAT(
15022 parser,
15023 closing_token->start,
15024 closing_token->end,
15025 PM_WARN_INDENTATION_MISMATCH,
15026 (int) (closing_token->end - closing_token->start),
15027 (const char *) closing_token->start,
15028 (int) (opening_token->end - opening_token->start),
15029 (const char *) opening_token->start,
15030 ((int32_t) opening_newline_index) + parser->start_line
15031 );
15032}
15033
/**
 * The kind of construct whose rescue clauses are being parsed. parse_rescues
 * maps each value to the matching PM_CONTEXT_*_RESCUE context.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK,
    PM_RESCUES_CLASS,
    PM_RESCUES_DEF,
    PM_RESCUES_LAMBDA,
    PM_RESCUES_MODULE,
    PM_RESCUES_SCLASS
} pm_rescues_type_t;
15043
/**
 * Parse the trailing clauses of a construct that can be rescued: zero or more
 * `rescue` clauses, then an optional `else` clause, then an optional `ensure`
 * clause, and finally the `end` keyword. Every clause that is found is attached
 * to parent_node.
 *
 * - opening_newline_index and opening are forwarded to
 *   parser_warn_indentation_mismatch for each clause keyword. opening may be
 *   NULL, in which case those checks are skipped (until an `else` keyword
 *   replaces it, see below).
 * - type selects which *_RESCUE, *_ELSE and *_ENSURE context is handed to
 *   parse_statements for the body of each clause.
 * - depth is passed on, incremented by one, to every nested parse call.
 *
 * The `end` keyword is only inspected here, not consumed: when the current
 * token is `end` it is recorded on parent_node, otherwise a zero-width
 * PM_TOKEN_MISSING token placed at the end of the previous token is recorded.
 */
15048static inline void
15049parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
 // The most recently parsed rescue clause, or NULL while none has been seen.
15050 pm_rescue_node_t *current = NULL;
15051
15052 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15053 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15054 parser_lex(parser);
15055
15056 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15057
15058 switch (parser->current.type) {
 // NOTE(review): a line appears to be missing from this listing here. The
 // braced group below (closed by the `}` after its `break`) has no visible
 // `case` label; judging by its comment it presumably handles a leading `=>`
 // token — confirm against the upstream file.
15060 // Here we have an immediate => after the rescue keyword, in which case
15061 // we're going to have an empty list of exceptions to rescue (which
15062 // implies StandardError).
15063 parser_lex(parser);
15064 pm_rescue_node_operator_set(rescue, &parser->previous);
15065
15066 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15067 reference = parse_target(parser, reference, false, false);
15068
15069 pm_rescue_node_reference_set(rescue, reference);
15070 break;
15071 }
15072 case PM_TOKEN_NEWLINE:
15073 case PM_TOKEN_SEMICOLON:
 // NOTE(review): a line appears to be missing from this listing here,
 // possibly a further `case` label for this group — confirm.
15075 // Here we have a terminator for the rescue keyword, in which case we're
15076 // going to just continue on.
15077 break;
15078 default: {
15079 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15080 // Here we have something that could be an exception expression, so
15081 // we'll attempt to parse it here and any others delimited by commas.
15082
15083 do {
15084 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15085 pm_rescue_node_exceptions_append(rescue, expression);
15086
15087 // If we hit a newline, then this is the end of the rescue expression. We
15088 // can continue on to parse the statements.
15089 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15090
15091 // If we hit a `=>` then we're going to parse the exception variable. Once
15092 // we've done that, we'll break out of the loop and parse the statements.
15093 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15094 pm_rescue_node_operator_set(rescue, &parser->previous);
15095
15096 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15097 reference = parse_target(parser, reference, false, false);
15098
15099 pm_rescue_node_reference_set(rescue, reference);
15100 break;
15101 }
15102 } while (accept1(parser, PM_TOKEN_COMMA));
15103 }
15104 }
15105 }
15106
 // The clause header must be closed by a newline or semicolon (optionally
 // followed by `then`), or else by `then` on its own.
15107 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15108 accept1(parser, PM_TOKEN_KEYWORD_THEN);
15109 } else {
15110 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15111 }
15112
 // NOTE(review): a line appears to be missing from this listing here. The `}`
 // that follows the trailing accept2 call below closes a block with no visible
 // opener, presumably a condition guarding the parse of the clause body —
 // confirm against the upstream file.
15114 pm_accepts_block_stack_push(parser, true);
15115 pm_context_t context;
15116
15117 switch (type) {
15118 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15119 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15120 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15121 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15122 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15123 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15124 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15125 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15126 }
15127
15128 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15129 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15130
15131 pm_accepts_block_stack_pop(parser);
15132 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15133 }
15134
 // The first clause hangs off the begin node; each later clause is chained
 // onto the clause before it.
15135 if (current == NULL) {
15136 pm_begin_node_rescue_clause_set(parent_node, rescue);
15137 } else {
15138 pm_rescue_node_subsequent_set(current, rescue);
15139 }
15140
15141 current = rescue;
15142 }
15143
15144 // The end node locations on rescue nodes will not be set correctly
15145 // since we won't know the end until we've found all subsequent
15146 // clauses. This sets the end location on all rescues once we know it.
15147 if (current != NULL) {
15148 const uint8_t *end_to_set = current->base.location.end;
15149 pm_rescue_node_t *clause = parent_node->rescue_clause;
15150
15151 while (clause != NULL) {
15152 clause->base.location.end = end_to_set;
15153 clause = clause->subsequent;
15154 }
15155 }
15156
 // Declared outside the `if` below because `opening` is pointed at it and is
 // still read by the `ensure` and `end` indentation checks further down.
15157 pm_token_t else_keyword;
15158 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15159 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15160 opening_newline_index = token_newline_index(parser);
15161
 // From here on, indentation is compared against the `else` keyword rather
 // than the original opening token.
15162 else_keyword = parser->current;
15163 opening = &else_keyword;
15164
15165 parser_lex(parser);
15166 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15167
15168 pm_statements_node_t *else_statements = NULL;
15169 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15170 pm_accepts_block_stack_push(parser, true);
15171 pm_context_t context;
15172
15173 switch (type) {
15174 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15175 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15176 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15177 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15178 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15179 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15180 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15181 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15182 }
15183
15184 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15185 pm_accepts_block_stack_pop(parser);
15186
15187 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15188 }
15189
15190 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15191 pm_begin_node_else_clause_set(parent_node, else_clause);
15192
15193 // If we don't have a `current` rescue node, then this is a dangling
15194 // else, and it's an error.
15195 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15196 }
15197
15198 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15199 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15200 pm_token_t ensure_keyword = parser->current;
15201
15202 parser_lex(parser);
15203 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15204
15205 pm_statements_node_t *ensure_statements = NULL;
15206 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15207 pm_accepts_block_stack_push(parser, true);
15208 pm_context_t context;
15209
 // NOTE(review): the fallback in the `default` arm below is
 // PM_CONTEXT_BEGIN_RESCUE, whereas the rescue and else switches above fall
 // back to a context of their own clause kind. It is only reachable when the
 // assert is compiled out and `type` is invalid — confirm this is intended.
15210 switch (type) {
15211 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15212 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15213 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15214 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15215 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15216 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15217 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15218 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15219 }
15220
15221 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15222 pm_accepts_block_stack_pop(parser);
15223
15224 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15225 }
15226
15227 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15228 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15229 }
15230
 // Record the `end` keyword without consuming it; when it is absent, record a
 // zero-width missing token located at the end of the previous token.
15231 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15232 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15233 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15234 } else {
15235 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15236 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15237 }
15238}
15239
15244static pm_begin_node_t *
15245parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15246 pm_token_t begin_keyword = not_provided(parser);
15247 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15248
15249 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15250 node->base.location.start = start;
15251
15252 return node;
15253}
15254
/**
 * Parse the parameter list of a block or lambda, together with any block-local
 * variables that are declared after a `;`, and return them as a block
 * parameters node.
 *
 * Ordinary parameters are parsed with parse_parameters unless the current
 * token is already a `;`. The binding power used for them depends on
 * is_lambda_literal. Block-local variables are only looked for when `opening`
 * is not a PM_TOKEN_NOT_PROVIDED token.
 *
 * NOTE(review): the line carrying this function's return type appears to be
 * missing from this listing; the value returned is `block_parameters`, a
 * pm_block_parameters_node_t pointer.
 */
15259parse_block_parameters(
15260 pm_parser_t *parser,
15261 bool allows_trailing_comma,
15262 const pm_token_t *opening,
15263 bool is_lambda_literal,
15264 bool accepts_blocks_in_defaults,
15265 uint16_t depth
15266) {
15267 pm_parameters_node_t *parameters = NULL;
15268 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15269 parameters = parse_parameters(
15270 parser,
15271 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15272 false,
15273 allows_trailing_comma,
15274 false,
15275 accepts_blocks_in_defaults,
15276 true,
15277 (uint16_t) (depth + 1)
15278 );
15279 }
15280
15281 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15282 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15283 accept1(parser, PM_TOKEN_NEWLINE);
15284
 // A `;` introduces a comma-separated list of block-local variables.
15285 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15286 do {
 // Tokens that cannot name a local are reported with a specific error and
 // then consumed, so that the code after the switch still sees a previous
 // token to build a node from.
15287 switch (parser->current.type) {
15288 case PM_TOKEN_CONSTANT:
15289 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15290 parser_lex(parser);
15291 break;
 // NOTE(review): the `case` labels for the next three arms appear to be
 // missing from this listing. Judging by the error ids they presumably match
 // instance, global and class variable tokens respectively — confirm.
15293 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15294 parser_lex(parser);
15295 break;
15297 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15298 parser_lex(parser);
15299 break;
15301 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15302 parser_lex(parser);
15303 break;
15304 default:
15305 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15306 break;
15307 }
15308
 // Whatever token was consumed above is now parser->previous; it is checked
 // as a parameter name, added to the locals, and wrapped in a node.
15309 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15310 pm_parser_local_add_token(parser, &parser->previous, 1);
15311
15312 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15313 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15314
15315 pm_block_parameters_node_append_local(block_parameters, local);
15316 } while (accept1(parser, PM_TOKEN_COMMA));
15317 }
15318 }
15319
15320 return block_parameters;
15321}
15322
15327static bool
15328outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15329 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15330 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15331 }
15332
15333 return false;
15334}
15335
/**
 * The names of the nine numbered parameters, in order. The entry at index i is
 * the name of parameter number i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
15344
/**
 * Decide which node describes the parameters of a block-like construct, given
 * its explicitly written parameters (possibly NULL) and the implicit
 * parameters collected on the current scope.
 *
 * - If explicit parameters were given they are returned as-is; when implicit
 *   parameters were also used, an error is added on the first of them.
 * - Otherwise, if no implicit parameters were used, NULL is returned.
 * - Otherwise the implicit parameters are validated and either a numbered
 *   parameters node (spanning opening to closing) or an `it` parameters node
 *   is returned. NULL is returned when validation accepted neither.
 */
15350static pm_node_t *
15351parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15352 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15353
15354 // If we have ordinary parameters, then we will return them as the set of
15355 // parameters.
15356 if (parameters != NULL) {
15357 // If we also have implicit parameters, then this is an error.
15358 if (implicit_parameters->size > 0) {
15359 pm_node_t *node = implicit_parameters->nodes[0];
15360
 // NOTE(review): the `if (...) {` and `} else if (...) {` headers that choose
 // between the next two errors appear to be missing from this listing;
 // presumably they test whether `node` is a numbered parameter or `it` —
 // confirm against the upstream file.
15362 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15364 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15365 } else {
15366 assert(false && "unreachable");
15367 }
15368 }
15369
15370 return parameters;
15371 }
15372
15373 // If we don't have any implicit parameters, then the set of parameters is
15374 // NULL.
15375 if (implicit_parameters->size == 0) {
15376 return NULL;
15377 }
15378
15379 // If we don't have ordinary parameters, then we now must validate our set
15380 // of implicit parameters. We can only have numbered parameters or it, but
15381 // they cannot be mixed.
 // numbered_parameter holds the highest numbered parameter accepted so far
 // (0 when there is none); it_parameter records that `it` was accepted.
15382 uint8_t numbered_parameter = 0;
15383 bool it_parameter = false;
15384
15385 for (size_t index = 0; index < implicit_parameters->size; index++) {
15386 pm_node_t *node = implicit_parameters->nodes[index];
15387
 // NOTE(review): the header of the outer `if` that opens the numbered
 // parameter branch appears to be missing from this listing — confirm.
15389 if (it_parameter) {
15390 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15391 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15392 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15393 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15394 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15395 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
 // The second byte of the node's source text is read as a digit and converted
 // to the parameter's number.
15396 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15397 } else {
15398 assert(false && "unreachable");
15399 }
 // NOTE(review): the `} else {` (or similar) header that opens the `it`
 // branch appears to be missing from this listing — confirm.
15401 if (numbered_parameter > 0) {
15402 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15403 } else {
15404 it_parameter = true;
15405 }
15406 }
15407 }
15408
15409 if (numbered_parameter > 0) {
15410 // Go through the parent scopes and mark them as being disallowed from
15411 // using numbered parameters because this inner scope is using them.
15412 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15413 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15414 }
15415
15416 const pm_location_t location = { .start = opening->start, .end = closing->end };
15417 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15418 }
15419
15420 if (it_parameter) {
15421 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15422 }
15423
15424 return NULL;
15425}
15426
/**
 * Parse a block and return its node. The token that opened the block has
 * already been consumed and is taken from parser->previous; when it is a `{`
 * the body runs to `}`, otherwise the body runs to `end` and may carry
 * rescue/ensure clauses.
 *
 * A new scope and an accepts-block entry are pushed for the duration of the
 * block and popped again before the node is created. depth is passed on,
 * incremented by one, to every nested parse call.
 */
15430static pm_block_node_t *
15431parse_block(pm_parser_t *parser, uint16_t depth) {
15432 pm_token_t opening = parser->previous;
15433 accept1(parser, PM_TOKEN_NEWLINE);
15434
15435 pm_accepts_block_stack_push(parser, true);
15436 pm_parser_scope_push(parser, false);
15437
15438 pm_block_parameters_node_t *block_parameters = NULL;
15439
 // Explicit parameters are written between a pair of `|` tokens.
15440 if (accept1(parser, PM_TOKEN_PIPE)) {
15441 pm_token_t block_parameters_opening = parser->previous;
15442 if (match1(parser, PM_TOKEN_PIPE)) {
 // An immediately following `|` means the parameter list is empty.
15443 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15444 parser->command_start = true;
15445 parser_lex(parser);
15446 } else {
15447 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15448 accept1(parser, PM_TOKEN_NEWLINE);
15449 parser->command_start = true;
15450 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15451 }
15452
15453 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15454 }
15455
15456 accept1(parser, PM_TOKEN_NEWLINE);
15457 pm_node_t *statements = NULL;
15458
15459 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15460 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15461 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15462 }
15463
15464 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15465 } else {
15466 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
 // NOTE(review): a line appears to be missing from this listing here. The
 // first `}` after the pop below closes a block with no visible opener,
 // presumably a condition that skips the statements when a clause keyword
 // follows directly — confirm against the upstream file.
15468 pm_accepts_block_stack_push(parser, true);
15469 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15470 pm_accepts_block_stack_pop(parser);
15471 }
15472
 // When a rescue or ensure clause follows, the statements parsed so far are
 // wrapped in an implicit begin node that starts at the block's opening.
15473 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15474 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15475 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15476 }
15477 }
15478
15479 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15480 }
15481
 // The locals and the parameters node are both derived from the current scope,
 // so they are computed before that scope is popped.
15482 pm_constant_id_list_t locals;
15483 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15484 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15485
15486 pm_parser_scope_pop(parser);
15487 pm_accepts_block_stack_pop(parser);
15488
15489 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15490}
15491
15497static bool
15498parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15499 bool found = false;
15500
15501 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15502 found |= true;
15503 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15504
15505 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15506 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15507 } else {
15508 pm_accepts_block_stack_push(parser, true);
15509 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15510
15511 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15512 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15513 parser->previous.start = parser->previous.end;
15514 parser->previous.type = PM_TOKEN_MISSING;
15515 }
15516
15517 pm_accepts_block_stack_pop(parser);
15518 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15519 }
15520 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15521 found |= true;
15522 pm_accepts_block_stack_push(parser, false);
15523
15524 // If we get here, then the subsequent token cannot be used as an infix
15525 // operator. In this case we assume the subsequent token is part of an
15526 // argument to this method call.
15527 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15528
15529 // If we have done with the arguments and still not consumed the comma,
15530 // then we have a trailing comma where we need to check whether it is
15531 // allowed or not.
15532 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15533 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15534 }
15535
15536 pm_accepts_block_stack_pop(parser);
15537 }
15538
15539 // If we're at the end of the arguments, we can now check if there is a block
15540 // node that starts with a {. If there is, then we can parse it and add it to
15541 // the arguments.
15542 if (accepts_block) {
15543 pm_block_node_t *block = NULL;
15544
15545 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15546 found |= true;
15547 block = parse_block(parser, (uint16_t) (depth + 1));
15548 pm_arguments_validate_block(parser, arguments, block);
15549 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15550 found |= true;
15551 block = parse_block(parser, (uint16_t) (depth + 1));
15552 }
15553
15554 if (block != NULL) {
15555 if (arguments->block == NULL && !arguments->has_forwarding) {
15556 arguments->block = (pm_node_t *) block;
15557 } else {
15558 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15559
15560 if (arguments->block != NULL) {
15561 if (arguments->arguments == NULL) {
15562 arguments->arguments = pm_arguments_node_create(parser);
15563 }
15564 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15565 }
15566 arguments->block = (pm_node_t *) block;
15567 }
15568 }
15569 }
15570
15571 return found;
15572}
15573
/**
 * Validate a `return` node against the stack of contexts it appears in,
 * walking outward from the current context. Depending on the context found,
 * the walk continues, stops silently because the return is valid, or stops
 * after adding PM_ERR_RETURN_INVALID to the node. If the walk reaches the top
 * after having passed through a singleton class context, the same error is
 * added.
 *
 * NOTE(review): many `case` labels appear to be missing from this listing, so
 * each group of labels below is likely incomplete — confirm against the
 * upstream file before relying on which context belongs to which group.
 */
15578static void
15579parse_return(pm_parser_t *parser, pm_node_t *node) {
 // Set once a singleton class context has been passed on the way outward.
15580 bool in_sclass = false;
15581 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15582 switch (context_node->context) {
15586 case PM_CONTEXT_BEGIN:
15587 case PM_CONTEXT_CASE_IN:
15590 case PM_CONTEXT_DEFINED:
15591 case PM_CONTEXT_ELSE:
15592 case PM_CONTEXT_ELSIF:
15593 case PM_CONTEXT_EMBEXPR:
15595 case PM_CONTEXT_FOR:
15596 case PM_CONTEXT_IF:
15598 case PM_CONTEXT_MAIN:
15600 case PM_CONTEXT_PARENS:
15601 case PM_CONTEXT_POSTEXE:
15603 case PM_CONTEXT_PREEXE:
15605 case PM_CONTEXT_TERNARY:
15606 case PM_CONTEXT_UNLESS:
15607 case PM_CONTEXT_UNTIL:
15608 case PM_CONTEXT_WHILE:
15609 // Keep iterating up the lists of contexts, because returns can
15610 // see through these.
15611 continue;
15615 case PM_CONTEXT_SCLASS:
 // Remember the singleton class and keep walking; the decision is made
 // after the loop.
15616 in_sclass = true;
15617 continue;
15621 case PM_CONTEXT_CLASS:
15625 case PM_CONTEXT_MODULE:
15626 // These contexts are invalid for a return.
15627 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15628 return;
15638 case PM_CONTEXT_DEF:
15644 // These contexts are valid for a return, and we should not
15645 // continue to loop.
15646 return;
15647 case PM_CONTEXT_NONE:
15648 // This case should never happen.
15649 assert(false && "unreachable");
15650 break;
15651 }
15652 }
15653 if (in_sclass) {
15654 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15655 }
15656}
15657
/**
 * Check a block exit node against the stack of contexts it appears in, walking
 * outward from the current context. The walk stops at the first context that
 * either permits the exit (nothing is recorded) or forbids it (the node is
 * appended to parser->current_block_exits so that it can be validated later).
 * All other contexts are skipped over.
 *
 * NOTE(review): many `case` labels appear to be missing from this listing, so
 * each group of labels below is likely incomplete — confirm against the
 * upstream file before relying on which context belongs to which group.
 */
15662static void
15663parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15664 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15665 switch (context_node->context) {
15671 case PM_CONTEXT_DEFINED:
15672 case PM_CONTEXT_FOR:
15679 case PM_CONTEXT_POSTEXE:
15680 case PM_CONTEXT_UNTIL:
15681 case PM_CONTEXT_WHILE:
15682 // These are the good cases. We're allowed to have a block exit
15683 // in these contexts.
15684 return;
15685 case PM_CONTEXT_DEF:
15690 case PM_CONTEXT_MAIN:
15691 case PM_CONTEXT_PREEXE:
15692 case PM_CONTEXT_SCLASS:
15696 // These are the bad cases. We're not allowed to have a block
15697 // exit in these contexts.
15698 //
15699 // If we get here, then we're about to mark this block exit
15700 // as invalid. However, it could later _become_ valid if we
15701 // find a trailing while/until on the expression. In this
15702 // case instead of adding the error here, we'll add the
15703 // block exit to the list of exits for the expression, and
15704 // the node parsing will handle validating it instead.
15705 assert(parser->current_block_exits != NULL);
15706 pm_node_list_append(parser->current_block_exits, node);
15707 return;
15711 case PM_CONTEXT_BEGIN:
15712 case PM_CONTEXT_CASE_IN:
15717 case PM_CONTEXT_CLASS:
15719 case PM_CONTEXT_ELSE:
15720 case PM_CONTEXT_ELSIF:
15721 case PM_CONTEXT_EMBEXPR:
15723 case PM_CONTEXT_IF:
15727 case PM_CONTEXT_MODULE:
15729 case PM_CONTEXT_PARENS:
15732 case PM_CONTEXT_TERNARY:
15733 case PM_CONTEXT_UNLESS:
15734 // In these contexts we should continue walking up the list of
15735 // contexts.
 // The `break` leaves only the switch, so the loop moves on to the
 // previous context.
15736 break;
15737 case PM_CONTEXT_NONE:
15738 // This case should never happen.
15739 assert(false && "unreachable");
15740 break;
15741 }
15742 }
15743}
15744
15749static pm_node_list_t *
15750push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15751 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15752 parser->current_block_exits = current_block_exits;
15753 return previous_block_exits;
15754}
15755
15761static void
15762flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15763 pm_node_t *block_exit;
15764 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15765 const char *type;
15766
15767 switch (PM_NODE_TYPE(block_exit)) {
15768 case PM_BREAK_NODE: type = "break"; break;
15769 case PM_NEXT_NODE: type = "next"; break;
15770 case PM_REDO_NODE: type = "redo"; break;
15771 default: assert(false && "unreachable"); type = ""; break;
15772 }
15773
15774 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15775 }
15776
15777 parser->current_block_exits = previous_block_exits;
15778}
15779
/**
 * Finish with the parser's current list of block exits and make
 * previous_block_exits the current list again. Depending on the branch taken,
 * the collected exits are dropped as valid, moved up into the previous list,
 * or reported as errors through flush_block_exits.
 */
15784static void
15785pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
 // NOTE(review): the opening `if (...) {` of this chain appears to be missing
 // from this listing. Judging by the comment below it presumably tests for a
 // trailing while/until modifier — confirm against the upstream file.
15787 // If we matched a trailing while/until, then all of the block exits in
15788 // the contained list are valid. In this case we do not need to do
15789 // anything.
15790 parser->current_block_exits = previous_block_exits;
15791 } else if (previous_block_exits != NULL) {
15792 // If we did not match a trailing while/until, then all of the block
15793 // exits contained in the list are invalid for this specific context.
15794 // However, they could still become valid in a higher level context if
15795 // there is another list above this one. In this case we'll push all of
15796 // the block exits up to the previous list.
15797 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15798 parser->current_block_exits = previous_block_exits;
15799 } else {
15800 // If we did not match a trailing while/until and this was the last
15801 // chance to do so, then all of the block exits in the list are invalid
15802 // and we need to add an error for each of them.
15803 flush_block_exits(parser, previous_block_exits);
15804 }
15805}
15806
15807static inline pm_node_t *
15808parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15809 context_push(parser, PM_CONTEXT_PREDICATE);
15810 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15811 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15812
15813 // Predicates are closed by a term, a "then", or a term and then a "then".
15814 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15815
15816 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15817 predicate_closed = true;
15818 *then_keyword = parser->previous;
15819 }
15820
15821 if (!predicate_closed) {
15822 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15823 }
15824
15825 context_pop(parser);
15826 return predicate;
15827}
15828
/**
 * Parse an `if` or `unless` statement whose keyword has already been consumed
 * (it is taken from parser->previous), including any `elsif` clauses (for `if`
 * only), an optional `else` clause, and the closing `end`. Returns the
 * outermost if/unless node.
 *
 * - context must be PM_CONTEXT_IF or PM_CONTEXT_UNLESS; any other value hits
 *   the "unreachable" assertions.
 * - opening_newline_index is used for the indentation checks of the clause
 *   keywords and of `end`.
 * - if_after_else is forwarded to parser_warn_indentation_mismatch for the
 *   closing `end` when there is no `else` clause.
 * - depth is passed on, incremented by one, to every nested parse call.
 *
 * A fresh list of block exits is installed for the duration of the parse and
 * handed to pop_block_exits at the end.
 */
15829static inline pm_node_t *
15830parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15831 pm_node_list_t current_block_exits = { 0 };
15832 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15833
15834 pm_token_t keyword = parser->previous;
15835 pm_token_t then_keyword = not_provided(parser);
15836
15837 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15838 pm_statements_node_t *statements = NULL;
15839
 // NOTE(review): a line appears to be missing from this listing here. The `}`
 // after the accept2 call below closes a block with no visible opener,
 // presumably a condition that skips the statements when a clause keyword or
 // `end` follows directly — confirm against the upstream file.
15841 pm_accepts_block_stack_push(parser, true);
15842 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15843 pm_accepts_block_stack_pop(parser);
15844 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15845 }
15846
 // The end keyword is not known yet, so the nodes are created with a "not
 // provided" token and patched once `end` has been consumed.
15847 pm_token_t end_keyword = not_provided(parser);
15848 pm_node_t *parent = NULL;
15849
15850 switch (context) {
15851 case PM_CONTEXT_IF:
15852 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15853 break;
15854 case PM_CONTEXT_UNLESS:
15855 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15856 break;
15857 default:
15858 assert(false && "unreachable");
15859 break;
15860 }
15861
 // The node that the next elsif or else clause is attached to.
15862 pm_node_t *current = parent;
15863
15864 // Parse any number of elsif clauses. This will form a linked list of if
15865 // nodes pointing to each other from the top.
15866 if (context == PM_CONTEXT_IF) {
15867 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15868 if (parser_end_of_line_p(parser)) {
15869 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15870 }
15871
15872 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15873 pm_token_t elsif_keyword = parser->current;
15874 parser_lex(parser);
15875
 // These declarations shadow the outer `predicate` and `statements`.
 // NOTE(review): `then_keyword` is shared with the clauses parsed before this
 // one and parse_predicate only writes it when a `then` is consumed, so a
 // clause without `then` reuses the earlier value — confirm this is intended.
15876 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15877 pm_accepts_block_stack_push(parser, true);
15878
15879 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15880 pm_accepts_block_stack_pop(parser);
15881 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15882
15883 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15884 ((pm_if_node_t *) current)->subsequent = elsif;
15885 current = elsif;
15886 }
15887 }
15888
15889 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15890 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15891 opening_newline_index = token_newline_index(parser);
15892
15893 parser_lex(parser);
15894 pm_token_t else_keyword = parser->previous;
15895
15896 pm_accepts_block_stack_push(parser, true);
15897 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15898 pm_accepts_block_stack_pop(parser);
15899
15900 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
 // With an else clause present, `end` is checked against the `else` keyword.
15901 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15902 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15903
15904 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15905
 // For `if` the else node ends the chain at the last if/elsif node; for
 // `unless` it is stored on the unless node itself.
15906 switch (context) {
15907 case PM_CONTEXT_IF:
15908 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15909 break;
15910 case PM_CONTEXT_UNLESS:
15911 ((pm_unless_node_t *) parent)->else_clause = else_node;
15912 break;
15913 default:
15914 assert(false && "unreachable");
15915 break;
15916 }
15917 } else {
15918 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15919 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15920 }
15921
15922 // Set the appropriate end location for all of the nodes in the subtree.
15923 switch (context) {
15924 case PM_CONTEXT_IF: {
 // This `current` shadows the outer one and walks the chain from the top.
15925 pm_node_t *current = parent;
15926 bool recursing = true;
15927
15928 while (recursing) {
15929 switch (PM_NODE_TYPE(current)) {
15930 case PM_IF_NODE:
15931 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15932 current = ((pm_if_node_t *) current)->subsequent;
15933 recursing = current != NULL;
15934 break;
15935 case PM_ELSE_NODE:
15936 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15937 recursing = false;
15938 break;
15939 default: {
15940 recursing = false;
15941 break;
15942 }
15943 }
15944 }
15945 break;
15946 }
15947 case PM_CONTEXT_UNLESS:
15948 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15949 break;
15950 default:
15951 assert(false && "unreachable");
15952 break;
15953 }
15954
15955 pop_block_exits(parser, previous_block_exits);
15956 pm_node_list_free(&current_block_exits);
15957
15958 return parent;
15959}
15960
/**
 * Expands to the case labels for every keyword token, for use inside a switch
 * statement. Write it after `case` and follow it with `:`, as in
 * `case PM_CASE_KEYWORD:`; the expansion supplies the labels in between.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15976
/**
 * Expands to the case labels for every operator token, for use inside a switch
 * statement. Write it after `case` and follow it with `:`, as in
 * `case PM_CASE_OPERATOR:`; the expansion supplies the labels in between.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15989
/**
 * A list of case labels covering the token types grouped under the name
 * "primitive": numeric literals, literal openers (symbol, regexp, backtick,
 * percent lists, string, heredoc), a handful of keywords, the lambda arrow,
 * unary minus on a number and character literals. Written without the leading
 * `case` and trailing colon so it is used as `case PM_CASE_PRIMITIVE:`.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
16004
/**
 * A list of case labels covering the token types grouped under the name
 * "parameter". Written without the leading `case` and trailing colon so it is
 * used as `case PM_CASE_PARAMETER:`.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
16013
/**
 * A list of case labels covering the node types grouped under the name
 * "writable". Unlike the token lists above, these are node type constants.
 * Written without the leading `case` and trailing colon so it is used as
 * `case PM_CASE_WRITABLE:`.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
16022
16023// Assert at compile time that the forced-UTF-8 flag has the same value in the
16024// string flags and in the encoding flags, so that the type of a node can be
// switched without having to move (re-encode) the flags stored on it.
16025PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16026
/**
 * Compute the node flags that describe a forced encoding for string content,
 * based on parser->explicit_encoding and parser->encoding. Returns 0 when no
 * forced-encoding flag applies.
 *
 * NOTE(review): this listing skips embedded source lines 16034, 16037 and
 * 16043. The braces below are unbalanced as shown, so an inner condition and
 * presumably the two non-zero `return` statements are not visible here —
 * confirm against the full file before editing.
 */
16031static inline pm_node_flags_t
16032parse_unescaped_encoding(const pm_parser_t *parser) {
16033 if (parser->explicit_encoding != NULL) {
16035 // If there's an explicit encoding and it's using a UTF-8 escape
16036 // sequence, then mark the string as UTF-8.
16038 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16039 // If there's a non-UTF-8 escape sequence being used, then the
16040 // string uses the source encoding, unless the source is marked as
16041 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16042 // order to keep the string valid.
16044 }
16045 }
// No explicit encoding was recorded (or neither branch applied): no flags.
16046 return 0;
16047}
16048
/**
 * Parse a single part of a string-like literal, dispatching on the type of
 * the current token. Depending on the token this yields a plain string node,
 * an embedded statements node (`#{...}`) or an embedded variable node
 * (`#@foo`). For any other token an error is recorded on the consumed token
 * and NULL is returned, so callers must check for NULL.
 *
 * `depth` is the current recursion depth; it is passed on (plus one) to
 * parse_statements when parsing the body of an embedded expression.
 *
 * NOTE(review): this listing skips several embedded source lines (16062,
 * 16078, 16135, 16141, 16147, 16153, 16159). Judging by the surrounding
 * comments these are the `case ...:` labels that open each arm — confirm
 * against the full file.
 */
16053static pm_node_t *
16054parse_string_part(pm_parser_t *parser, uint16_t depth) {
16055 switch (parser->current.type) {
16056 // Here the lexer has returned to us plain string content. In this case
16057 // we'll create a string node that has no opening or closing and return that
16058 // as the part. These kinds of parts look like:
16059 //
16060 // "aaa #{bbb} #@ccc ddd"
16061 // ^^^^ ^ ^^^^
16063 pm_token_t opening = not_provided(parser);
16064 pm_token_t closing = not_provided(parser);
16065
16066 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16067 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16068
// Consume the content token now that the node has been built from it.
16069 parser_lex(parser);
16070 return node;
16071 }
16072 // Here the lexer has returned the beginning of an embedded expression. In
16073 // that case we'll parse the inner statements and return that as the part.
16074 // These kinds of parts look like:
16075 //
16076 // "aaa #{bbb} #@ccc ddd"
16077 // ^^^^^^
16079 // Ruby disallows seeing encoding around interpolation in strings,
16080 // even though it is known at parse time.
16081 parser->explicit_encoding = NULL;
16082
// Save the lex state and brace nesting so that they can be restored once
// the embedded expression has been parsed.
16083 pm_lex_state_t state = parser->lex_state;
16084 int brace_nesting = parser->brace_nesting;
16085
16086 parser->brace_nesting = 0;
16087 lex_state_set(parser, PM_LEX_STATE_BEG);
16088 parser_lex(parser);
16089
16090 pm_token_t opening = parser->previous;
16091 pm_statements_node_t *statements = NULL;
16092
// An immediately closed expression (`#{}`) leaves statements as NULL.
16093 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16094 pm_accepts_block_stack_push(parser, true);
16095 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16096 pm_accepts_block_stack_pop(parser);
16097 }
16098
16099 parser->brace_nesting = brace_nesting;
16100 lex_state_set(parser, state);
16101
16102 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16103 pm_token_t closing = parser->previous;
16104
16105 // If this set of embedded statements only contains a single
16106 // statement, then Ruby does not consider it as a possible statement
16107 // that could emit a line event.
16108 if (statements != NULL && statements->body.size == 1) {
16109 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16110 }
16111
16112 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16113 }
16114
16115 // Here the lexer has returned the beginning of an embedded variable.
16116 // In that case we'll parse the variable and create an appropriate node
16117 // for it and then return that node. These kinds of parts look like:
16118 //
16119 // "aaa #{bbb} #@ccc ddd"
16120 // ^^^^^
16121 case PM_TOKEN_EMBVAR: {
16122 // Ruby disallows seeing encoding around interpolation in strings,
16123 // even though it is known at parse time.
16124 parser->explicit_encoding = NULL;
16125
16126 lex_state_set(parser, PM_LEX_STATE_BEG);
16127 parser_lex(parser);
16128
// The `#` that introduced the variable has just been consumed.
16129 pm_token_t operator = parser->previous;
16130 pm_node_t *variable;
16131
16132 switch (parser->current.type) {
16133 // In this case a back reference is being interpolated. We'll
16134 // create a back reference read node.
16136 parser_lex(parser);
16137 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16138 break;
16139 // In this case an nth reference is being interpolated. We'll
16140 // create a numbered reference read node.
16142 parser_lex(parser);
16143 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16144 break;
16145 // In this case a global variable is being interpolated. We'll
16146 // create a global variable read node.
16148 parser_lex(parser);
16149 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16150 break;
16151 // In this case an instance variable is being interpolated.
16152 // We'll create an instance variable read node.
16154 parser_lex(parser);
16155 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16156 break;
16157 // In this case a class variable is being interpolated. We'll
16158 // create a class variable read node.
16160 parser_lex(parser);
16161 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16162 break;
16163 // We can hit here if we got an invalid token. In that case
16164 // we'll not attempt to lex this token and instead just return a
16165 // missing node.
16166 default:
16167 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16168 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16169 break;
16170 }
16171
16172 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16173 }
// Any other token cannot start a string part: consume it, report the error
// on it, and signal the failure to the caller with NULL.
16174 default:
16175 parser_lex(parser);
16176 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16177 return NULL;
16178 }
16179}
16180
/**
 * Return a pointer to the end of the name of the operator held in the given
 * token. For `~` and `!` tokens whose last byte is `@` (as in `~@` and `!@`),
 * the trailing `@` is excluded from the name; for every other token the end
 * of the token is returned.
 *
 * NOTE(review): this listing skips embedded source line 16192, between the
 * `@` check and the `default` label. As shown, control falls from the `~`/`!`
 * arm into `default` when the token does not end in `@` — presumably the
 * missing line is an explicit fallthrough marker; confirm against the full
 * file.
 */
16186static const uint8_t *
16187parse_operator_symbol_name(const pm_token_t *name) {
16188 switch (name->type) {
16189 case PM_TOKEN_TILDE:
16190 case PM_TOKEN_BANG:
16191 if (name->end[-1] == '@') return name->end - 1;
16193 default:
16194 return name->end;
16195 }
16196}
16197
16198static pm_node_t *
16199parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16200 pm_token_t closing = not_provided(parser);
16201 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16202
16203 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16204
16205 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16206 parser_lex(parser);
16207
16208 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16209 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16210
16211 return (pm_node_t *) symbol;
16212}
16213
/**
 * Parse a symbol literal. The opening token has already been consumed by the
 * caller and is read from parser->previous.
 *
 * `lex_mode` is the lex mode supplied by the caller. When its mode is not
 * PM_LEX_STRING the symbol is a bare name or operator; otherwise the symbol
 * is delimited string content, parsed with or without interpolation
 * according to lex_mode->as.string.interpolation. `next_state`, when it is
 * not PM_LEX_STATE_NONE, is installed with lex_state_set at the points shown
 * below. `depth` is passed on (plus one) to parse_string_part.
 *
 * Returns either a symbol node or an interpolated symbol node.
 *
 * NOTE(review): this listing skips embedded source lines 16229 and
 * 16231-16236 inside the first switch, so some case labels (and possibly
 * statements) of that switch are not visible here — confirm against the full
 * file.
 */
16219static pm_node_t *
16220parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16221 const pm_token_t opening = parser->previous;
16222
// Bare symbols: the name is a single operator, constant, keyword,
// identifier or method name token.
16223 if (lex_mode->mode != PM_LEX_STRING) {
16224 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16225
16226 switch (parser->current.type) {
16227 case PM_CASE_OPERATOR:
16228 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16230 case PM_TOKEN_CONSTANT:
16237 case PM_CASE_KEYWORD:
16238 parser_lex(parser);
16239 break;
16240 default:
16241 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16242 break;
16243 }
16244
16245 pm_token_t closing = not_provided(parser);
16246 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16247
16248 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16249 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16250
16251 return (pm_node_t *) symbol;
16252 }
16253
// Delimited symbols whose content allows interpolation.
16254 if (lex_mode->as.string.interpolation) {
16255 // If we have the end of the symbol, then we can return an empty symbol.
16256 if (match1(parser, PM_TOKEN_STRING_END)) {
16257 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16258 parser_lex(parser);
16259
16260 pm_token_t content = not_provided(parser);
16261 pm_token_t closing = parser->previous;
16262 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16263 }
16264
16265 // Now we can parse the first part of the symbol.
16266 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16267
16268 // If we got a string part, then it's possible that we could transform
16269 // what looks like an interpolated symbol into a regular symbol.
16270 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16271 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16272 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16273
16274 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16275 }
16276
// The opening token is passed as a stand-in closing here; the closing
// location is set explicitly once the terminator has been handled below.
16277 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16278 if (part) pm_interpolated_symbol_node_append(symbol, part);
16279
// parse_string_part returns NULL for tokens it cannot handle; those are
// skipped rather than appended.
16280 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16281 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16282 pm_interpolated_symbol_node_append(symbol, part);
16283 }
16284 }
16285
16286 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
// At end of file the error is reported on the opening token; otherwise the
// terminator is expected and consumed.
16287 if (match1(parser, PM_TOKEN_EOF)) {
16288 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16289 } else {
16290 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16291 }
16292
16293 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16294 return (pm_node_t *) symbol;
16295 }
16296
// Delimited symbols without interpolation.
16297 pm_token_t content;
16298 pm_string_t unescaped;
16299
16300 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16301 content = parser->current;
16302 unescaped = parser->current_string;
16303 parser_lex(parser);
16304
16305 // If we have two string contents in a row, then the content of this
16306 // symbol is split because of heredoc contents. This looks like:
16307 //
16308 // <<A; :'a
16309 // A
16310 // b'
16311 //
16312 // In this case, the best way we have to represent this is as an
16313 // interpolated symbol node, so that's what we'll do here.
16314 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16315 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16316 pm_token_t bounds = not_provided(parser);
16317
16318 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16319 pm_interpolated_symbol_node_append(symbol, part);
16320
16321 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16322 pm_interpolated_symbol_node_append(symbol, part);
16323
16324 if (next_state != PM_LEX_STATE_NONE) {
16325 lex_state_set(parser, next_state);
16326 }
16327
16328 parser_lex(parser);
16329 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16330
16331 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16332 return (pm_node_t *) symbol;
16333 }
16334 } else {
// No content token: synthesize a zero-width content token located at the
// end of the previous token.
16335 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16336 pm_string_shared_init(&unescaped, content.start, content.end);
16337 }
16338
16339 if (next_state != PM_LEX_STATE_NONE) {
16340 lex_state_set(parser, next_state);
16341 }
16342
16343 if (match1(parser, PM_TOKEN_EOF)) {
16344 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16345 } else {
16346 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16347 }
16348
16349 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16350}
16351
/**
 * Parse a single argument to the `undef` keyword, dispatching on the type of
 * the current token. Operators, keywords, constants and method names become
 * symbol nodes with no opening or closing; a symbol literal is handed to
 * parse_symbol. For any other token an error is recorded and a missing node
 * covering the current token is returned.
 *
 * `depth` is passed on (plus one) to parse_symbol.
 *
 * NOTE(review): this listing skips embedded source line 16365, between the
 * PM_TOKEN_CONSTANT and PM_TOKEN_METHOD_NAME labels — presumably one more
 * case label; confirm against the full file.
 */
16356static inline pm_node_t *
16357parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16358 switch (parser->current.type) {
16359 case PM_CASE_OPERATOR: {
16360 const pm_token_t opening = not_provided(parser);
16361 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16362 }
16363 case PM_CASE_KEYWORD:
16364 case PM_TOKEN_CONSTANT:
16366 case PM_TOKEN_METHOD_NAME: {
16367 parser_lex(parser);
16368
16369 pm_token_t opening = not_provided(parser);
16370 pm_token_t closing = not_provided(parser);
16371 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16372
16373 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16374 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16375
16376 return (pm_node_t *) symbol;
16377 }
16378 case PM_TOKEN_SYMBOL_BEGIN: {
// Copy the lex mode before lexing, since parse_symbol needs the mode that
// was current when the symbol began.
16379 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16380 parser_lex(parser);
16381
16382 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16383 }
16384 default:
16385 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16386 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16387 }
16388}
16389
/**
 * Parse a single argument to the `alias` keyword, dispatching on the type of
 * the current token. `first` indicates that this is the first of the two
 * arguments: in that case the lex state is set to
 * PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM before the token is consumed, so
 * that the following argument is lexed as a name. `depth` is passed on (plus
 * one) to parse_symbol.
 *
 * Returns a symbol node, a back reference / numbered reference / global
 * variable read node, or (after recording an error) a missing node.
 *
 * NOTE(review): this listing skips embedded source lines 16405, 16425, 16428
 * and 16431. The last three precede the back-reference, numbered-reference
 * and global-variable arms, so presumably they are the matching case labels —
 * confirm against the full file.
 */
16396static inline pm_node_t *
16397parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16398 switch (parser->current.type) {
16399 case PM_CASE_OPERATOR: {
16400 const pm_token_t opening = not_provided(parser);
16401 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16402 }
16403 case PM_CASE_KEYWORD:
16404 case PM_TOKEN_CONSTANT:
16406 case PM_TOKEN_METHOD_NAME: {
16407 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16408 parser_lex(parser);
16409
16410 pm_token_t opening = not_provided(parser);
16411 pm_token_t closing = not_provided(parser);
16412 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16413
16414 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16415 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16416
16417 return (pm_node_t *) symbol;
16418 }
16419 case PM_TOKEN_SYMBOL_BEGIN: {
// Copy the lex mode before lexing, since parse_symbol needs the mode that
// was current when the symbol began.
16420 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16421 parser_lex(parser);
16422
16423 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16424 }
16426 parser_lex(parser);
16427 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16429 parser_lex(parser);
16430 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16432 parser_lex(parser);
16433 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16434 default:
16435 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16436 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16437 }
16438}
16439
16444static pm_node_t *
16445parse_variable(pm_parser_t *parser) {
16446 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16447 int depth;
16448 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16449
16450 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16451 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16452 }
16453
16454 pm_scope_t *current_scope = parser->current_scope;
16455 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16456 if (is_numbered_param) {
16457 // When you use a numbered parameter, it implies the existence of
16458 // all of the locals that exist before it. For example, referencing
16459 // _2 means that _1 must exist. Therefore here we loop through all
16460 // of the possibilities and add them into the constant pool.
16461 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16462 for (uint8_t number = 1; number <= maximum; number++) {
16463 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16464 }
16465
16466 if (!match1(parser, PM_TOKEN_EQUAL)) {
16467 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16468 }
16469
16470 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16471 pm_node_list_append(&current_scope->implicit_parameters, node);
16472
16473 return node;
16474 } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16475 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16476 pm_node_list_append(&current_scope->implicit_parameters, node);
16477
16478 return node;
16479 }
16480 }
16481
16482 return NULL;
16483}
16484
/**
 * Parse the identifier in parser->previous as either a variable read or a
 * method call with no receiver and no arguments.
 *
 * When the identifier is not followed by `(` and does not end in `!` or `?`,
 * it is first offered to parse_variable; if that yields a node, it is
 * returned. Otherwise a call node is created from the identifier and
 * returned with `flags` applied.
 *
 * NOTE(review): this listing skips embedded source line 16495, just before
 * the closing brace of the `if`. As shown, `flags` is always 0 when it is
 * applied below — presumably the missing line sets a flag on this path;
 * confirm against the full file.
 */
16488static pm_node_t *
16489parse_variable_call(pm_parser_t *parser) {
16490 pm_node_flags_t flags = 0;
16491
16492 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16493 pm_node_t *node = parse_variable(parser);
16494 if (node != NULL) return node;
16496 }
16497
16498 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16499 pm_node_flag_set((pm_node_t *)node, flags);
16500
16501 return (pm_node_t *) node;
16502}
16503
/**
 * Consume and return the token that names a method in a method definition,
 * dispatching on the type of the current token. For operator names the lex
 * state is set to PM_LEX_STATE_ENDFN before the token is consumed. For any
 * token that cannot name a method, an error is recorded and a
 * PM_TOKEN_MISSING token spanning the current token is returned without
 * consuming anything.
 *
 * NOTE(review): this listing skips embedded source lines 16514 and 16517,
 * which sit where case labels would be expected (the second one opens the
 * arm that calls pm_refute_numbered_parameter) — confirm against the full
 * file.
 */
16509static inline pm_token_t
16510parse_method_definition_name(pm_parser_t *parser) {
16511 switch (parser->current.type) {
16512 case PM_CASE_KEYWORD:
16513 case PM_TOKEN_CONSTANT:
16515 parser_lex(parser);
16516 return parser->previous;
// This arm additionally checks the name against the numbered parameter
// names before consuming it.
16518 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16519 parser_lex(parser);
16520 return parser->previous;
16521 case PM_CASE_OPERATOR:
16522 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16523 parser_lex(parser);
16524 return parser->previous;
16525 default:
16526 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16527 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16528 }
16529}
16530
16531static void
16532parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16533 // Get a reference to the string struct that is being held by the string
16534 // node. This is the value we're going to actually manipulate.
16535 pm_string_ensure_owned(string);
16536
16537 // Now get the bounds of the existing string. We'll use this as a
16538 // destination to move bytes into. We'll also use it for bounds checking
16539 // since we don't require that these strings be null terminated.
16540 size_t dest_length = pm_string_length(string);
16541 const uint8_t *source_cursor = (uint8_t *) string->source;
16542 const uint8_t *source_end = source_cursor + dest_length;
16543
16544 // We're going to move bytes backward in the string when we get leading
16545 // whitespace, so we'll maintain a pointer to the current position in the
16546 // string that we're writing to.
16547 size_t trimmed_whitespace = 0;
16548
16549 // While we haven't reached the amount of common whitespace that we need to
16550 // trim and we haven't reached the end of the string, we'll keep trimming
16551 // whitespace. Trimming in this context means skipping over these bytes such
16552 // that they aren't copied into the new string.
16553 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16554 if (*source_cursor == '\t') {
16555 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16556 if (trimmed_whitespace > common_whitespace) break;
16557 } else {
16558 trimmed_whitespace++;
16559 }
16560
16561 source_cursor++;
16562 dest_length--;
16563 }
16564
16565 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16566 string->length = dest_length;
16567}
16568
16572static void
16573parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16574 // The next node should be dedented if it's the first node in the list or if
16575 // it follows a string node.
16576 bool dedent_next = true;
16577
16578 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16579 // keep around two indices: a read and a write. If we end up trimming all of
16580 // the whitespace from a node, then we'll drop it from the list entirely.
16581 size_t write_index = 0;
16582
16583 pm_node_t *node;
16584 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16585 // We're not manipulating child nodes that aren't strings. In this case
16586 // we'll skip past it and indicate that the subsequent node should not
16587 // be dedented.
16588 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16589 nodes->nodes[write_index++] = node;
16590 dedent_next = false;
16591 continue;
16592 }
16593
16594 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16595 if (dedent_next) {
16596 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16597 }
16598
16599 if (string_node->unescaped.length == 0) {
16600 pm_node_destroy(parser, node);
16601 } else {
16602 nodes->nodes[write_index++] = node;
16603 }
16604
16605 // We always dedent the next node if it follows a string node.
16606 dedent_next = true;
16607 }
16608
16609 nodes->size = write_index;
16610}
16611
16615static pm_token_t
16616parse_strings_empty_content(const uint8_t *location) {
16617 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16618}
16619
16623static inline pm_node_t *
16624parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16625 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16626 bool concating = false;
16627
16628 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16629 pm_node_t *node = NULL;
16630
16631 // Here we have found a string literal. We'll parse it and add it to
16632 // the list of strings.
16633 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16634 assert(lex_mode->mode == PM_LEX_STRING);
16635 bool lex_interpolation = lex_mode->as.string.interpolation;
16636 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16637
16638 pm_token_t opening = parser->current;
16639 parser_lex(parser);
16640
16641 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16642 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16643 // If we get here, then we have an end immediately after a
16644 // start. In that case we'll create an empty content token and
16645 // return an uninterpolated string.
16646 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16647 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16648
16649 pm_string_shared_init(&string->unescaped, content.start, content.end);
16650 node = (pm_node_t *) string;
16651 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16652 // If we get here, then we have an end of a label immediately
16653 // after a start. In that case we'll create an empty symbol
16654 // node.
16655 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16656 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16657
16658 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16659 node = (pm_node_t *) symbol;
16660
16661 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16662 } else if (!lex_interpolation) {
16663 // If we don't accept interpolation then we expect the string to
16664 // start with a single string content node.
16665 pm_string_t unescaped;
16666 pm_token_t content;
16667
16668 if (match1(parser, PM_TOKEN_EOF)) {
16669 unescaped = PM_STRING_EMPTY;
16670 content = not_provided(parser);
16671 } else {
16672 unescaped = parser->current_string;
16673 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16674 content = parser->previous;
16675 }
16676
16677 // It is unfortunately possible to have multiple string content
16678 // nodes in a row in the case that there's heredoc content in
16679 // the middle of the string, like this cursed example:
16680 //
16681 // <<-END+'b
16682 // a
16683 // END
16684 // c'+'d'
16685 //
16686 // In that case we need to switch to an interpolated string to
16687 // be able to contain all of the parts.
16688 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16689 pm_node_list_t parts = { 0 };
16690
16691 pm_token_t delimiters = not_provided(parser);
16692 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16693 pm_node_list_append(&parts, part);
16694
16695 do {
16696 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16697 pm_node_list_append(&parts, part);
16698 parser_lex(parser);
16699 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16700
16701 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16702 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16703
16704 pm_node_list_free(&parts);
16705 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16706 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16707 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16708 } else if (match1(parser, PM_TOKEN_EOF)) {
16709 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16710 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16711 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16712 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16713 } else {
16714 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16715 parser->previous.start = parser->previous.end;
16716 parser->previous.type = PM_TOKEN_MISSING;
16717 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16718 }
16719 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16720 // In this case we've hit string content so we know the string
16721 // at least has something in it. We'll need to check if the
16722 // following token is the end (in which case we can return a
16723 // plain string) or if it's not then it has interpolation.
16724 pm_token_t content = parser->current;
16725 pm_string_t unescaped = parser->current_string;
16726 parser_lex(parser);
16727
16728 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16729 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16730 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16731
16732 // Kind of odd behavior, but basically if we have an
16733 // unterminated string and it ends in a newline, we back up one
16734 // character so that the error message is on the last line of
16735 // content in the string.
16736 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16737 const uint8_t *location = parser->previous.end;
16738 if (location > parser->start && location[-1] == '\n') location--;
16739 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16740
16741 parser->previous.start = parser->previous.end;
16742 parser->previous.type = PM_TOKEN_MISSING;
16743 }
16744 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16745 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16746 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16747 } else {
16748 // If we get here, then we have interpolation so we'll need
16749 // to create a string or symbol node with interpolation.
16750 pm_node_list_t parts = { 0 };
16751 pm_token_t string_opening = not_provided(parser);
16752 pm_token_t string_closing = not_provided(parser);
16753
16754 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16755 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16756 pm_node_list_append(&parts, part);
16757
16758 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16759 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16760 pm_node_list_append(&parts, part);
16761 }
16762 }
16763
16764 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16765 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16766 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16767 } else if (match1(parser, PM_TOKEN_EOF)) {
16768 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16769 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16770 } else {
16771 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16772 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16773 }
16774
16775 pm_node_list_free(&parts);
16776 }
16777 } else {
16778 // If we get here, then the first part of the string is not plain
16779 // string content, in which case we need to parse the string as an
16780 // interpolated string.
16781 pm_node_list_t parts = { 0 };
16782 pm_node_t *part;
16783
16784 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16785 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16786 pm_node_list_append(&parts, part);
16787 }
16788 }
16789
16790 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16791 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16792 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16793 } else if (match1(parser, PM_TOKEN_EOF)) {
16794 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16795 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16796 } else {
16797 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16798 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16799 }
16800
16801 pm_node_list_free(&parts);
16802 }
16803
16804 if (current == NULL) {
16805 // If the node we just parsed is a symbol node, then we can't
16806 // concatenate it with anything else, so we can now return that
16807 // node.
16809 return node;
16810 }
16811
16812 // If we don't already have a node, then it's fine and we can just
16813 // set the result to be the node we just parsed.
16814 current = node;
16815 } else {
16816 // Otherwise we need to check the type of the node we just parsed.
16817 // If it cannot be concatenated with the previous node, then we'll
16818 // need to add a syntax error.
16820 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16821 }
16822
16823 // If we haven't already created our container for concatenation,
16824 // we'll do that now.
16825 if (!concating) {
16826 concating = true;
16827 pm_token_t bounds = not_provided(parser);
16828
16829 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16830 pm_interpolated_string_node_append(container, current);
16831 current = (pm_node_t *) container;
16832 }
16833
16834 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16835 }
16836 }
16837
16838 return current;
16839}
16840
// Flag bits passed as the `flags` argument of parse_pattern.
//
// SINGLE is zero, i.e. it sets none of the flag bits below. It is what callers
// pass when parsing the value after a hash-pattern key or the body of a
// parenthesized pattern.
16841#define PM_PARSE_PATTERN_SINGLE 0
// When TOP is not set, parse_pattern reports PM_ERR_PATTERN_HASH_IMPLICIT for a
// hash pattern that begins directly with a label or `**`.
16842#define PM_PARSE_PATTERN_TOP 1
// Either TOP or MULTI allows parse_pattern to accept a leading `*` rest
// pattern. NOTE(review): any further effect of MULTI is outside this excerpt.
16843#define PM_PARSE_PATTERN_MULTI 2
16844
// Forward declaration: the pattern helpers below recurse back into
// parse_pattern before its definition appears.
16845static pm_node_t *
16846parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16847
16853static void
16854parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16855 // Skip this capture if it starts with an underscore.
16856 if (*location->start == '_') return;
16857
16858 if (pm_constant_id_list_includes(captures, capture)) {
16859 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16860 } else {
16861 pm_constant_id_list_append(captures, capture);
16862 }
16863}
16864
16868static pm_node_t *
16869parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16870 // Now, if there are any :: operators that follow, parse them as constant
16871 // path nodes.
16872 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16873 pm_token_t delimiter = parser->previous;
16874 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16875 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16876 }
16877
16878 // If there is a [ or ( that follows, then this is part of a larger pattern
16879 // expression. We'll parse the inner pattern here, then modify the returned
16880 // inner pattern with our constant path attached.
16881 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16882 return node;
16883 }
16884
16885 pm_token_t opening;
16886 pm_token_t closing;
16887 pm_node_t *inner = NULL;
16888
16889 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16890 opening = parser->previous;
16891 accept1(parser, PM_TOKEN_NEWLINE);
16892
16893 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16894 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16895 accept1(parser, PM_TOKEN_NEWLINE);
16896 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16897 }
16898
16899 closing = parser->previous;
16900 } else {
16901 parser_lex(parser);
16902 opening = parser->previous;
16903 accept1(parser, PM_TOKEN_NEWLINE);
16904
16905 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16906 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16907 accept1(parser, PM_TOKEN_NEWLINE);
16908 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16909 }
16910
16911 closing = parser->previous;
16912 }
16913
16914 if (!inner) {
16915 // If there was no inner pattern, then we have something like Foo() or
16916 // Foo[]. In that case we'll create an array pattern with no requireds.
16917 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16918 }
16919
16920 // Now that we have the inner pattern, check to see if it's an array, find,
16921 // or hash pattern. If it is, then we'll attach our constant path to it if
16922 // it doesn't already have a constant. If it's not one of those node types
16923 // or it does have a constant, then we'll create an array pattern.
16924 switch (PM_NODE_TYPE(inner)) {
16925 case PM_ARRAY_PATTERN_NODE: {
16926 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16927
16928 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16929 pattern_node->base.location.start = node->location.start;
16930 pattern_node->base.location.end = closing.end;
16931
16932 pattern_node->constant = node;
16933 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16934 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16935
16936 return (pm_node_t *) pattern_node;
16937 }
16938
16939 break;
16940 }
16941 case PM_FIND_PATTERN_NODE: {
16942 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16943
16944 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16945 pattern_node->base.location.start = node->location.start;
16946 pattern_node->base.location.end = closing.end;
16947
16948 pattern_node->constant = node;
16949 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16950 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16951
16952 return (pm_node_t *) pattern_node;
16953 }
16954
16955 break;
16956 }
16957 case PM_HASH_PATTERN_NODE: {
16958 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16959
16960 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16961 pattern_node->base.location.start = node->location.start;
16962 pattern_node->base.location.end = closing.end;
16963
16964 pattern_node->constant = node;
16965 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16966 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16967
16968 return (pm_node_t *) pattern_node;
16969 }
16970
16971 break;
16972 }
16973 default:
16974 break;
16975 }
16976
16977 // If we got here, then we didn't return one of the inner patterns by
16978 // attaching its constant. In this case we'll create an array pattern and
16979 // attach our constant to it.
16980 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16981 pm_array_pattern_node_requireds_append(pattern_node, inner);
16982 return (pm_node_t *) pattern_node;
16983}
16984
16988static pm_splat_node_t *
16989parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16990 assert(parser->previous.type == PM_TOKEN_USTAR);
16991 pm_token_t operator = parser->previous;
16992 pm_node_t *name = NULL;
16993
16994 // Rest patterns don't necessarily have a name associated with them. So we
16995 // will check for that here. If they do, then we'll add it to the local
16996 // table since this pattern will cause it to become a local variable.
16997 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16998 pm_token_t identifier = parser->previous;
16999 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
17000
17001 int depth;
17002 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17003 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
17004 }
17005
17006 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
17007 name = (pm_node_t *) pm_local_variable_target_node_create(
17008 parser,
17009 &PM_LOCATION_TOKEN_VALUE(&identifier),
17010 constant_id,
17011 (uint32_t) (depth == -1 ? 0 : depth)
17012 );
17013 }
17014
17015 // Finally we can return the created node.
17016 return pm_splat_node_create(parser, &operator, name);
17017}
17018
17022static pm_node_t *
17023parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17024 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17025 parser_lex(parser);
17026
17027 pm_token_t operator = parser->previous;
17028 pm_node_t *value = NULL;
17029
17030 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17031 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17032 }
17033
17034 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17035 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17036
17037 int depth;
17038 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17039 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17040 }
17041
17042 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17043 value = (pm_node_t *) pm_local_variable_target_node_create(
17044 parser,
17045 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17046 constant_id,
17047 (uint32_t) (depth == -1 ? 0 : depth)
17048 );
17049 }
17050
17051 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17052}
17053
17058static bool
17059pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17060 ptrdiff_t length = end - start;
17061 if (length == 0) return false;
17062
17063 // First ensure that it starts with a valid identifier starting character.
17064 size_t width = char_is_identifier_start(parser, start, end - start);
17065 if (width == 0) return false;
17066
17067 // Next, ensure that it's not an uppercase character.
17068 if (parser->encoding_changed) {
17069 if (parser->encoding->isupper_char(start, length)) return false;
17070 } else {
17071 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17072 }
17073
17074 // Next, iterate through all of the bytes of the string to ensure that they
17075 // are all valid identifier characters.
17076 const uint8_t *cursor = start + width;
17077 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
17078 return cursor == end;
17079}
17080
17085static pm_node_t *
17086parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17087 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17088
17089 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17090 int depth = -1;
17091
17092 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17093 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17094 } else {
17095 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17096
17097 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17098 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17099 }
17100 }
17101
17102 if (depth == -1) {
17103 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17104 }
17105
17106 parse_pattern_capture(parser, captures, constant_id, value_loc);
17107 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17108 parser,
17109 value_loc,
17110 constant_id,
17111 (uint32_t) (depth == -1 ? 0 : depth)
17112 );
17113
17114 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17115}
17116
17121static void
17122parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17123 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17124 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17125 }
17126}
17127
// Parse the remainder of a hash pattern whose first element (first_node) has
// already been parsed by the caller. Key/value pairs are collected as assoc
// nodes in `assocs`, keys are recorded in `keys` so that duplicates can be
// reported, and `rest` holds the keyword-rest element once one is seen. The
// result is the hash pattern node built from the collected assocs and rest;
// the temporary list storage and the key set are released before returning.
//
// NOTE(review): this listing is missing several lines of the function (the
// embedded numbering has gaps, including the return-type line above the name).
// The gaps are flagged below.
17132parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17133 pm_node_list_t assocs = { 0 };
17134 pm_static_literals_t keys = { 0 };
17135 pm_node_t *rest = NULL;
17136
 // Dispatch on what the caller handed us as the first element.
17137 switch (PM_NODE_TYPE(first_node)) {
 // NOTE(review): the case label(s) for this branch are not visible
 // (numbering skips 17138-17139). The first node is taken as the rest.
17140 rest = first_node;
17141 break;
17142 case PM_SYMBOL_NODE: {
17143 if (pm_symbol_node_label_p(first_node)) {
17144 parse_pattern_hash_key(parser, &keys, first_node);
17145 pm_node_t *value;
17146
 // NOTE(review): the `if` header choosing between an implicit and an
 // explicit value is not visible (numbering skips 17147).
17148 // Otherwise, we will create an implicit local variable
17149 // target for the value.
17150 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17151 } else {
17152 // Here we have a value for the first assoc in the list, so
17153 // we will parse it now.
17154 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17155 }
17156
17157 pm_token_t operator = not_provided(parser);
17158 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17159
17160 pm_node_list_append(&assocs, assoc);
17161 break;
17162 }
17163 }
 // NOTE(review): numbering skips 17164 here.
17165 default: {
17166 // If we get anything else, then this is an error. For this we'll
17167 // create a missing node for the value and create an assoc node for
17168 // the first node in the list.
17169 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17170 pm_parser_err_node(parser, first_node, diag_id);
17171
17172 pm_token_t operator = not_provided(parser);
17173 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17174 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17175
17176 pm_node_list_append(&assocs, assoc);
17177 break;
17178 }
17179 }
17180
17181 // If there are any other assocs, then we'll parse them now.
17182 while (accept1(parser, PM_TOKEN_COMMA)) {
17183 // Here we need to break to support trailing commas.
 // NOTE(review): the `if` header for the trailing-comma check is not
 // visible (numbering skips 17184).
17185 // Trailing commas are not allowed to follow a rest pattern.
17186 if (rest != NULL) {
17187 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17188 }
17189
17190 break;
17191 }
17192
 // A `**` element: the first one becomes the rest. Any later one is an
 // error and is kept in the assoc list.
17193 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17194 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17195
17196 if (rest == NULL) {
17197 rest = assoc;
17198 } else {
17199 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17200 pm_node_list_append(&assocs, assoc);
17201 }
17202 } else {
17203 pm_node_t *key;
17204
 // The key is either a quoted label (parsed through parse_strings) or a
 // plain label token.
17205 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17206 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17207
 // NOTE(review): the `if` header guarding the interpolated-key error is
 // not visible (numbering skips 17208).
17209 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17210 } else if (!pm_symbol_node_label_p(key)) {
17211 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17212 }
17213 } else {
17214 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17215 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17216 }
17217
17218 parse_pattern_hash_key(parser, &keys, key);
17219 pm_node_t *value = NULL;
17220
 // NOTE(review): the `if` header choosing between an implicit and an
 // explicit value is not visible (numbering skips 17221).
17222 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17223 } else {
17224 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17225 }
17226
17227 pm_token_t operator = not_provided(parser);
17228 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17229
 // A key/value pair after the rest element is an error, but the pair is
 // still appended to the list.
17230 if (rest != NULL) {
17231 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17232 }
17233
17234 pm_node_list_append(&assocs, assoc);
17235 }
17236 }
17237
 // Build the result, then free the temporary list storage and key set.
17238 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17239 xfree(assocs.nodes);
17240
17241 pm_static_literals_free(&keys);
17242 return node;
17243}
17244
// Parse a single primitive pattern, chosen by the type of the current token:
// a bare name (local variable target), an array pattern, a hash pattern in
// braces, a range, a literal expression, a pinned variable or expression, or a
// constant path. For any other token, diag_id is reported at the current token
// and a missing node is returned.
//
// NOTE(review): this listing is missing several `case` labels and one
// declaration (the embedded numbering has gaps). The gaps are flagged below.
17248static pm_node_t *
17249parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17250 switch (parser->current.type) {
 // NOTE(review): numbering skips 17251, so a preceding case label may be
 // missing here.
17252 case PM_TOKEN_METHOD_NAME: {
 // A bare name becomes a local variable target. The local is declared if it
 // is not already visible, and the capture is recorded.
17253 parser_lex(parser);
17254 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17255
 // This `int depth` shadows the uint16_t `depth` parameter within this case
 // block; here it holds the scope depth of the local, or -1 if not found.
17256 int depth;
17257 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17258 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17259 }
17260
17261 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17262 return (pm_node_t *) pm_local_variable_target_node_create(
17263 parser,
17264 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17265 constant_id,
17266 (uint32_t) (depth == -1 ? 0 : depth)
17267 );
17268 }
 // NOTE(review): the case label opening this block is not visible (numbering
 // skips 17269). The body parses a bracketed array pattern.
17270 pm_token_t opening = parser->current;
17271 parser_lex(parser);
17272
17273 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17274 // If we have an empty array pattern, then we'll just return a new
17275 // array pattern node.
17276 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17277 }
17278
17279 // Otherwise, we'll parse the inner pattern, then deal with it depending
17280 // on the type it returns.
17281 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17282
17283 accept1(parser, PM_TOKEN_NEWLINE);
17284 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17285 pm_token_t closing = parser->previous;
17286
 // An inner array or find pattern that has no opening delimiter of its own
 // adopts these brackets. Anything else is wrapped in a new array pattern.
17287 switch (PM_NODE_TYPE(inner)) {
17288 case PM_ARRAY_PATTERN_NODE: {
17289 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17290 if (pattern_node->opening_loc.start == NULL) {
17291 pattern_node->base.location.start = opening.start;
17292 pattern_node->base.location.end = closing.end;
17293
17294 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17295 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17296
17297 return (pm_node_t *) pattern_node;
17298 }
17299
17300 break;
17301 }
17302 case PM_FIND_PATTERN_NODE: {
17303 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17304 if (pattern_node->opening_loc.start == NULL) {
17305 pattern_node->base.location.start = opening.start;
17306 pattern_node->base.location.end = closing.end;
17307
17308 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17309 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17310
17311 return (pm_node_t *) pattern_node;
17312 }
17313
17314 break;
17315 }
17316 default:
17317 break;
17318 }
17319
17320 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17321 pm_array_pattern_node_requireds_append(node, inner);
17322 return (pm_node_t *) node;
17323 }
17324 case PM_TOKEN_BRACE_LEFT: {
 // pattern_matching_newlines is cleared while parsing inside the braces and
 // restored before returning.
17325 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17326 parser->pattern_matching_newlines = false;
17327
 // NOTE(review): the declaration of `node` used below is not visible
 // (numbering skips 17328).
17329 pm_token_t opening = parser->current;
17330 parser_lex(parser);
17331
17332 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17333 // If we have an empty hash pattern, then we'll just return a new hash
17334 // pattern node.
17335 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17336 } else {
17337 pm_node_t *first_node;
17338
 // Parse the first element here, then hand it to parse_pattern_hash.
17339 switch (parser->current.type) {
17340 case PM_TOKEN_LABEL:
17341 parser_lex(parser);
17342 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17343 break;
 // NOTE(review): the case labels for the next two branches are not visible
 // (numbering skips 17344 and 17347).
17345 first_node = parse_pattern_keyword_rest(parser, captures);
17346 break;
17348 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17349 break;
17350 default: {
17351 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17352 parser_lex(parser);
17353
17354 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17355 break;
17356 }
17357 }
17358
17359 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17360
17361 accept1(parser, PM_TOKEN_NEWLINE);
17362 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17363 pm_token_t closing = parser->previous;
17364
 // Stretch the node's location to cover the braces and record them.
17365 node->base.location.start = opening.start;
17366 node->base.location.end = closing.end;
17367
17368 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17369 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17370 }
17371
17372 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17373 return (pm_node_t *) node;
17374 }
17375 case PM_TOKEN_UDOT_DOT:
17376 case PM_TOKEN_UDOT_DOT_DOT: {
17377 pm_token_t operator = parser->current;
17378 parser_lex(parser);
17379
17380 // Since we have a unary range operator, we need to parse the subsequent
17381 // expression as the right side of the range.
17382 switch (parser->current.type) {
17383 case PM_CASE_PRIMITIVE: {
17384 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17385 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17386 }
17387 default: {
 // No usable right-hand side: report the error at the operator and use a
 // missing node as the right side of the range.
17388 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17389 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17390 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17391 }
17392 }
17393 }
17394 case PM_CASE_PRIMITIVE: {
17395 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17396
17397 // If we found a label, we need to immediately return to the caller.
17398 if (pm_symbol_node_label_p(node)) return node;
17399
17400 // Now that we have a primitive, we need to check if it's part of a range.
17401 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17402 pm_token_t operator = parser->previous;
17403
17404 // Now that we have the operator, we need to check if this is followed
17405 // by another expression. If it is, then we will create a full range
17406 // node. Otherwise, we'll create an endless range.
17407 switch (parser->current.type) {
17408 case PM_CASE_PRIMITIVE: {
17409 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17410 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17411 }
17412 default:
17413 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17414 }
17415 }
17416
17417 return node;
17418 }
17419 case PM_TOKEN_CARET: {
17420 parser_lex(parser);
17421 pm_token_t operator = parser->previous;
17422
17423 // At this point we have a pin operator. We need to check the subsequent
17424 // expression to determine if it's a variable or an expression.
17425 switch (parser->current.type) {
17426 case PM_TOKEN_IDENTIFIER: {
17427 parser_lex(parser);
17428 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17429
 // When parse_variable returns NULL, an error is reported and a missing
 // local variable read node stands in for the variable.
17430 if (variable == NULL) {
17431 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17432 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17433 }
17434
17435 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17436 }
 // NOTE(review): the case labels for the following branches are not visible
 // (numbering skips 17437, 17443, 17449, 17455, 17461 and 17467). The first
 // five each consume one token and wrap the matching read node (instance,
 // class, global, numbered reference, back reference) in a pinned variable
 // node.
17438 parser_lex(parser);
17439 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17440
17441 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17442 }
17444 parser_lex(parser);
17445 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17446
17447 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17448 }
17450 parser_lex(parser);
17451 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17452
17453 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17454 }
17456 parser_lex(parser);
17457 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17458
17459 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17460 }
17462 parser_lex(parser);
17463 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17464
17465 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17466 }
 // Pinned parenthesized expression. pattern_matching_newlines is cleared
 // while the expression is parsed and restored right afterwards.
17468 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17469 parser->pattern_matching_newlines = false;
17470
17471 pm_token_t lparen = parser->current;
17472 parser_lex(parser);
17473
17474 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17475 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17476
17477 accept1(parser, PM_TOKEN_NEWLINE);
17478 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17479 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17480 }
17481 default: {
17482 // If we get here, then we have a pin operator followed by something
17483 // not understood. We'll create a missing node and return that.
17484 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17485 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17486 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17487 }
17488 }
17489 }
17490 case PM_TOKEN_UCOLON_COLON: {
 // A leading `::Constant`: the constant path node is created with a NULL
 // parent, then any continuation is parsed.
17491 pm_token_t delimiter = parser->current;
17492 parser_lex(parser);
17493
17494 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17495 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17496
17497 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17498 }
17499 case PM_TOKEN_CONSTANT: {
17500 pm_token_t constant = parser->current;
17501 parser_lex(parser);
17502
17503 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17504 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17505 }
17506 default:
 // Nothing recognizable: report diag_id and return a missing node. No token
 // is consumed on this path.
17507 pm_parser_err_current(parser, diag_id);
17508 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17509 }
17510}
17511
// Parse a chain of primitive patterns separated by `|`, folding them
// left-to-right into alternation pattern nodes, followed by any number of
// `=> identifier` captures. first_node may be NULL, in which case the first
// primitive is parsed here (using diag_id for its error); otherwise parsing
// continues from the node the caller already has.
//
// NOTE(review): this listing is missing several `case` labels (the embedded
// numbering has gaps). The gaps are flagged below.
17516static pm_node_t *
17517parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17518 pm_node_t *node = first_node;
17519
 // Run once unconditionally when there is no node yet, then once per `|`.
17520 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
 // Only meaningful when a `|` was just accepted. Every use below is on a
 // path where node is non-NULL.
17521 pm_token_t operator = parser->previous;
17522
17523 switch (parser->current.type) {
 // NOTE(review): some case labels in this group are not visible (numbering
 // skips 17524-17526, 17529 and 17531).
17527 case PM_TOKEN_CARET:
17528 case PM_TOKEN_CONSTANT:
17530 case PM_TOKEN_UDOT_DOT:
17532 case PM_CASE_PRIMITIVE: {
17533 if (node == NULL) {
17534 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17535 } else {
17536 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17537 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17538 }
17539
17540 break;
17541 }
 // NOTE(review): the case label opening this block is not visible (numbering
 // skips 17542-17543). The body parses a parenthesized pattern and wraps it
 // in a parentheses node.
17544 pm_token_t opening = parser->current;
17545 parser_lex(parser);
17546
17547 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17548 accept1(parser, PM_TOKEN_NEWLINE);
17549 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17550 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
17551
17552 if (node == NULL) {
17553 node = right;
17554 } else {
17555 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17556 }
17557
17558 break;
17559 }
17560 default: {
 // Unrecognized token: report diag_id and use a missing node. Because node
 // is non-NULL afterwards, the loop continues only if a `|` follows.
17561 pm_parser_err_current(parser, diag_id);
17562 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17563
17564 if (node == NULL) {
17565 node = right;
17566 } else {
17567 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17568 }
17569
17570 break;
17571 }
17572 }
17573 }
17574
17575 // If we have an =>, then we are assigning this pattern to a variable.
17576 // In this case we should create an assignment node.
17577 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17578 pm_token_t operator = parser->previous;
17579 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17580
17581 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
 // This `int depth` shadows the uint16_t `depth` parameter inside this loop
 // body; here it holds the scope depth of the local, or -1 if not found.
17582 int depth;
17583
17584 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17585 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17586 }
17587
17588 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17589 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17590 parser,
17591 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17592 constant_id,
17593 (uint32_t) (depth == -1 ? 0 : depth)
17594 );
17595
17596 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17597 }
17598
17599 return node;
17600}
17601
/**
 * Parse a pattern starting at the current token and return the resulting node.
 *
 * The first token selects how the pattern begins: a label or a
 * PM_TOKEN_USTAR_STAR starts a hash pattern, a string beginning may turn out to
 * be a dynamic label (and therefore a hash pattern), and a PM_TOKEN_USTAR
 * starts a rest pattern when `flags` contains PM_PARSE_PATTERN_TOP or
 * PM_PARSE_PATTERN_MULTI. Anything else is handed to
 * parse_pattern_primitives.
 *
 * When `flags` contains PM_PARSE_PATTERN_MULTI and a comma follows, the
 * comma-separated patterns are gathered into a list and turned into either a
 * find pattern or an array pattern.
 *
 * `captures` and `diag_id` are forwarded to the pattern helpers, and
 * `depth + 1` is passed to every nested parse call.
 *
 * NOTE(review): the embedded listing numbers skip 17660 and 17682 inside this
 * function, so two source lines are not visible here; see the notes at the
 * gaps below.
 */
17605static pm_node_t *
17606parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17607 pm_node_t *node = NULL;
17608
 // leading_rest is set when the pattern starts with a rest (PM_TOKEN_USTAR);
 // trailing_rest is set once a rest or implicit rest has been parsed after a
 // comma. Both feed the find/array pattern decision and its errors below.
17609 bool leading_rest = false;
17610 bool trailing_rest = false;
17611
17612 switch (parser->current.type) {
17613 case PM_TOKEN_LABEL: {
 // A label is used as the first key of a hash pattern. Outside of the
 // PM_PARSE_PATTERN_TOP position this is reported as
 // PM_ERR_PATTERN_HASH_IMPLICIT.
17614 parser_lex(parser);
17615 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17616 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17617
17618 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17619 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17620 }
17621
17622 return node;
17623 }
17624 case PM_TOKEN_USTAR_STAR: {
 // A keyword rest is parsed first and then used as the first element of
 // a hash pattern, with the same PM_PARSE_PATTERN_TOP check as above.
17625 node = parse_pattern_keyword_rest(parser, captures);
17626 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17627
17628 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17629 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17630 }
17631
17632 return node;
17633 }
17634 case PM_TOKEN_STRING_BEGIN: {
17635 // We need special handling for string beginnings because they could
17636 // be dynamic symbols leading to hash patterns.
17637 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17638
17639 if (pm_symbol_node_label_p(node)) {
17640 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17641
17642 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17643 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17644 }
17645
17646 return node;
17647 }
17648
 // Not a label: continue parsing with the primitive we already have as
 // the first one.
17649 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17650 break;
17651 }
17652 case PM_TOKEN_USTAR: {
17653 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17654 parser_lex(parser);
17655 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17656 leading_rest = true;
17657 break;
17658 }
17659 }
 // NOTE(review): listing line 17660 is not visible here. As shown, when
 // neither flag is set control falls through into the default branch;
 // confirm against the complete source.
17661 default:
17662 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17663 break;
17664 }
17665
17666 // If we got a dynamic label symbol, then we need to treat it like the
17667 // beginning of a hash pattern.
 // NOTE(review): unlike the branches above, this path does not report
 // PM_ERR_PATTERN_HASH_IMPLICIT when PM_PARSE_PATTERN_TOP is unset; confirm
 // that this is intended.
17668 if (pm_symbol_node_label_p(node)) {
17669 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17670 }
17671
17672 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17673 // If we have a comma, then we are now parsing either an array pattern
17674 // or a find pattern. We need to parse all of the patterns, put them
17675 // into a big list, and then determine which type of node we have.
17676 pm_node_list_t nodes = { 0 };
17677 pm_node_list_append(&nodes, node);
17678
17679 // Gather up all of the patterns into the list.
17680 while (accept1(parser, PM_TOKEN_COMMA)) {
17681 // Break early here in case we have a trailing comma.
 // NOTE(review): listing line 17682 is not visible here; presumably it is
 // the `if (...) {` that guards the next four lines and is closed at
 // 17687. In that branch an implicit rest node is created from the comma
 // token, appended, and the loop ends.
17683 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17684 pm_node_list_append(&nodes, node);
17685 trailing_rest = true;
17686 break;
17687 }
17688
17689 if (accept1(parser, PM_TOKEN_USTAR)) {
17690 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17691
17692 // If we have already parsed a splat pattern, then this is an
17693 // error. We will continue to parse the rest of the patterns,
17694 // but we will indicate it as an error.
17695 if (trailing_rest) {
17696 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17697 }
17698
17699 trailing_rest = true;
17700 } else {
17701 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17702 }
17703
17704 pm_node_list_append(&nodes, node);
17705 }
17706
17707 // If the first pattern and the last pattern are rest patterns, then we
17708 // will call this a find pattern, regardless of how many rest patterns
17709 // are in between because we know we already added the appropriate
17710 // errors. Otherwise we will create an array pattern.
17711 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17712 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17713
 // Exactly two entries means only the leading and trailing rest were
 // given, with nothing between them.
17714 if (nodes.size == 2) {
17715 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17716 }
17717 } else {
17718 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17719
17720 if (leading_rest && trailing_rest) {
17721 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17722 }
17723 }
17724
 // Only the list's backing array is released here; the nodes themselves
 // were handed to the pattern node created above.
17725 xfree(nodes.nodes);
17726 } else if (leading_rest) {
17727 // Otherwise, if we parsed a single splat pattern, then we know we have
17728 // an array pattern, so we can go ahead and create that node.
17729 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17730 }
17731
17732 return node;
17733}
17734
17740static inline void
17741parse_negative_numeric(pm_node_t *node) {
17742 switch (PM_NODE_TYPE(node)) {
17743 case PM_INTEGER_NODE: {
17744 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17745 cast->base.location.start--;
17746 cast->value.negative = true;
17747 break;
17748 }
17749 case PM_FLOAT_NODE: {
17750 pm_float_node_t *cast = (pm_float_node_t *) node;
17751 cast->base.location.start--;
17752 cast->value = -cast->value;
17753 break;
17754 }
17755 case PM_RATIONAL_NODE: {
17756 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17757 cast->base.location.start--;
17758 cast->numerator.negative = true;
17759 break;
17760 }
17761 case PM_IMAGINARY_NODE:
17762 node->location.start--;
17763 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17764 break;
17765 default:
17766 assert(false && "unreachable");
17767 break;
17768 }
17769}
17770
17776static void
17777pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17778 switch (diag_id) {
17779 case PM_ERR_HASH_KEY: {
17780 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17781 break;
17782 }
17783 case PM_ERR_HASH_VALUE:
17784 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17785 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17786 break;
17787 }
17788 case PM_ERR_UNARY_RECEIVER: {
17789 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17790 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17791 break;
17792 }
17793 case PM_ERR_UNARY_DISALLOWED:
17794 case PM_ERR_EXPECT_ARGUMENT: {
17795 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17796 break;
17797 }
17798 default:
17799 pm_parser_err_previous(parser, diag_id);
17800 break;
17801 }
17802}
17803
/**
 * Check whether a retry (represented by `node`) is allowed where it appears,
 * and report an error on `node` if it is not.
 *
 * The context stack is walked outward from parser->current_context through
 * each entry's `prev` link. The walk stops at the first context that decides
 * the question: either one in which a retry is allowed (no error), or one in
 * which it is not. In the latter case the error depends on what the walk
 * passed through on the way: PM_ERR_INVALID_RETRY_AFTER_ELSE,
 * PM_ERR_INVALID_RETRY_AFTER_ENSURE, or PM_ERR_INVALID_RETRY_WITHOUT_RESCUE
 * when neither was seen. If the stack is exhausted without reaching a deciding
 * context, nothing is reported.
 *
 * The CONTEXT_* macros are local to this function and are all undefined again
 * at its end.
 *
 * NOTE(review): the embedded listing numbers skip in several places inside the
 * switch, so some `case` labels are not visible here. The visible lines are
 * kept exactly as they appear.
 */
17807static void
17808parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17809#define CONTEXT_NONE 0
17810#define CONTEXT_THROUGH_ENSURE 1
17811#define CONTEXT_THROUGH_ELSE 2
17812
17813 pm_context_node_t *context_node = parser->current_context;
 // Remembers whether the walk has passed through an else or an ensure, which
 // selects the error message if a disallowing context is reached.
17814 int context = CONTEXT_NONE;
17815
17816 while (context_node != NULL) {
17817 switch (context_node->context) {
17825 case PM_CONTEXT_DEFINED:
17827 // These are the good cases. We're allowed to have a retry here.
17828 return;
17829 case PM_CONTEXT_CLASS:
17830 case PM_CONTEXT_DEF:
17832 case PM_CONTEXT_MAIN:
17833 case PM_CONTEXT_MODULE:
17834 case PM_CONTEXT_PREEXE:
17835 case PM_CONTEXT_SCLASS:
17836 // These are the bad cases. We're not allowed to have a retry in
17837 // these contexts.
17838 if (context == CONTEXT_NONE) {
17839 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17840 } else if (context == CONTEXT_THROUGH_ENSURE) {
17841 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17842 } else if (context == CONTEXT_THROUGH_ELSE) {
17843 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17844 }
17845 return;
17853 // These are also bad cases, but with a more specific error
17854 // message indicating the else.
17855 context = CONTEXT_THROUGH_ELSE;
17856 break;
17864 // These are also bad cases, but with a more specific error
17865 // message indicating the ensure.
17866 context = CONTEXT_THROUGH_ENSURE;
17867 break;
17868 case PM_CONTEXT_NONE:
17869 // This case should never happen.
17870 assert(false && "unreachable");
17871 break;
17872 case PM_CONTEXT_BEGIN:
17875 case PM_CONTEXT_CASE_IN:
17878 case PM_CONTEXT_ELSE:
17879 case PM_CONTEXT_ELSIF:
17880 case PM_CONTEXT_EMBEXPR:
17882 case PM_CONTEXT_FOR:
17883 case PM_CONTEXT_IF:
17888 case PM_CONTEXT_PARENS:
17889 case PM_CONTEXT_POSTEXE:
17891 case PM_CONTEXT_TERNARY:
17892 case PM_CONTEXT_UNLESS:
17893 case PM_CONTEXT_UNTIL:
17894 case PM_CONTEXT_WHILE:
17895 // In these contexts we should continue walking up the list of
17896 // contexts.
17897 break;
17898 }
17899
17900 context_node = context_node->prev;
17901 }
17902
 // Undefine exactly the names defined at the top of the function so that none
 // of them remain defined for the rest of the file.
17903#undef CONTEXT_NONE
17904#undef CONTEXT_THROUGH_ENSURE
17905#undef CONTEXT_THROUGH_ELSE
17906}
17907
/**
 * Check whether a yield (represented by `node`) is allowed where it appears,
 * and report PM_ERR_INVALID_YIELD on `node` if it is not.
 *
 * The context stack is walked outward from parser->current_context through
 * each entry's `prev` link. The walk stops at the first context that decides
 * the question; contexts that do not decide it are skipped. If the stack is
 * exhausted without reaching a deciding context, nothing is reported.
 *
 * NOTE(review): the embedded listing numbers skip in several places inside the
 * switch, so some `case` labels are not visible here.
 */
17911static void
17912parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17913 pm_context_node_t *context_node = parser->current_context;
17914
17915 while (context_node != NULL) {
17916 switch (context_node->context) {
17917 case PM_CONTEXT_DEF:
17919 case PM_CONTEXT_DEFINED:
17923 // These are the good cases. We're allowed to have a yield in
17924 // these contexts.
17925 return;
17926 case PM_CONTEXT_CLASS:
17930 case PM_CONTEXT_MAIN:
17931 case PM_CONTEXT_MODULE:
17935 case PM_CONTEXT_SCLASS:
17939 // These are the bad cases. We're not allowed to have a yield in
17940 // these contexts.
17941 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17942 return;
17943 case PM_CONTEXT_NONE:
17944 // This case should never happen.
17945 assert(false && "unreachable");
17946 break;
17947 case PM_CONTEXT_BEGIN:
17956 case PM_CONTEXT_CASE_IN:
17959 case PM_CONTEXT_ELSE:
17960 case PM_CONTEXT_ELSIF:
17961 case PM_CONTEXT_EMBEXPR:
17963 case PM_CONTEXT_FOR:
17964 case PM_CONTEXT_IF:
17972 case PM_CONTEXT_PARENS:
17973 case PM_CONTEXT_POSTEXE:
17975 case PM_CONTEXT_PREEXE:
17977 case PM_CONTEXT_TERNARY:
17978 case PM_CONTEXT_UNLESS:
17979 case PM_CONTEXT_UNTIL:
17980 case PM_CONTEXT_WHILE:
17981 // In these contexts we should continue walking up the list of
17982 // contexts.
17983 break;
17984 }
17985
17986 context_node = context_node->prev;
17987 }
17988}
17989
/**
 * Data used when reporting regular expression parse errors. It is filled in
 * by parse_regular_expression_errors and read by
 * parse_regular_expression_error.
 *
 * NOTE(review): this listing shows only the `start` and `end` members. The
 * functions below also use `parser` and `shared` members, and the closing
 * line with the typedef name is not visible here either.
 */
17994typedef struct {
17997
 /* Set from the start of the regular expression node's location. */
17999 const uint8_t *start;
18000
 /* Set from the end of the regular expression node's location. */
18002 const uint8_t *end;
18003
18012
/**
 * Error callback handed to pm_regexp_parse by
 * parse_regular_expression_errors. Reports `message` as a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic on callback_data->parser.
 *
 * The diagnostic's location is the `start`/`end` pair passed to this callback
 * when callback_data->shared is set; otherwise it is the start/end stored in
 * callback_data.
 *
 * NOTE(review): listing line 18019, which declares `callback_data`, is not
 * visible here; presumably it is obtained from the `data` argument.
 */
18017static void
18018parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
18020 pm_location_t location;
18021
18022 if (callback_data->shared) {
18023 location = (pm_location_t) { .start = start, .end = end };
18024 } else {
18025 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18026 }
18027
18028 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18029}
18030
/**
 * Run pm_regexp_parse over the unescaped source of the regular expression
 * `node` so that any errors it finds are reported through
 * parse_regular_expression_error.
 *
 * The error data carries the parser, the start and end of the node's
 * location, and whether the unescaped string is of type PM_STRING_SHARED.
 * pm_regexp_parse also receives whether the node has the
 * PM_REGULAR_EXPRESSION_FLAGS_EXTENDED flag, and NULL for the two arguments
 * that precede the error callback.
 *
 * NOTE(review): listing line 18037, which opens the `error_data` initializer,
 * is not visible here.
 */
18034static void
18035parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18036 const pm_string_t *unescaped = &node->unescaped;
18038 .parser = parser,
18039 .start = node->base.location.start,
18040 .end = node->base.location.end,
18041 .shared = unescaped->type == PM_STRING_SHARED
18042 };
18043
18044 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18045}
18046
18050static inline pm_node_t *
18051parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18052 switch (parser->current.type) {
18054 parser_lex(parser);
18055
18056 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18057 pm_accepts_block_stack_push(parser, true);
18058 bool parsed_bare_hash = false;
18059
18060 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18061 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18062
18063 // Handle the case where we don't have a comma and we have a
18064 // newline followed by a right bracket.
18065 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18066 break;
18067 }
18068
18069 // Ensure that we have a comma between elements in the array.
18070 if (array->elements.size > 0) {
18071 if (accept1(parser, PM_TOKEN_COMMA)) {
18072 // If there was a comma but we also accepts a newline,
18073 // then this is a syntax error.
18074 if (accepted_newline) {
18075 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18076 }
18077 } else {
18078 // If there was no comma, then we need to add a syntax
18079 // error.
18080 const uint8_t *location = parser->previous.end;
18081 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18082
18083 parser->previous.start = location;
18084 parser->previous.type = PM_TOKEN_MISSING;
18085 }
18086 }
18087
18088 // If we have a right bracket immediately following a comma,
18089 // this is allowed since it's a trailing comma. In this case we
18090 // can break out of the loop.
18091 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18092
18093 pm_node_t *element;
18094
18095 if (accept1(parser, PM_TOKEN_USTAR)) {
18096 pm_token_t operator = parser->previous;
18097 pm_node_t *expression = NULL;
18098
18099 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18100 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18101 } else {
18102 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18103 }
18104
18105 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18106 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18107 if (parsed_bare_hash) {
18108 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18109 }
18110
18111 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18112 pm_static_literals_t hash_keys = { 0 };
18113
18115 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18116 }
18117
18118 pm_static_literals_free(&hash_keys);
18119 parsed_bare_hash = true;
18120 } else {
18121 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18122
18123 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18124 if (parsed_bare_hash) {
18125 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18126 }
18127
18128 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18129 pm_static_literals_t hash_keys = { 0 };
18130 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18131
18132 pm_token_t operator;
18133 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18134 operator = parser->previous;
18135 } else {
18136 operator = not_provided(parser);
18137 }
18138
18139 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18140 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18141 pm_keyword_hash_node_elements_append(hash, assoc);
18142
18143 element = (pm_node_t *) hash;
18144 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18145 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18146 }
18147
18148 pm_static_literals_free(&hash_keys);
18149 parsed_bare_hash = true;
18150 }
18151 }
18152
18153 pm_array_node_elements_append(array, element);
18154 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18155 }
18156
18157 accept1(parser, PM_TOKEN_NEWLINE);
18158
18159 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18160 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18161 parser->previous.start = parser->previous.end;
18162 parser->previous.type = PM_TOKEN_MISSING;
18163 }
18164
18165 pm_array_node_close_set(array, &parser->previous);
18166 pm_accepts_block_stack_pop(parser);
18167
18168 return (pm_node_t *) array;
18169 }
18172 pm_token_t opening = parser->current;
18173
18174 pm_node_list_t current_block_exits = { 0 };
18175 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18176
18177 parser_lex(parser);
18178 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
18179
18180 // If this is the end of the file or we match a right parenthesis, then
18181 // we have an empty parentheses node, and we can immediately return.
18182 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18183 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18184
18185 pop_block_exits(parser, previous_block_exits);
18186 pm_node_list_free(&current_block_exits);
18187
18188 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
18189 }
18190
18191 // Otherwise, we're going to parse the first statement in the list
18192 // of statements within the parentheses.
18193 pm_accepts_block_stack_push(parser, true);
18194 context_push(parser, PM_CONTEXT_PARENS);
18195 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18196 context_pop(parser);
18197
18198 // Determine if this statement is followed by a terminator. In the
18199 // case of a single statement, this is fine. But in the case of
18200 // multiple statements it's required.
18201 bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18202 if (terminator_found) {
18203 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18204 }
18205
18206 // If we hit a right parenthesis, then we're done parsing the
18207 // parentheses node, and we can check which kind of node we should
18208 // return.
18209 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18211 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18212 }
18213
18214 parser_lex(parser);
18215 pm_accepts_block_stack_pop(parser);
18216
18217 pop_block_exits(parser, previous_block_exits);
18218 pm_node_list_free(&current_block_exits);
18219
18220 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18221 // If we have a single statement and are ending on a right
18222 // parenthesis, then we need to check if this is possibly a
18223 // multiple target node.
18224 pm_multi_target_node_t *multi_target;
18225
18226 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18227 multi_target = (pm_multi_target_node_t *) statement;
18228 } else {
18229 multi_target = pm_multi_target_node_create(parser);
18230 pm_multi_target_node_targets_append(parser, multi_target, statement);
18231 }
18232
18233 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18234 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18235
18236 multi_target->lparen_loc = lparen_loc;
18237 multi_target->rparen_loc = rparen_loc;
18238 multi_target->base.location.start = lparen_loc.start;
18239 multi_target->base.location.end = rparen_loc.end;
18240
18241 pm_node_t *result;
18242 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18243 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18244 accept1(parser, PM_TOKEN_NEWLINE);
18245 } else {
18246 result = (pm_node_t *) multi_target;
18247 }
18248
18249 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18250 // All set, this is explicitly allowed by the parent
18251 // context.
18252 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18253 // All set, we're inside a for loop and we're parsing
18254 // multiple targets.
18255 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18256 // Multi targets are not allowed when it's not a
18257 // statement level.
18258 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18259 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18260 // Multi targets must be followed by an equal sign in
18261 // order to be valid (or a right parenthesis if they are
18262 // nested).
18263 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18264 }
18265
18266 return result;
18267 }
18268
18269 // If we have a single statement and are ending on a right parenthesis
18270 // and we didn't return a multiple assignment node, then we can return a
18271 // regular parentheses node now.
18272 pm_statements_node_t *statements = pm_statements_node_create(parser);
18273 pm_statements_node_body_append(parser, statements, statement, true);
18274
18275 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18276 }
18277
18278 // If we have more than one statement in the set of parentheses,
18279 // then we are going to parse all of them as a list of statements.
18280 // We'll do that here.
18281 context_push(parser, PM_CONTEXT_PARENS);
18282 pm_statements_node_t *statements = pm_statements_node_create(parser);
18283 pm_statements_node_body_append(parser, statements, statement, true);
18284
18285 // If we didn't find a terminator and we didn't find a right
18286 // parenthesis, then this is a syntax error.
18287 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18288 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18289 }
18290
18291 // Parse each statement within the parentheses.
18292 while (true) {
18293 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18294 pm_statements_node_body_append(parser, statements, node, true);
18295
18296 // If we're recovering from a syntax error, then we need to stop
18297 // parsing the statements now.
18298 if (parser->recovering) {
18299 // If this is the level of context where the recovery has
18300 // happened, then we can mark the parser as done recovering.
18301 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18302 break;
18303 }
18304
18305 // If we couldn't parse an expression at all, then we need to
18306 // bail out of the loop.
18307 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18308
18309 // If we successfully parsed a statement, then we are going to
18310 // need terminator to delimit them.
18311 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18312 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18313 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18314 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18315 break;
18316 } else if (!match1(parser, PM_TOKEN_EOF)) {
18317 // If we're at the end of the file, then we're going to add
18318 // an error after this for the ) anyway.
18319 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18320 }
18321 }
18322
18323 context_pop(parser);
18324 pm_accepts_block_stack_pop(parser);
18325 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18326
18327 // When we're parsing multi targets, we allow them to be followed by
18328 // a right parenthesis if they are at the statement level. This is
18329 // only possible if they are the final statement in a parentheses.
18330 // We need to explicitly reject that here.
18331 {
18332 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18333
18334 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18335 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18336 pm_multi_target_node_targets_append(parser, multi_target, statement);
18337
18338 statement = (pm_node_t *) multi_target;
18339 statements->body.nodes[statements->body.size - 1] = statement;
18340 }
18341
18342 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18343 const uint8_t *offset = statement->location.end;
18344 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18345 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18346
18347 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18348 statements->body.nodes[statements->body.size - 1] = statement;
18349
18350 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18351 }
18352 }
18353
18354 pop_block_exits(parser, previous_block_exits);
18355 pm_node_list_free(&current_block_exits);
18356
18357 pm_void_statements_check(parser, statements, true);
18358 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18359 }
18360 case PM_TOKEN_BRACE_LEFT: {
18361 // If we were passed a current_hash_keys via the parser, then that
18362 // means we're already parsing a hash and we want to share the set
18363 // of hash keys with this inner hash we're about to parse for the
18364 // sake of warnings. We'll set it to NULL after we grab it to make
18365 // sure subsequent expressions don't use it. Effectively this is a
18366 // way of getting around passing it to every call to
18367 // parse_expression.
18368 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18369 parser->current_hash_keys = NULL;
18370
18371 pm_accepts_block_stack_push(parser, true);
18372 parser_lex(parser);
18373
18374 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18375
18376 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18377 if (current_hash_keys != NULL) {
18378 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18379 } else {
18380 pm_static_literals_t hash_keys = { 0 };
18381 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18382 pm_static_literals_free(&hash_keys);
18383 }
18384
18385 accept1(parser, PM_TOKEN_NEWLINE);
18386 }
18387
18388 pm_accepts_block_stack_pop(parser);
18389 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18390 pm_hash_node_closing_loc_set(node, &parser->previous);
18391
18392 return (pm_node_t *) node;
18393 }
18395 parser_lex(parser);
18396
18397 pm_token_t opening = parser->previous;
18398 opening.type = PM_TOKEN_STRING_BEGIN;
18399 opening.end = opening.start + 1;
18400
18401 pm_token_t content = parser->previous;
18402 content.type = PM_TOKEN_STRING_CONTENT;
18403 content.start = content.start + 1;
18404
18405 pm_token_t closing = not_provided(parser);
18406 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18407 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18408
18409 // Characters can be followed by strings in which case they are
18410 // automatically concatenated.
18411 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18412 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18413 }
18414
18415 return node;
18416 }
18418 parser_lex(parser);
18419 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18420
18421 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18422 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18423 }
18424
18425 return node;
18426 }
18427 case PM_TOKEN_CONSTANT: {
18428 parser_lex(parser);
18429 pm_token_t constant = parser->previous;
18430
18431 // If a constant is immediately followed by parentheses, then this is in
18432 // fact a method call, not a constant read.
18433 if (
18434 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18435 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18436 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18437 match1(parser, PM_TOKEN_BRACE_LEFT)
18438 ) {
18439 pm_arguments_t arguments = { 0 };
18440 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18441 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18442 }
18443
18444 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18445
18446 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18447 // If we get here, then we have a comma immediately following a
18448 // constant, so we're going to parse this as a multiple assignment.
18449 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18450 }
18451
18452 return node;
18453 }
18454 case PM_TOKEN_UCOLON_COLON: {
18455 parser_lex(parser);
18456 pm_token_t delimiter = parser->previous;
18457
18458 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18459 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18460
18461 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18462 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18463 }
18464
18465 return node;
18466 }
18467 case PM_TOKEN_UDOT_DOT:
18468 case PM_TOKEN_UDOT_DOT_DOT: {
18469 pm_token_t operator = parser->current;
18470 parser_lex(parser);
18471
18472 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18473
18474 // Unary .. and ... are special because these are non-associative
18475 // operators that can also be unary operators. In this case we need
18476 // to explicitly reject code that has a .. or ... that follows this
18477 // expression.
18478 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18479 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18480 }
18481
18482 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18483 }
18484 case PM_TOKEN_FLOAT:
18485 parser_lex(parser);
18486 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18488 parser_lex(parser);
18489 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18491 parser_lex(parser);
18492 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18494 parser_lex(parser);
18495 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18497 parser_lex(parser);
18498 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18499
18500 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18501 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18502 }
18503
18504 return node;
18505 }
18507 parser_lex(parser);
18508 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18509
18510 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18511 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18512 }
18513
18514 return node;
18515 }
18517 parser_lex(parser);
18518 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18519
18520 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18521 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18522 }
18523
18524 return node;
18525 }
18527 case PM_TOKEN_METHOD_NAME: {
18528 parser_lex(parser);
18529 pm_token_t identifier = parser->previous;
18530 pm_node_t *node = parse_variable_call(parser);
18531
18532 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18533 // If parse_variable_call returned with a call node, then we
18534 // know the identifier is not in the local table. In that case
18535 // we need to check if there are arguments following the
18536 // identifier.
18537 pm_call_node_t *call = (pm_call_node_t *) node;
18538 pm_arguments_t arguments = { 0 };
18539
18540 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18541 // Since we found arguments, we need to turn off the
18542 // variable call bit in the flags.
18543 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18544
18545 call->opening_loc = arguments.opening_loc;
18546 call->arguments = arguments.arguments;
18547 call->closing_loc = arguments.closing_loc;
18548 call->block = arguments.block;
18549
18550 if (arguments.block != NULL) {
18551 call->base.location.end = arguments.block->location.end;
18552 } else if (arguments.closing_loc.start == NULL) {
18553 if (arguments.arguments != NULL) {
18554 call->base.location.end = arguments.arguments->base.location.end;
18555 } else {
18556 call->base.location.end = call->message_loc.end;
18557 }
18558 } else {
18559 call->base.location.end = arguments.closing_loc.end;
18560 }
18561 }
18562 } else {
18563 // Otherwise, we know the identifier is in the local table. This
18564 // can still be a method call if it is followed by arguments or
18565 // a block, so we need to check for that here.
18566 if (
18567 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18568 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18569 match1(parser, PM_TOKEN_BRACE_LEFT)
18570 ) {
18571 pm_arguments_t arguments = { 0 };
18572 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18573 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18574
18576 // If we're about to convert an 'it' implicit local
18577 // variable read into a method call, we need to remove
18578 // it from the list of implicit local variables.
18579 parse_target_implicit_parameter(parser, node);
18580 } else {
18581 // Otherwise, we're about to convert a regular local
18582 // variable read into a method call, in which case we
18583 // need to indicate that this was not a read for the
18584 // purposes of warnings.
18586
18587 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18588 parse_target_implicit_parameter(parser, node);
18589 } else {
18591 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18592 }
18593 }
18594
18595 pm_node_destroy(parser, node);
18596 return (pm_node_t *) fcall;
18597 }
18598 }
18599
18600 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18601 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18602 }
18603
18604 return node;
18605 }
18607 // Here we have found a heredoc. We'll parse it and add it to the
18608 // list of strings.
18609 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18610 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18611
18612 size_t common_whitespace = (size_t) -1;
18613 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18614
18615 parser_lex(parser);
18616 pm_token_t opening = parser->previous;
18617
18618 pm_node_t *node;
18619 pm_node_t *part;
18620
18621 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18622 // If we get here, then we have an empty heredoc. We'll create
18623 // an empty content token and return an empty string node.
18624 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18625 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18626
18627 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18628 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18629 } else {
18630 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18631 }
18632
18633 node->location.end = opening.end;
18634 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18635 // If we get here, then we tried to find something in the
18636 // heredoc but couldn't actually parse anything, so we'll just
18637 // return a missing node.
18638 //
18639 // parse_string_part handles its own errors, so there is no need
18640 // for us to add one here.
18641 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18642 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18643 // If we get here, then the part that we parsed was plain string
18644 // content and we're at the end of the heredoc, so we can return
18645 // just a string node with the heredoc opening and closing as
18646 // its opening and closing.
18647 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18648 pm_string_node_t *cast = (pm_string_node_t *) part;
18649
18650 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18651 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18652 cast->base.location = cast->opening_loc;
18653
18654 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18655 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18656 cast->base.type = PM_X_STRING_NODE;
18657 }
18658
18659 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18660 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18661 }
18662
18663 node = (pm_node_t *) cast;
18664 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18665 } else {
18666 // If we get here, then we have multiple parts in the heredoc,
18667 // so we'll need to create an interpolated string node to hold
18668 // them all.
18669 pm_node_list_t parts = { 0 };
18670 pm_node_list_append(&parts, part);
18671
18672 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18673 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18674 pm_node_list_append(&parts, part);
18675 }
18676 }
18677
18678 // Now that we have all of the parts, create the correct type of
18679 // interpolated node.
18680 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18681 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18682 cast->parts = parts;
18683
18684 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18685 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18686
18687 cast->base.location = cast->opening_loc;
18688 node = (pm_node_t *) cast;
18689 } else {
18690 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18691 pm_node_list_free(&parts);
18692
18693 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18694 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18695
18696 cast->base.location = cast->opening_loc;
18697 node = (pm_node_t *) cast;
18698 }
18699
18700 // If this is a heredoc that is indented with a ~, then we need
18701 // to dedent each line by the common leading whitespace.
18702 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18703 pm_node_list_t *nodes;
18704 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18705 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18706 } else {
18707 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18708 }
18709
18710 parse_heredoc_dedent(parser, nodes, common_whitespace);
18711 }
18712 }
18713
18714 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18715 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18716 }
18717
18718 return node;
18719 }
18721 parser_lex(parser);
18722 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18723
18724 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18725 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18726 }
18727
18728 return node;
18729 }
18730 case PM_TOKEN_INTEGER: {
18731 pm_node_flags_t base = parser->integer_base;
18732 parser_lex(parser);
18733 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18734 }
18736 pm_node_flags_t base = parser->integer_base;
18737 parser_lex(parser);
18738 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18739 }
18741 pm_node_flags_t base = parser->integer_base;
18742 parser_lex(parser);
18743 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18744 }
18746 pm_node_flags_t base = parser->integer_base;
18747 parser_lex(parser);
18748 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18749 }
18751 parser_lex(parser);
18752 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18754 parser_lex(parser);
18755 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18757 parser_lex(parser);
18758 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18760 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18761 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18762 }
18763
18764 parser_lex(parser);
18765 pm_token_t keyword = parser->previous;
18766
18767 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18768 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18769
18770 switch (PM_NODE_TYPE(new_name)) {
18776 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18777 }
18778 } else {
18779 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18780 }
18781
18782 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18783 }
18784 case PM_SYMBOL_NODE:
18787 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18788 }
18789 }
18791 default:
18792 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18793 }
18794 }
18795 case PM_TOKEN_KEYWORD_CASE: {
18796 size_t opening_newline_index = token_newline_index(parser);
18797 parser_lex(parser);
18798
18799 pm_token_t case_keyword = parser->previous;
18800 pm_node_t *predicate = NULL;
18801
18802 pm_node_list_t current_block_exits = { 0 };
18803 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18804
18805 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18806 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18807 predicate = NULL;
18808 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18809 predicate = NULL;
18810 } else if (!token_begins_expression_p(parser->current.type)) {
18811 predicate = NULL;
18812 } else {
18813 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18814 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18815 }
18816
18817 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18818 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18819 parser_lex(parser);
18820
18821 pop_block_exits(parser, previous_block_exits);
18822 pm_node_list_free(&current_block_exits);
18823
18824 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18825 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18826 }
18827
18828 // At this point we can create a case node, though we don't yet know
18829 // if it is a case-in or case-when node.
18830 pm_token_t end_keyword = not_provided(parser);
18831 pm_node_t *node;
18832
18833 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18834 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18835 pm_static_literals_t literals = { 0 };
18836
18837 // At this point we've seen a when keyword, so we know this is a
18838 // case-when node. We will continue to parse the when nodes
18839 // until we hit the end of the list.
18840 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18841 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18842 parser_lex(parser);
18843
18844 pm_token_t when_keyword = parser->previous;
18845 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18846
18847 do {
18848 if (accept1(parser, PM_TOKEN_USTAR)) {
18849 pm_token_t operator = parser->previous;
18850 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18851
18852 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18853 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18854
18855 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18856 } else {
18857 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18858 pm_when_node_conditions_append(when_node, condition);
18859
18860 // If we found a missing node, then this is a syntax
18861 // error and we should stop looping.
18862 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18863
18864 // If this is a string node, then we need to mark it
18865 // as frozen because when clause strings are frozen.
18866 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18867 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18868 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18869 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18870 }
18871
18872 pm_when_clause_static_literals_add(parser, &literals, condition);
18873 }
18874 } while (accept1(parser, PM_TOKEN_COMMA));
18875
18876 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18877 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18878 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18879 }
18880 } else {
18881 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18882 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18883 }
18884
18886 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18887 if (statements != NULL) {
18888 pm_when_node_statements_set(when_node, statements);
18889 }
18890 }
18891
18892 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18893 }
18894
18895 // If we didn't parse any conditions (in or when) then we need
18896 // to indicate that we have an error.
18897 if (case_node->conditions.size == 0) {
18898 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18899 }
18900
18901 pm_static_literals_free(&literals);
18902 node = (pm_node_t *) case_node;
18903 } else {
18904 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18905
18906 // If this is a case-match node (i.e., it is a pattern matching
18907 // case statement) then we must have a predicate.
18908 if (predicate == NULL) {
18909 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18910 }
18911
18912 // At this point we expect that we're parsing a case-in node. We
18913 // will continue to parse the in nodes until we hit the end of
18914 // the list.
18915 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18916 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18917
18918 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18919 parser->pattern_matching_newlines = true;
18920
18921 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18922 parser->command_start = false;
18923 parser_lex(parser);
18924
18925 pm_token_t in_keyword = parser->previous;
18926
18927 pm_constant_id_list_t captures = { 0 };
18928 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18929
18930 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18931 pm_constant_id_list_free(&captures);
18932
18933 // Since we're in the top-level of the case-in node we need
18934 // to check for guard clauses in the form of `if` or
18935 // `unless` statements.
18936 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18937 pm_token_t keyword = parser->previous;
18938 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18939 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18940 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18941 pm_token_t keyword = parser->previous;
18942 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18943 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18944 }
18945
18946 // Now we need to check for the terminator of the in node's
18947 // pattern. It can be a newline or semicolon optionally
18948 // followed by a `then` keyword.
18949 pm_token_t then_keyword;
18950 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18951 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18952 then_keyword = parser->previous;
18953 } else {
18954 then_keyword = not_provided(parser);
18955 }
18956 } else {
18957 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18958 then_keyword = parser->previous;
18959 }
18960
18961 // Now we can actually parse the statements associated with
18962 // the in node.
18963 pm_statements_node_t *statements;
18965 statements = NULL;
18966 } else {
18967 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18968 }
18969
18970 // Now that we have the full pattern and statements, we can
18971 // create the node and attach it to the case node.
18972 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
18973 pm_case_match_node_condition_append(case_node, condition);
18974 }
18975
18976 // If we didn't parse any conditions (in or when) then we need
18977 // to indicate that we have an error.
18978 if (case_node->conditions.size == 0) {
18979 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18980 }
18981
18982 node = (pm_node_t *) case_node;
18983 }
18984
18985 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18986 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18987 pm_token_t else_keyword = parser->previous;
18988 pm_else_node_t *else_node;
18989
18990 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18991 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18992 } else {
18993 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18994 }
18995
18996 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18997 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18998 } else {
18999 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
19000 }
19001 }
19002
19003 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
19004 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
19005
19006 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19007 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
19008 } else {
19009 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
19010 }
19011
19012 pop_block_exits(parser, previous_block_exits);
19013 pm_node_list_free(&current_block_exits);
19014
19015 return node;
19016 }
19018 size_t opening_newline_index = token_newline_index(parser);
19019 parser_lex(parser);
19020
19021 pm_token_t begin_keyword = parser->previous;
19022 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19023
19024 pm_node_list_t current_block_exits = { 0 };
19025 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19026 pm_statements_node_t *begin_statements = NULL;
19027
19029 pm_accepts_block_stack_push(parser, true);
19030 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19031 pm_accepts_block_stack_pop(parser);
19032 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19033 }
19034
19035 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19036 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19037 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19038
19039 begin_node->base.location.end = parser->previous.end;
19040 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19041
19042 pop_block_exits(parser, previous_block_exits);
19043 pm_node_list_free(&current_block_exits);
19044
19045 return (pm_node_t *) begin_node;
19046 }
19048 pm_node_list_t current_block_exits = { 0 };
19049 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19050
19051 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19052 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19053 }
19054
19055 parser_lex(parser);
19056 pm_token_t keyword = parser->previous;
19057
19058 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19059 pm_token_t opening = parser->previous;
19060 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19061
19062 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19063 pm_context_t context = parser->current_context->context;
19064 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19065 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19066 }
19067
19068 flush_block_exits(parser, previous_block_exits);
19069 pm_node_list_free(&current_block_exits);
19070
19071 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19072 }
19076 parser_lex(parser);
19077
19078 pm_token_t keyword = parser->previous;
19079 pm_arguments_t arguments = { 0 };
19080
19081 if (
19082 token_begins_expression_p(parser->current.type) ||
19083 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19084 ) {
19085 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19086
19087 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19088 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19089 }
19090 }
19091
19092 switch (keyword.type) {
19094 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19095 if (!parser->partial_script) parse_block_exit(parser, node);
19096 return node;
19097 }
19098 case PM_TOKEN_KEYWORD_NEXT: {
19099 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19100 if (!parser->partial_script) parse_block_exit(parser, node);
19101 return node;
19102 }
19104 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19105 parse_return(parser, node);
19106 return node;
19107 }
19108 default:
19109 assert(false && "unreachable");
19110 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19111 }
19112 }
19114 parser_lex(parser);
19115
19116 pm_token_t keyword = parser->previous;
19117 pm_arguments_t arguments = { 0 };
19118 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19119
19120 if (
19121 arguments.opening_loc.start == NULL &&
19122 arguments.arguments == NULL &&
19123 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19124 ) {
19125 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19126 }
19127
19128 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19129 }
19131 parser_lex(parser);
19132
19133 pm_token_t keyword = parser->previous;
19134 pm_arguments_t arguments = { 0 };
19135 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19136
19137 // It's possible that we've parsed a block argument through our
19138 // call to parse_arguments_list. If we found one, we should mark it
19139 // as invalid and destroy it, as we don't have a place for it on the
19140 // yield node.
19141 if (arguments.block != NULL) {
19142 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19143 pm_node_destroy(parser, arguments.block);
19144 arguments.block = NULL;
19145 }
19146
19147 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19148 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19149
19150 return node;
19151 }
19153 size_t opening_newline_index = token_newline_index(parser);
19154 parser_lex(parser);
19155
19156 pm_token_t class_keyword = parser->previous;
19157 pm_do_loop_stack_push(parser, false);
19158
19159 pm_node_list_t current_block_exits = { 0 };
19160 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19161
19162 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19163 pm_token_t operator = parser->previous;
19164 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19165
19166 pm_parser_scope_push(parser, true);
19167 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19168 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19169 }
19170
19171 pm_node_t *statements = NULL;
19173 pm_accepts_block_stack_push(parser, true);
19174 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19175 pm_accepts_block_stack_pop(parser);
19176 }
19177
19178 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19179 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19180 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19181 } else {
19182 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19183 }
19184
19185 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19186
19187 pm_constant_id_list_t locals;
19188 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19189
19190 pm_parser_scope_pop(parser);
19191 pm_do_loop_stack_pop(parser);
19192
19193 flush_block_exits(parser, previous_block_exits);
19194 pm_node_list_free(&current_block_exits);
19195
19196 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19197 }
19198
19199 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19200 pm_token_t name = parser->previous;
19201 if (name.type != PM_TOKEN_CONSTANT) {
19202 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19203 }
19204
19205 pm_token_t inheritance_operator;
19206 pm_node_t *superclass;
19207
19208 if (match1(parser, PM_TOKEN_LESS)) {
19209 inheritance_operator = parser->current;
19210 lex_state_set(parser, PM_LEX_STATE_BEG);
19211
19212 parser->command_start = true;
19213 parser_lex(parser);
19214
19215 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19216 } else {
19217 inheritance_operator = not_provided(parser);
19218 superclass = NULL;
19219 }
19220
19221 pm_parser_scope_push(parser, true);
19222
19223 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19224 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19225 } else {
19226 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19227 }
19228 pm_node_t *statements = NULL;
19229
19231 pm_accepts_block_stack_push(parser, true);
19232 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19233 pm_accepts_block_stack_pop(parser);
19234 }
19235
19236 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19237 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19238 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19239 } else {
19240 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19241 }
19242
19243 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19244
19245 if (context_def_p(parser)) {
19246 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19247 }
19248
19249 pm_constant_id_list_t locals;
19250 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19251
19252 pm_parser_scope_pop(parser);
19253 pm_do_loop_stack_pop(parser);
19254
19255 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19256 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19257 }
19258
19259 pop_block_exits(parser, previous_block_exits);
19260 pm_node_list_free(&current_block_exits);
19261
19262 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19263 }
19264 case PM_TOKEN_KEYWORD_DEF: {
19265 pm_node_list_t current_block_exits = { 0 };
19266 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19267
19268 pm_token_t def_keyword = parser->current;
19269 size_t opening_newline_index = token_newline_index(parser);
19270
19271 pm_node_t *receiver = NULL;
19272 pm_token_t operator = not_provided(parser);
19273 pm_token_t name;
19274
19275 // This context is necessary for lexing `...` in a bare params
19276 // correctly. It must be pushed before lexing the first param, so it
19277 // is here.
19278 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19279 parser_lex(parser);
19280
19281 // This will be false if the method name is not a valid identifier
19282 // but could be followed by an operator.
19283 bool valid_name = true;
19284
19285 switch (parser->current.type) {
19286 case PM_CASE_OPERATOR:
19287 pm_parser_scope_push(parser, true);
19288 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19289 parser_lex(parser);
19290
19291 name = parser->previous;
19292 break;
19293 case PM_TOKEN_IDENTIFIER: {
19294 parser_lex(parser);
19295
19296 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19297 receiver = parse_variable_call(parser);
19298
19299 pm_parser_scope_push(parser, true);
19300 lex_state_set(parser, PM_LEX_STATE_FNAME);
19301 parser_lex(parser);
19302
19303 operator = parser->previous;
19304 name = parse_method_definition_name(parser);
19305 } else {
19306 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19307 pm_parser_scope_push(parser, true);
19308
19309 name = parser->previous;
19310 }
19311
19312 break;
19313 }
19317 valid_name = false;
19319 case PM_TOKEN_CONSTANT:
19327 pm_parser_scope_push(parser, true);
19328 parser_lex(parser);
19329
19330 pm_token_t identifier = parser->previous;
19331
19332 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19333 lex_state_set(parser, PM_LEX_STATE_FNAME);
19334 parser_lex(parser);
19335 operator = parser->previous;
19336
19337 switch (identifier.type) {
19338 case PM_TOKEN_CONSTANT:
19339 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19340 break;
19342 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19343 break;
19345 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19346 break;
19348 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19349 break;
19351 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19352 break;
19354 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19355 break;
19357 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19358 break;
19360 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19361 break;
19363 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19364 break;
19366 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19367 break;
19369 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19370 break;
19371 default:
19372 break;
19373 }
19374
19375 name = parse_method_definition_name(parser);
19376 } else {
19377 if (!valid_name) {
19378 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19379 }
19380
19381 name = identifier;
19382 }
19383 break;
19384 }
19386 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19387 // the inner expression of this parenthesis should not be
19388 // processed under this context. Thus, the context is popped
19389 // here.
19390 context_pop(parser);
19391 parser_lex(parser);
19392
19393 pm_token_t lparen = parser->previous;
19394 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19395
19396 accept1(parser, PM_TOKEN_NEWLINE);
19397 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19398 pm_token_t rparen = parser->previous;
19399
19400 lex_state_set(parser, PM_LEX_STATE_FNAME);
19401 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19402
19403 operator = parser->previous;
19404 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
19405
19406 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19407 // reason as described above.
19408 pm_parser_scope_push(parser, true);
19409 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19410 name = parse_method_definition_name(parser);
19411 break;
19412 }
19413 default:
19414 pm_parser_scope_push(parser, true);
19415 name = parse_method_definition_name(parser);
19416 break;
19417 }
19418
19419 pm_token_t lparen;
19420 pm_token_t rparen;
19421 pm_parameters_node_t *params;
19422
19423 switch (parser->current.type) {
19425 parser_lex(parser);
19426 lparen = parser->previous;
19427
19428 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19429 params = NULL;
19430 } else {
19431 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19432 }
19433
19434 lex_state_set(parser, PM_LEX_STATE_BEG);
19435 parser->command_start = true;
19436
19437 context_pop(parser);
19438 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19439 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19440 parser->previous.start = parser->previous.end;
19441 parser->previous.type = PM_TOKEN_MISSING;
19442 }
19443
19444 rparen = parser->previous;
19445 break;
19446 }
19447 case PM_CASE_PARAMETER: {
19448 // If we're about to lex a label, we need to add the label
19449 // state to make sure the next newline is ignored.
19450 if (parser->current.type == PM_TOKEN_LABEL) {
19451 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19452 }
19453
19454 lparen = not_provided(parser);
19455 rparen = not_provided(parser);
19456 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19457
19458 context_pop(parser);
19459 break;
19460 }
19461 default: {
19462 lparen = not_provided(parser);
19463 rparen = not_provided(parser);
19464 params = NULL;
19465
19466 context_pop(parser);
19467 break;
19468 }
19469 }
19470
19471 pm_node_t *statements = NULL;
19472 pm_token_t equal;
19473 pm_token_t end_keyword;
19474
19475 if (accept1(parser, PM_TOKEN_EQUAL)) {
19476 if (token_is_setter_name(&name)) {
19477 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19478 }
19479 equal = parser->previous;
19480
19481 context_push(parser, PM_CONTEXT_DEF);
19482 pm_do_loop_stack_push(parser, false);
19483 statements = (pm_node_t *) pm_statements_node_create(parser);
19484
19485 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19486
19487 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19488 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19489
19490 pm_token_t rescue_keyword = parser->previous;
19491 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19492 context_pop(parser);
19493
19494 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19495 }
19496
19497 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19498 pm_do_loop_stack_pop(parser);
19499 context_pop(parser);
19500 end_keyword = not_provided(parser);
19501 } else {
19502 equal = not_provided(parser);
19503
19504 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19505 lex_state_set(parser, PM_LEX_STATE_BEG);
19506 parser->command_start = true;
19507 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19508 } else {
19509 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19510 }
19511
19512 pm_accepts_block_stack_push(parser, true);
19513 pm_do_loop_stack_push(parser, false);
19514
19516 pm_accepts_block_stack_push(parser, true);
19517 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19518 pm_accepts_block_stack_pop(parser);
19519 }
19520
19522 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19523 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19524 } else {
19525 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19526 }
19527
19528 pm_accepts_block_stack_pop(parser);
19529 pm_do_loop_stack_pop(parser);
19530
19531 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19532 end_keyword = parser->previous;
19533 }
19534
19535 pm_constant_id_list_t locals;
19536 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19537 pm_parser_scope_pop(parser);
19538
19544 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19545
19546 flush_block_exits(parser, previous_block_exits);
19547 pm_node_list_free(&current_block_exits);
19548
19549 return (pm_node_t *) pm_def_node_create(
19550 parser,
19551 name_id,
19552 &name,
19553 receiver,
19554 params,
19555 statements,
19556 &locals,
19557 &def_keyword,
19558 &operator,
19559 &lparen,
19560 &rparen,
19561 &equal,
19562 &end_keyword
19563 );
19564 }
19566 parser_lex(parser);
19567 pm_token_t keyword = parser->previous;
19568
19569 pm_token_t lparen;
19570 pm_token_t rparen;
19571 pm_node_t *expression;
19572 context_push(parser, PM_CONTEXT_DEFINED);
19573
19574 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19575 lparen = parser->previous;
19576 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19577
19578 if (parser->recovering) {
19579 rparen = not_provided(parser);
19580 } else {
19581 accept1(parser, PM_TOKEN_NEWLINE);
19582 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19583 rparen = parser->previous;
19584 }
19585 } else {
19586 lparen = not_provided(parser);
19587 rparen = not_provided(parser);
19588 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19589 }
19590
19591 context_pop(parser);
19592 return (pm_node_t *) pm_defined_node_create(
19593 parser,
19594 &lparen,
19595 expression,
19596 &rparen,
19597 &PM_LOCATION_TOKEN_VALUE(&keyword)
19598 );
19599 }
19601 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19602 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19603 }
19604
19605 parser_lex(parser);
19606 pm_token_t keyword = parser->previous;
19607
19608 if (context_def_p(parser)) {
19609 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19610 }
19611
19612 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19613 pm_token_t opening = parser->previous;
19614 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19615
19616 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19617 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19618 }
19620 parser_lex(parser);
19621 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19622 case PM_TOKEN_KEYWORD_FOR: {
19623 size_t opening_newline_index = token_newline_index(parser);
19624 parser_lex(parser);
19625
19626 pm_token_t for_keyword = parser->previous;
19627 pm_node_t *index;
19628
19629 context_push(parser, PM_CONTEXT_FOR_INDEX);
19630
19631 // First, parse out the first index expression.
19632 if (accept1(parser, PM_TOKEN_USTAR)) {
19633 pm_token_t star_operator = parser->previous;
19634 pm_node_t *name = NULL;
19635
19636 if (token_begins_expression_p(parser->current.type)) {
19637 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19638 }
19639
19640 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19641 } else if (token_begins_expression_p(parser->current.type)) {
19642 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19643 } else {
19644 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19645 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19646 }
19647
19648 // Now, if there are multiple index expressions, parse them out.
19649 if (match1(parser, PM_TOKEN_COMMA)) {
19650 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19651 } else {
19652 index = parse_target(parser, index, false, false);
19653 }
19654
19655 context_pop(parser);
19656 pm_do_loop_stack_push(parser, true);
19657
19658 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19659 pm_token_t in_keyword = parser->previous;
19660
19661 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19662 pm_do_loop_stack_pop(parser);
19663
19664 pm_token_t do_keyword;
19665 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19666 do_keyword = parser->previous;
19667 } else {
19668 do_keyword = not_provided(parser);
19669 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19670 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19671 }
19672 }
19673
19674 pm_statements_node_t *statements = NULL;
19675 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19676 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19677 }
19678
19679 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19680 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19681
19682 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19683 }
19685 if (parser_end_of_line_p(parser)) {
19686 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19687 }
19688
19689 size_t opening_newline_index = token_newline_index(parser);
19690 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19691 parser_lex(parser);
19692
19693 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19695 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19696 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19697 }
19698
19699 parser_lex(parser);
19700 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19701 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19702
19703 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19704 pm_node_destroy(parser, name);
19705 } else {
19706 pm_undef_node_append(undef, name);
19707
19708 while (match1(parser, PM_TOKEN_COMMA)) {
19709 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19710 parser_lex(parser);
19711 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19712
19713 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19714 pm_node_destroy(parser, name);
19715 break;
19716 }
19717
19718 pm_undef_node_append(undef, name);
19719 }
19720 }
19721
19722 return (pm_node_t *) undef;
19723 }
19724 case PM_TOKEN_KEYWORD_NOT: {
19725 parser_lex(parser);
19726
19727 pm_token_t message = parser->previous;
19728 pm_arguments_t arguments = { 0 };
19729 pm_node_t *receiver = NULL;
19730
19731 accept1(parser, PM_TOKEN_NEWLINE);
19732
19733 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19734 pm_token_t lparen = parser->previous;
19735
19736 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19737 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous);
19738 } else {
19739 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19740 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19741
19742 if (!parser->recovering) {
19743 accept1(parser, PM_TOKEN_NEWLINE);
19744 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19745 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19746 }
19747 }
19748 } else {
19749 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19750 }
19751
19752 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19753 }
19755 size_t opening_newline_index = token_newline_index(parser);
19756 parser_lex(parser);
19757
19758 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19759 }
19761 pm_node_list_t current_block_exits = { 0 };
19762 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19763
19764 size_t opening_newline_index = token_newline_index(parser);
19765 parser_lex(parser);
19766 pm_token_t module_keyword = parser->previous;
19767
19768 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19769 pm_token_t name;
19770
19771 // If we can recover from a syntax error that occurred while parsing
19772 // the name of the module, then we'll handle that here.
19773 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19774 pop_block_exits(parser, previous_block_exits);
19775 pm_node_list_free(&current_block_exits);
19776
19777 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19778 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19779 }
19780
19781 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19782 pm_token_t double_colon = parser->previous;
19783
19784 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19785 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19786 }
19787
19788 // Here we retrieve the name of the module. If it wasn't a constant,
19789 // then it's possible that `module foo` was passed, which is a
19790 // syntax error. We handle that here as well.
19791 name = parser->previous;
19792 if (name.type != PM_TOKEN_CONSTANT) {
19793 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19794 }
19795
19796 pm_parser_scope_push(parser, true);
19797 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19798 pm_node_t *statements = NULL;
19799
19801 pm_accepts_block_stack_push(parser, true);
19802 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19803 pm_accepts_block_stack_pop(parser);
19804 }
19805
19807 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19808 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19809 } else {
19810 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19811 }
19812
19813 pm_constant_id_list_t locals;
19814 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19815
19816 pm_parser_scope_pop(parser);
19817 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19818
19819 if (context_def_p(parser)) {
19820 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19821 }
19822
19823 pop_block_exits(parser, previous_block_exits);
19824 pm_node_list_free(&current_block_exits);
19825
19826 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19827 }
19829 parser_lex(parser);
19830 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19831 case PM_TOKEN_KEYWORD_REDO: {
19832 parser_lex(parser);
19833
19834 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19835 if (!parser->partial_script) parse_block_exit(parser, node);
19836
19837 return node;
19838 }
19840 parser_lex(parser);
19841
19842 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19843 parse_retry(parser, node);
19844
19845 return node;
19846 }
19848 parser_lex(parser);
19849 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19851 parser_lex(parser);
19852 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19854 size_t opening_newline_index = token_newline_index(parser);
19855
19856 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19857 pm_do_loop_stack_push(parser, true);
19858
19859 parser_lex(parser);
19860 pm_token_t keyword = parser->previous;
19861 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19862
19863 pm_do_loop_stack_pop(parser);
19864 context_pop(parser);
19865
19866 pm_token_t do_keyword;
19867 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19868 do_keyword = parser->previous;
19869 } else {
19870 do_keyword = not_provided(parser);
19871 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19872 }
19873
19874 pm_statements_node_t *statements = NULL;
19875 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19876 pm_accepts_block_stack_push(parser, true);
19877 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19878 pm_accepts_block_stack_pop(parser);
19879 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19880 }
19881
19882 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19883 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19884
19885 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19886 }
19888 size_t opening_newline_index = token_newline_index(parser);
19889
19890 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19891 pm_do_loop_stack_push(parser, true);
19892
19893 parser_lex(parser);
19894 pm_token_t keyword = parser->previous;
19895 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19896
19897 pm_do_loop_stack_pop(parser);
19898 context_pop(parser);
19899
19900 pm_token_t do_keyword;
19901 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19902 do_keyword = parser->previous;
19903 } else {
19904 do_keyword = not_provided(parser);
19905 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19906 }
19907
19908 pm_statements_node_t *statements = NULL;
19909 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19910 pm_accepts_block_stack_push(parser, true);
19911 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19912 pm_accepts_block_stack_pop(parser);
19913 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19914 }
19915
19916 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19917 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19918
19919 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19920 }
19922 parser_lex(parser);
19923 pm_token_t opening = parser->previous;
19924 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19925
19926 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19927 accept1(parser, PM_TOKEN_WORDS_SEP);
19928 if (match1(parser, PM_TOKEN_STRING_END)) break;
19929
19930 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19931 pm_token_t opening = not_provided(parser);
19932 pm_token_t closing = not_provided(parser);
19933 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19934 }
19935
19936 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19937 }
19938
19939 pm_token_t closing = parser->current;
19940 if (match1(parser, PM_TOKEN_EOF)) {
19941 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19942 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19943 } else {
19944 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19945 }
19946 pm_array_node_close_set(array, &closing);
19947
19948 return (pm_node_t *) array;
19949 }
19951 parser_lex(parser);
19952 pm_token_t opening = parser->previous;
19953 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19954
19955 // This is the current node that we are parsing that will be added to the
19956 // list of elements.
19957 pm_node_t *current = NULL;
19958
19959 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19960 switch (parser->current.type) {
19961 case PM_TOKEN_WORDS_SEP: {
19962 if (current == NULL) {
19963 // If we hit a separator before we have any content, then we don't
19964 // need to do anything.
19965 } else {
19966 // If we hit a separator after we've hit content, then we need to
19967 // append that content to the list and reset the current node.
19968 pm_array_node_elements_append(array, current);
19969 current = NULL;
19970 }
19971
19972 parser_lex(parser);
19973 break;
19974 }
19976 pm_token_t opening = not_provided(parser);
19977 pm_token_t closing = not_provided(parser);
19978
19979 if (current == NULL) {
19980 // If we hit content and the current node is NULL, then this is
19981 // the first string content we've seen. In that case we're going
19982 // to create a new symbol node and set that to the current.
19983 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
19984 parser_lex(parser);
19985 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19986 // If we hit string content and the current node is an
19987 // interpolated symbol, then we need to append the string content
19988 // to the list of child nodes.
19989 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
19990 parser_lex(parser);
19991
19992 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19993 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19994 // If we hit string content and the current node is a symbol node,
19995 // then we need to convert the current node into an interpolated
19996 // symbol and add the string content to the list of child nodes.
19997 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19998 pm_token_t bounds = not_provided(parser);
19999
20000 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
20001 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
20002 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
20003 parser_lex(parser);
20004
20005 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20006 pm_interpolated_symbol_node_append(interpolated, first_string);
20007 pm_interpolated_symbol_node_append(interpolated, second_string);
20008
20009 xfree(current);
20010 current = (pm_node_t *) interpolated;
20011 } else {
20012 assert(false && "unreachable");
20013 }
20014
20015 break;
20016 }
20017 case PM_TOKEN_EMBVAR: {
20018 bool start_location_set = false;
20019 if (current == NULL) {
20020 // If we hit an embedded variable and the current node is NULL,
20021 // then this is the start of a new element. We'll set the current
20022 // node to a new interpolated symbol.
20023 pm_token_t opening = not_provided(parser);
20024 pm_token_t closing = not_provided(parser);
20025 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20026 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20027 // If we hit an embedded variable and the current node is a symbol
20028 // node, then we'll convert the current into an interpolated
20029 // symbol and add the converted string node to the list of parts.
20030 pm_token_t opening = not_provided(parser);
20031 pm_token_t closing = not_provided(parser);
20032 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20033
20034 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20035 pm_interpolated_symbol_node_append(interpolated, current);
20036 interpolated->base.location.start = current->location.start;
20037 start_location_set = true;
20038 current = (pm_node_t *) interpolated;
20039 } else {
20040 // If we hit an embedded variable and the current node is an
20041 // interpolated symbol, then we'll just add the embedded variable.
20042 }
20043
20044 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20045 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20046 if (!start_location_set) {
20047 current->location.start = part->location.start;
20048 }
20049 break;
20050 }
20052 bool start_location_set = false;
20053 if (current == NULL) {
20054 // If we hit an embedded expression and the current node is NULL,
20055 // then this is the start of a new element. We'll set the current
20056 // node to a new interpolated symbol.
20057 pm_token_t opening = not_provided(parser);
20058 pm_token_t closing = not_provided(parser);
20059 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20060 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20061 // If we hit an embedded expression and the current node is a
20062 // symbol node, then we'll convert the current into an
20063 // interpolated symbol and add the converted string node to the
20064 // list of parts.
20065 pm_token_t opening = not_provided(parser);
20066 pm_token_t closing = not_provided(parser);
20067 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20068
20069 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20070 pm_interpolated_symbol_node_append(interpolated, current);
20071 interpolated->base.location.start = current->location.start;
20072 start_location_set = true;
20073 current = (pm_node_t *) interpolated;
20074 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20075 // If we hit an embedded expression and the current node is an
20076 // interpolated symbol, then we'll just continue on.
20077 } else {
20078 assert(false && "unreachable");
20079 }
20080
20081 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20082 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20083 if (!start_location_set) {
20084 current->location.start = part->location.start;
20085 }
20086 break;
20087 }
20088 default:
20089 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20090 parser_lex(parser);
20091 break;
20092 }
20093 }
20094
20095 // If we have a current node, then we need to append it to the list.
20096 if (current) {
20097 pm_array_node_elements_append(array, current);
20098 }
20099
20100 pm_token_t closing = parser->current;
20101 if (match1(parser, PM_TOKEN_EOF)) {
20102 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20103 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20104 } else {
20105 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20106 }
20107 pm_array_node_close_set(array, &closing);
20108
20109 return (pm_node_t *) array;
20110 }
20112 parser_lex(parser);
20113 pm_token_t opening = parser->previous;
20114 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20115
20116 // Skip all leading whitespace.
20117 accept1(parser, PM_TOKEN_WORDS_SEP);
20118
20119 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20120 accept1(parser, PM_TOKEN_WORDS_SEP);
20121 if (match1(parser, PM_TOKEN_STRING_END)) break;
20122
20123 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20124 pm_token_t opening = not_provided(parser);
20125 pm_token_t closing = not_provided(parser);
20126
20127 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20128 pm_array_node_elements_append(array, string);
20129 }
20130
20131 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20132 }
20133
20134 pm_token_t closing = parser->current;
20135 if (match1(parser, PM_TOKEN_EOF)) {
20136 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20137 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20138 } else {
20139 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20140 }
20141
20142 pm_array_node_close_set(array, &closing);
20143 return (pm_node_t *) array;
20144 }
20146 parser_lex(parser);
20147 pm_token_t opening = parser->previous;
20148 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20149
20150 // This is the current node that we are parsing that will be added
20151 // to the list of elements.
20152 pm_node_t *current = NULL;
20153
20154 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20155 switch (parser->current.type) {
20156 case PM_TOKEN_WORDS_SEP: {
20157 // Reset the explicit encoding if we hit a separator
20158 // since each element can have its own encoding.
20159 parser->explicit_encoding = NULL;
20160
20161 if (current == NULL) {
20162 // If we hit a separator before we have any content,
20163 // then we don't need to do anything.
20164 } else {
20165 // If we hit a separator after we've hit content,
20166 // then we need to append that content to the list
20167 // and reset the current node.
20168 pm_array_node_elements_append(array, current);
20169 current = NULL;
20170 }
20171
20172 parser_lex(parser);
20173 break;
20174 }
20176 pm_token_t opening = not_provided(parser);
20177 pm_token_t closing = not_provided(parser);
20178
20179 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20180 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20181 parser_lex(parser);
20182
20183 if (current == NULL) {
20184 // If we hit content and the current node is NULL,
20185 // then this is the first string content we've seen.
20186 // In that case we're going to create a new string
20187 // node and set that to the current.
20188 current = string;
20189 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20190 // If we hit string content and the current node is
20191 // an interpolated string, then we need to append
20192 // the string content to the list of child nodes.
20193 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20194 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20195 // If we hit string content and the current node is
20196 // a string node, then we need to convert the
20197 // current node into an interpolated string and add
20198 // the string content to the list of child nodes.
20199 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20200 pm_interpolated_string_node_append(interpolated, current);
20201 pm_interpolated_string_node_append(interpolated, string);
20202 current = (pm_node_t *) interpolated;
20203 } else {
20204 assert(false && "unreachable");
20205 }
20206
20207 break;
20208 }
20209 case PM_TOKEN_EMBVAR: {
20210 if (current == NULL) {
20211 // If we hit an embedded variable and the current
20212 // node is NULL, then this is the start of a new
20213 // string. We'll set the current node to a new
20214 // interpolated string.
20215 pm_token_t opening = not_provided(parser);
20216 pm_token_t closing = not_provided(parser);
20217 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20218 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20219 // If we hit an embedded variable and the current
20220 // node is a string node, then we'll convert the
20221 // current into an interpolated string and add the
20222 // string node to the list of parts.
20223 pm_token_t opening = not_provided(parser);
20224 pm_token_t closing = not_provided(parser);
20225 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20226 pm_interpolated_string_node_append(interpolated, current);
20227 current = (pm_node_t *) interpolated;
20228 } else {
20229 // If we hit an embedded variable and the current
20230 // node is an interpolated string, then we'll just
20231 // add the embedded variable.
20232 }
20233
20234 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20235 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20236 break;
20237 }
20239 if (current == NULL) {
20240 // If we hit an embedded expression and the current
20241 // node is NULL, then this is the start of a new
20242 // string. We'll set the current node to a new
20243 // interpolated string.
20244 pm_token_t opening = not_provided(parser);
20245 pm_token_t closing = not_provided(parser);
20246 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20247 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20248 // If we hit an embedded expression and the current
20249 // node is a string node, then we'll convert the
20250 // current into an interpolated string and add the
20251 // string node to the list of parts.
20252 pm_token_t opening = not_provided(parser);
20253 pm_token_t closing = not_provided(parser);
20254 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20255 pm_interpolated_string_node_append(interpolated, current);
20256 current = (pm_node_t *) interpolated;
20257 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20258 // If we hit an embedded expression and the current
20259 // node is an interpolated string, then we'll just
20260 // continue on.
20261 } else {
20262 assert(false && "unreachable");
20263 }
20264
20265 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20266 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20267 break;
20268 }
20269 default:
20270 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20271 parser_lex(parser);
20272 break;
20273 }
20274 }
20275
20276 // If we have a current node, then we need to append it to the list.
20277 if (current) {
20278 pm_array_node_elements_append(array, current);
20279 }
20280
20281 pm_token_t closing = parser->current;
20282 if (match1(parser, PM_TOKEN_EOF)) {
20283 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20284 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20285 } else {
20286 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20287 }
20288
20289 pm_array_node_close_set(array, &closing);
20290 return (pm_node_t *) array;
20291 }
20292 case PM_TOKEN_REGEXP_BEGIN: {
20293 pm_token_t opening = parser->current;
20294 parser_lex(parser);
20295
20296 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20297 // If we get here, then we have an end immediately after a start. In
20298 // that case we'll create an empty content token and return an
20299 // uninterpolated regular expression.
20300 pm_token_t content = (pm_token_t) {
20302 .start = parser->previous.end,
20303 .end = parser->previous.end
20304 };
20305
20306 parser_lex(parser);
20307
20308 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20310
20311 return node;
20312 }
20313
20315
20316 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20317 // In this case we've hit string content so we know the regular
20318 // expression at least has something in it. We'll need to check if the
20319 // following token is the end (in which case we can return a plain
20320 // regular expression) or if it's not then it has interpolation.
20321 pm_string_t unescaped = parser->current_string;
20322 pm_token_t content = parser->current;
20323 bool ascii_only = parser->current_regular_expression_ascii_only;
20324 parser_lex(parser);
20325
20326 // If we hit an end, then we can create a regular expression
20327 // node without interpolation, which can be represented more
20328 // succinctly and more easily compiled.
20329 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20330 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20331
20332 // If we're not immediately followed by a =~, then we want
20333 // to parse all of the errors at this point. If it is
20334 // followed by a =~, then it will get parsed higher up while
20335 // parsing the named captures as well.
20336 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20337 parse_regular_expression_errors(parser, node);
20338 }
20339
20340 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20341 return (pm_node_t *) node;
20342 }
20343
20344 // If we get here, then we have interpolation so we'll need to create
20345 // a regular expression node with interpolation.
20346 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20347
20348 pm_token_t opening = not_provided(parser);
20349 pm_token_t closing = not_provided(parser);
20350 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20351
20352 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20353 // This is extremely strange, but the first string part of a
20354 // regular expression will always be tagged as binary if we
20355 // are in a US-ASCII file, no matter its contents.
20356 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20357 }
20358
20359 pm_interpolated_regular_expression_node_append(interpolated, part);
20360 } else {
20361 // If the first part of the body of the regular expression is not a
20362 // string content, then we have interpolation and we need to create an
20363 // interpolated regular expression node.
20364 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20365 }
20366
20367 // Now that we're here and we have interpolation, we'll parse all of the
20368 // parts into the list.
20369 pm_node_t *part;
20370 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20371 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20372 pm_interpolated_regular_expression_node_append(interpolated, part);
20373 }
20374 }
20375
20376 pm_token_t closing = parser->current;
20377 if (match1(parser, PM_TOKEN_EOF)) {
20378 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20379 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20380 } else {
20381 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20382 }
20383
20384 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20385 return (pm_node_t *) interpolated;
20386 }
20387 case PM_TOKEN_BACKTICK:
20389 parser_lex(parser);
20390 pm_token_t opening = parser->previous;
20391
20392 // When we get here, we don't know if this string is going to have
20393 // interpolation or not, even though it is allowed. Still, we want to be
20394 // able to return a string node without interpolation if we can since
20395 // it'll be faster.
20396 if (match1(parser, PM_TOKEN_STRING_END)) {
20397 // If we get here, then we have an end immediately after a start. In
20398 // that case we'll create an empty content token and return an
20399 // uninterpolated string.
20400 pm_token_t content = (pm_token_t) {
20402 .start = parser->previous.end,
20403 .end = parser->previous.end
20404 };
20405
20406 parser_lex(parser);
20407 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20408 }
20409
20411
20412 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20413 // In this case we've hit string content so we know the string
20414 // at least has something in it. We'll need to check if the
20415 // following token is the end (in which case we can return a
20416 // plain string) or if it's not then it has interpolation.
20417 pm_string_t unescaped = parser->current_string;
20418 pm_token_t content = parser->current;
20419 parser_lex(parser);
20420
20421 if (match1(parser, PM_TOKEN_STRING_END)) {
20422 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20423 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20424 parser_lex(parser);
20425 return node;
20426 }
20427
20428 // If we get here, then we have interpolation so we'll need to
20429 // create a string node with interpolation.
20430 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20431
20432 pm_token_t opening = not_provided(parser);
20433 pm_token_t closing = not_provided(parser);
20434
20435 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20436 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20437
20438 pm_interpolated_xstring_node_append(node, part);
20439 } else {
20440 // If the first part of the body of the string is not a string
20441 // content, then we have interpolation and we need to create an
20442 // interpolated string node.
20443 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20444 }
20445
20446 pm_node_t *part;
20447 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20448 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20449 pm_interpolated_xstring_node_append(node, part);
20450 }
20451 }
20452
20453 pm_token_t closing = parser->current;
20454 if (match1(parser, PM_TOKEN_EOF)) {
20455 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20456 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20457 } else {
20458 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20459 }
20460 pm_interpolated_xstring_node_closing_set(node, &closing);
20461
20462 return (pm_node_t *) node;
20463 }
20464 case PM_TOKEN_USTAR: {
20465 parser_lex(parser);
20466
20467 // * operators at the beginning of expressions are only valid in the
20468 // context of a multiple assignment. We enforce that here. We'll
20469 // still lex past it though and create a missing node place.
20470 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20471 pm_parser_err_prefix(parser, diag_id);
20472 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20473 }
20474
20475 pm_token_t operator = parser->previous;
20476 pm_node_t *name = NULL;
20477
20478 if (token_begins_expression_p(parser->current.type)) {
20479 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20480 }
20481
20482 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20483
20484 if (match1(parser, PM_TOKEN_COMMA)) {
20485 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20486 } else {
20487 return parse_target_validate(parser, splat, true);
20488 }
20489 }
20490 case PM_TOKEN_BANG: {
20491 if (binding_power > PM_BINDING_POWER_UNARY) {
20492 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20493 }
20494
20495 parser_lex(parser);
20496
20497 pm_token_t operator = parser->previous;
20498 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20499 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20500
20501 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20502 return (pm_node_t *) node;
20503 }
20504 case PM_TOKEN_TILDE: {
20505 if (binding_power > PM_BINDING_POWER_UNARY) {
20506 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20507 }
20508 parser_lex(parser);
20509
20510 pm_token_t operator = parser->previous;
20511 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20512 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20513
20514 return (pm_node_t *) node;
20515 }
20516 case PM_TOKEN_UMINUS: {
20517 if (binding_power > PM_BINDING_POWER_UNARY) {
20518 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20519 }
20520 parser_lex(parser);
20521
20522 pm_token_t operator = parser->previous;
20523 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20524 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20525
20526 return (pm_node_t *) node;
20527 }
20528 case PM_TOKEN_UMINUS_NUM: {
20529 parser_lex(parser);
20530
20531 pm_token_t operator = parser->previous;
20532 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20533
20534 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20535 pm_token_t exponent_operator = parser->previous;
20536 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20537 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20538 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20539 } else {
20540 switch (PM_NODE_TYPE(node)) {
20541 case PM_INTEGER_NODE:
20542 case PM_FLOAT_NODE:
20543 case PM_RATIONAL_NODE:
20544 case PM_IMAGINARY_NODE:
20545 parse_negative_numeric(node);
20546 break;
20547 default:
20548 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20549 break;
20550 }
20551 }
20552
20553 return node;
20554 }
20556 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20558
20559 size_t opening_newline_index = token_newline_index(parser);
20560 pm_accepts_block_stack_push(parser, true);
20561 parser_lex(parser);
20562
20563 pm_token_t operator = parser->previous;
20564 pm_parser_scope_push(parser, false);
20565
20566 pm_block_parameters_node_t *block_parameters;
20567
20568 switch (parser->current.type) {
20570 pm_token_t opening = parser->current;
20571 parser_lex(parser);
20572
20573 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20574 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20575 } else {
20576 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20577 }
20578
20579 accept1(parser, PM_TOKEN_NEWLINE);
20580 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20581
20582 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20583 break;
20584 }
20585 case PM_CASE_PARAMETER: {
20586 pm_accepts_block_stack_push(parser, false);
20587 pm_token_t opening = not_provided(parser);
20588 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20589 pm_accepts_block_stack_pop(parser);
20590 break;
20591 }
20592 default: {
20593 block_parameters = NULL;
20594 break;
20595 }
20596 }
20597
20598 pm_token_t opening;
20599 pm_node_t *body = NULL;
20600 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20601
20602 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20603 opening = parser->previous;
20604
20605 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20606 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20607 }
20608
20609 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20610 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20611 } else {
20612 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20613 opening = parser->previous;
20614
20616 pm_accepts_block_stack_push(parser, true);
20617 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20618 pm_accepts_block_stack_pop(parser);
20619 }
20620
20621 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20622 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20623 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20624 } else {
20625 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20626 }
20627
20628 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20629 }
20630
20631 pm_constant_id_list_t locals;
20632 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20633 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20634
20635 pm_parser_scope_pop(parser);
20636 pm_accepts_block_stack_pop(parser);
20637
20638 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20639 }
20640 case PM_TOKEN_UPLUS: {
20641 if (binding_power > PM_BINDING_POWER_UNARY) {
20642 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20643 }
20644 parser_lex(parser);
20645
20646 pm_token_t operator = parser->previous;
20647 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20648 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20649
20650 return (pm_node_t *) node;
20651 }
20653 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20654 case PM_TOKEN_SYMBOL_BEGIN: {
20655 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20656 parser_lex(parser);
20657
20658 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20659 }
20660 default: {
20661 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20662
20663 if (recoverable != PM_CONTEXT_NONE) {
20664 parser->recovering = true;
20665
20666 // If the given error is not the generic one, then we'll add it
20667 // here because it will provide more context in addition to the
20668 // recoverable error that we will also add.
20669 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20670 pm_parser_err_prefix(parser, diag_id);
20671 }
20672
20673 // If we get here, then we are assuming this token is closing a
20674 // parent context, so we'll indicate that to the user so that
20675 // they know how we behaved.
20676 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20677 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20678 // We're going to make a special case here, because "cannot
20679 // parse expression" is pretty generic, and we know here that we
20680 // have an unexpected token.
20681 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20682 } else {
20683 pm_parser_err_prefix(parser, diag_id);
20684 }
20685
20686 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20687 }
20688 }
20689}
20690
20700static pm_node_t *
20701parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20702 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20703
20704 // Contradicting binding powers, the right-hand-side value of the assignment
20705 // allows the `rescue` modifier.
20706 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20707 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20708
20709 pm_token_t rescue = parser->current;
20710 parser_lex(parser);
20711
20712 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20713 context_pop(parser);
20714
20715 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20716 }
20717
20718 return value;
20719}
20720
20725static void
20726parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20727 switch (PM_NODE_TYPE(node)) {
20728 case PM_BEGIN_NODE: {
20729 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20730 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20731 break;
20732 }
20735 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20736 break;
20737 }
20738 case PM_PARENTHESES_NODE: {
20739 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20740 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20741 break;
20742 }
20743 case PM_STATEMENTS_NODE: {
20744 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20745 const pm_node_t *statement;
20746
20747 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20748 parse_assignment_value_local(parser, statement);
20749 }
20750 break;
20751 }
20752 default:
20753 break;
20754 }
20755}
20756
20769static pm_node_t *
20770parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20771 bool permitted = true;
20772 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20773
20774 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20775 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20776
20777 parse_assignment_value_local(parser, value);
20778 bool single_value = true;
20779
20780 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20781 single_value = false;
20782
20783 pm_token_t opening = not_provided(parser);
20784 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20785
20786 pm_array_node_elements_append(array, value);
20787 value = (pm_node_t *) array;
20788
20789 while (accept1(parser, PM_TOKEN_COMMA)) {
20790 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20791
20792 pm_array_node_elements_append(array, element);
20793 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20794
20795 parse_assignment_value_local(parser, element);
20796 }
20797 }
20798
20799 // Contradicting binding powers, the right-hand-side value of the assignment
20800 // allows the `rescue` modifier.
20801 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20802 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20803
20804 pm_token_t rescue = parser->current;
20805 parser_lex(parser);
20806
20807 bool accepts_command_call_inner = false;
20808
20809 // RHS can accept command call iff the value is a call with arguments
20810 // but without parenthesis.
20811 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20812 pm_call_node_t *call_node = (pm_call_node_t *) value;
20813 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20814 accepts_command_call_inner = true;
20815 }
20816 }
20817
20818 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20819 context_pop(parser);
20820
20821 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20822 }
20823
20824 return value;
20825}
20826
20834static void
20835parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20836 if (call_node->arguments != NULL) {
20837 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20838 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20839 call_node->arguments = NULL;
20840 }
20841
20842 if (call_node->block != NULL) {
20843 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20844 pm_node_destroy(parser, (pm_node_t *) call_node->block);
20845 call_node->block = NULL;
20846 }
20847}
20848
20873
20874static inline const uint8_t *
20875pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20876 cursor++;
20877
20878 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20879 uint8_t value = escape_hexadecimal_digit(*cursor);
20880 cursor++;
20881
20882 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20883 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20884 cursor++;
20885 }
20886
20887 pm_buffer_append_byte(unescaped, value);
20888 } else {
20889 pm_buffer_append_string(unescaped, "\\x", 2);
20890 }
20891
20892 return cursor;
20893}
20894
20895static inline const uint8_t *
20896pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20897 uint8_t value = (uint8_t) (*cursor - '0');
20898 cursor++;
20899
20900 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20901 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20902 cursor++;
20903
20904 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20905 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20906 cursor++;
20907 }
20908 }
20909
20910 pm_buffer_append_byte(unescaped, value);
20911 return cursor;
20912}
20913
20914static inline const uint8_t *
20915pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20916 const uint8_t *start = cursor - 1;
20917 cursor++;
20918
20919 if (cursor >= end) {
20920 pm_buffer_append_string(unescaped, "\\u", 2);
20921 return cursor;
20922 }
20923
20924 if (*cursor != '{') {
20925 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20926 uint32_t value = escape_unicode(parser, cursor, length);
20927
20928 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20929 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20930 }
20931
20932 return cursor + length;
20933 }
20934
20935 cursor++;
20936 for (;;) {
20937 while (cursor < end && *cursor == ' ') cursor++;
20938
20939 if (cursor >= end) break;
20940 if (*cursor == '}') {
20941 cursor++;
20942 break;
20943 }
20944
20945 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20946 uint32_t value = escape_unicode(parser, cursor, length);
20947
20948 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20949 cursor += length;
20950 }
20951
20952 return cursor;
20953}
20954
20955static void
20956pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
20957 const uint8_t *end = source + length;
20958 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20959
20960 for (;;) {
20961 if (++cursor >= end) {
20962 pm_buffer_append_byte(unescaped, '\\');
20963 return;
20964 }
20965
20966 switch (*cursor) {
20967 case 'x':
20968 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20969 break;
20970 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20971 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20972 break;
20973 case 'u':
20974 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
20975 break;
20976 default:
20977 pm_buffer_append_byte(unescaped, '\\');
20978 break;
20979 }
20980
20981 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20982 if (next_cursor == NULL) break;
20983
20984 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20985 cursor = next_cursor;
20986 }
20987
20988 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20989}
20990
20995static void
20996parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20998
20999 pm_parser_t *parser = callback_data->parser;
21000 pm_call_node_t *call = callback_data->call;
21001 pm_constant_id_list_t *names = &callback_data->names;
21002
21003 const uint8_t *source = pm_string_source(capture);
21004 size_t length = pm_string_length(capture);
21005 pm_buffer_t unescaped = { 0 };
21006
21007 // First, we need to handle escapes within the name of the capture group.
21008 // This is because regular expressions have three different representations
21009 // in prism. The first is the plain source code. The second is the
21010 // representation that will be sent to the regular expression engine, which
21011 // is the value of the "unescaped" field. This is poorly named, because it
21012 // actually still contains escapes, just a subset of them that the regular
21013 // expression engine knows how to handle. The third representation is fully
21014 // unescaped, which is what we need.
21015 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
21016 if (PRISM_UNLIKELY(cursor != NULL)) {
21017 pm_named_capture_escape(parser, &unescaped, source, length, cursor);
21018 source = (const uint8_t *) pm_buffer_value(&unescaped);
21019 length = pm_buffer_length(&unescaped);
21020 }
21021
21022 pm_location_t location;
21023 pm_constant_id_t name;
21024
21025 // If the name of the capture group isn't a valid identifier, we do
21026 // not add it to the local table.
21027 if (!pm_slice_is_valid_local(parser, source, source + length)) {
21028 pm_buffer_free(&unescaped);
21029 return;
21030 }
21031
21032 if (callback_data->shared) {
21033 // If the unescaped string is a slice of the source, then we can
21034 // copy the names directly. The pointers will line up.
21035 location = (pm_location_t) { .start = source, .end = source + length };
21036 name = pm_parser_constant_id_location(parser, location.start, location.end);
21037 } else {
21038 // Otherwise, the name is a slice of the malloc-ed owned string,
21039 // in which case we need to copy it out into a new string.
21040 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
21041
21042 void *memory = xmalloc(length);
21043 if (memory == NULL) abort();
21044
21045 memcpy(memory, source, length);
21046 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
21047 }
21048
21049 // Add this name to the list of constants if it is valid, not duplicated,
21050 // and not a keyword.
21051 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
21052 pm_constant_id_list_append(names, name);
21053
21054 int depth;
21055 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
21056 // If the local is not already a local but it is a keyword, then we
21057 // do not want to add a capture for this.
21058 if (pm_local_is_keyword((const char *) source, length)) {
21059 pm_buffer_free(&unescaped);
21060 return;
21061 }
21062
21063 // If the identifier is not already a local, then we will add it to
21064 // the local table.
21065 pm_parser_local_add(parser, name, location.start, location.end, 0);
21066 }
21067
21068 // Here we lazily create the MatchWriteNode since we know we're
21069 // about to add a target.
21070 if (callback_data->match == NULL) {
21071 callback_data->match = pm_match_write_node_create(parser, call);
21072 }
21073
21074 // Next, create the local variable target and add it to the list of
21075 // targets for the match.
21076 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
21077 pm_node_list_append(&callback_data->match->targets, target);
21078 }
21079
21080 pm_buffer_free(&unescaped);
21081}
21082
21087static pm_node_t *
21088parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
21090 .parser = parser,
21091 .call = call,
21092 .names = { 0 },
21093 .shared = content->type == PM_STRING_SHARED
21094 };
21095
21097 .parser = parser,
21098 .start = call->receiver->location.start,
21099 .end = call->receiver->location.end,
21100 .shared = content->type == PM_STRING_SHARED
21101 };
21102
21103 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
21104 pm_constant_id_list_free(&callback_data.names);
21105
21106 if (callback_data.match != NULL) {
21107 return (pm_node_t *) callback_data.match;
21108 } else {
21109 return (pm_node_t *) call;
21110 }
21111}
21112
21113static inline pm_node_t *
21114parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
21115 pm_token_t token = parser->current;
21116
21117 switch (token.type) {
21118 case PM_TOKEN_EQUAL: {
21119 switch (PM_NODE_TYPE(node)) {
21120 case PM_CALL_NODE: {
21121 // If we have no arguments to the call node and we need this
21122 // to be a target then this is either a method call or a
21123 // local variable write. This _must_ happen before the value
21124 // is parsed because it could be referenced in the value.
21125 pm_call_node_t *call_node = (pm_call_node_t *) node;
21127 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
21128 }
21129 }
21131 case PM_CASE_WRITABLE: {
21132 parser_lex(parser);
21133 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21134
21135 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21136 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21137 }
21138
21139 return parse_write(parser, node, &token, value);
21140 }
21141 case PM_SPLAT_NODE: {
21142 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21143 pm_multi_target_node_targets_append(parser, multi_target, node);
21144
21145 parser_lex(parser);
21146 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21147 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
21148 }
21150 case PM_FALSE_NODE:
21153 case PM_NIL_NODE:
21154 case PM_SELF_NODE:
21155 case PM_TRUE_NODE: {
21156 // In these special cases, we have specific error messages
21157 // and we will replace them with local variable writes.
21158 parser_lex(parser);
21159 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21160 return parse_unwriteable_write(parser, node, &token, value);
21161 }
21162 default:
21163 // In this case we have an = sign, but we don't know what
21164 // it's for. We need to treat it as an error. We'll mark it
21165 // as an error and skip past it.
21166 parser_lex(parser);
21167 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21168 return node;
21169 }
21170 }
21172 switch (PM_NODE_TYPE(node)) {
21175 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21178 parser_lex(parser);
21179
21180 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21181 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21182
21183 pm_node_destroy(parser, node);
21184 return result;
21185 }
21187 parser_lex(parser);
21188
21189 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21190 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21191
21192 pm_node_destroy(parser, node);
21193 return result;
21194 }
21195 case PM_CONSTANT_PATH_NODE: {
21196 parser_lex(parser);
21197
21198 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21199 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21200
21201 return parse_shareable_constant_write(parser, write);
21202 }
21203 case PM_CONSTANT_READ_NODE: {
21204 parser_lex(parser);
21205
21206 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21207 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21208
21209 pm_node_destroy(parser, node);
21210 return parse_shareable_constant_write(parser, write);
21211 }
21213 parser_lex(parser);
21214
21215 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21216 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21217
21218 pm_node_destroy(parser, node);
21219 return result;
21220 }
21222 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21223 parser_lex(parser);
21224
21225 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21226 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21227
21228 parse_target_implicit_parameter(parser, node);
21229 pm_node_destroy(parser, node);
21230 return result;
21231 }
21233 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21234 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21235 parse_target_implicit_parameter(parser, node);
21236 }
21237
21239 parser_lex(parser);
21240
21241 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21242 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21243
21244 pm_node_destroy(parser, node);
21245 return result;
21246 }
21247 case PM_CALL_NODE: {
21248 pm_call_node_t *cast = (pm_call_node_t *) node;
21249
21250 // If we have a vcall (a method with no arguments and no
21251 // receiver that could have been a local variable) then we
21252 // will transform it into a local variable write.
21254 pm_location_t *message_loc = &cast->message_loc;
21255 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21256
21257 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21258 parser_lex(parser);
21259
21260 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21261 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21262
21263 pm_node_destroy(parser, (pm_node_t *) cast);
21264 return result;
21265 }
21266
21267 // Move past the token here so that we have already added
21268 // the local variable by this point.
21269 parser_lex(parser);
21270
21271 // If there is no call operator and the message is "[]" then
21272 // this is an aref expression, and we can transform it into
21273 // an aset expression.
21274 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21275 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21276 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21277 }
21278
21279 // If this node cannot be writable, then we have an error.
21280 if (pm_call_node_writable_p(parser, cast)) {
21281 parse_write_name(parser, &cast->name);
21282 } else {
21283 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21284 }
21285
21286 parse_call_operator_write(parser, cast, &token);
21287 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21288 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21289 }
21290 case PM_MULTI_WRITE_NODE: {
21291 parser_lex(parser);
21292 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21293 return node;
21294 }
21295 default:
21296 parser_lex(parser);
21297
21298 // In this case we have an &&= sign, but we don't know what it's for.
21299 // We need to treat it as an error. For now, we'll mark it as an error
21300 // and just skip right past it.
21301 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21302 return node;
21303 }
21304 }
21306 switch (PM_NODE_TYPE(node)) {
21309 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21312 parser_lex(parser);
21313
21314 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21315 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21316
21317 pm_node_destroy(parser, node);
21318 return result;
21319 }
21321 parser_lex(parser);
21322
21323 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21324 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21325
21326 pm_node_destroy(parser, node);
21327 return result;
21328 }
21329 case PM_CONSTANT_PATH_NODE: {
21330 parser_lex(parser);
21331
21332 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21333 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21334
21335 return parse_shareable_constant_write(parser, write);
21336 }
21337 case PM_CONSTANT_READ_NODE: {
21338 parser_lex(parser);
21339
21340 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21341 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21342
21343 pm_node_destroy(parser, node);
21344 return parse_shareable_constant_write(parser, write);
21345 }
21347 parser_lex(parser);
21348
21349 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21350 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21351
21352 pm_node_destroy(parser, node);
21353 return result;
21354 }
21356 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21357 parser_lex(parser);
21358
21359 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21360 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21361
21362 parse_target_implicit_parameter(parser, node);
21363 pm_node_destroy(parser, node);
21364 return result;
21365 }
21367 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21368 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21369 parse_target_implicit_parameter(parser, node);
21370 }
21371
21373 parser_lex(parser);
21374
21375 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21376 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21377
21378 pm_node_destroy(parser, node);
21379 return result;
21380 }
21381 case PM_CALL_NODE: {
21382 pm_call_node_t *cast = (pm_call_node_t *) node;
21383
21384 // If we have a vcall (a method with no arguments and no
21385 // receiver that could have been a local variable) then we
21386 // will transform it into a local variable write.
21388 pm_location_t *message_loc = &cast->message_loc;
21389 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21390
21391 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21392 parser_lex(parser);
21393
21394 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21395 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21396
21397 pm_node_destroy(parser, (pm_node_t *) cast);
21398 return result;
21399 }
21400
21401 // Move past the token here so that we have already added
21402 // the local variable by this point.
21403 parser_lex(parser);
21404
21405 // If there is no call operator and the message is "[]" then
21406 // this is an aref expression, and we can transform it into
21407 // an aset expression.
21408 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21409 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21410 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21411 }
21412
21413 // If this node cannot be writable, then we have an error.
21414 if (pm_call_node_writable_p(parser, cast)) {
21415 parse_write_name(parser, &cast->name);
21416 } else {
21417 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21418 }
21419
21420 parse_call_operator_write(parser, cast, &token);
21421 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21422 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21423 }
21424 case PM_MULTI_WRITE_NODE: {
21425 parser_lex(parser);
21426 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21427 return node;
21428 }
21429 default:
21430 parser_lex(parser);
21431
21432 // In this case we have an ||= sign, but we don't know what it's for.
21433 // We need to treat it as an error. For now, we'll mark it as an error
21434 // and just skip right past it.
21435 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21436 return node;
21437 }
21438 }
21450 switch (PM_NODE_TYPE(node)) {
21453 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21456 parser_lex(parser);
21457
21458 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21459 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21460
21461 pm_node_destroy(parser, node);
21462 return result;
21463 }
21465 parser_lex(parser);
21466
21467 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21468 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21469
21470 pm_node_destroy(parser, node);
21471 return result;
21472 }
21473 case PM_CONSTANT_PATH_NODE: {
21474 parser_lex(parser);
21475
21476 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21477 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21478
21479 return parse_shareable_constant_write(parser, write);
21480 }
21481 case PM_CONSTANT_READ_NODE: {
21482 parser_lex(parser);
21483
21484 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21485 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21486
21487 pm_node_destroy(parser, node);
21488 return parse_shareable_constant_write(parser, write);
21489 }
21491 parser_lex(parser);
21492
21493 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21494 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21495
21496 pm_node_destroy(parser, node);
21497 return result;
21498 }
21500 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21501 parser_lex(parser);
21502
21503 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21504 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21505
21506 parse_target_implicit_parameter(parser, node);
21507 pm_node_destroy(parser, node);
21508 return result;
21509 }
21511 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21512 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21513 parse_target_implicit_parameter(parser, node);
21514 }
21515
21517 parser_lex(parser);
21518
21519 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21520 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21521
21522 pm_node_destroy(parser, node);
21523 return result;
21524 }
21525 case PM_CALL_NODE: {
21526 parser_lex(parser);
21527 pm_call_node_t *cast = (pm_call_node_t *) node;
21528
21529 // If we have a vcall (a method with no arguments and no
21530 // receiver that could have been a local variable) then we
21531 // will transform it into a local variable write.
21533 pm_location_t *message_loc = &cast->message_loc;
21534 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21535
21536 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21537 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21538 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21539
21540 pm_node_destroy(parser, (pm_node_t *) cast);
21541 return result;
21542 }
21543
21544 // If there is no call operator and the message is "[]" then
21545 // this is an aref expression, and we can transform it into
21546 // an aset expression.
21547 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21548 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21549 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21550 }
21551
21552 // If this node cannot be writable, then we have an error.
21553 if (pm_call_node_writable_p(parser, cast)) {
21554 parse_write_name(parser, &cast->name);
21555 } else {
21556 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21557 }
21558
21559 parse_call_operator_write(parser, cast, &token);
21560 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21561 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21562 }
21563 case PM_MULTI_WRITE_NODE: {
21564 parser_lex(parser);
21565 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21566 return node;
21567 }
21568 default:
21569 parser_lex(parser);
21570
21571 // In this case we have an operator but we don't know what it's for.
21572 // We need to treat it as an error. For now, we'll mark it as an error
21573 // and just skip right past it.
21574 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21575 return node;
21576 }
21577 }
21579 case PM_TOKEN_KEYWORD_AND: {
21580 parser_lex(parser);
21581
21582 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21583 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21584 }
21586 case PM_TOKEN_PIPE_PIPE: {
21587 parser_lex(parser);
21588
21589 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21590 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21591 }
21592 case PM_TOKEN_EQUAL_TILDE: {
21593 // Note that we _must_ parse the value before adding the local
21594 // variables in order to properly mirror the behavior of Ruby. For
21595 // example,
21596 //
21597 // /(?<foo>bar)/ =~ foo
21598 //
21599 // In this case, `foo` should be a method call and not a local yet.
21600 parser_lex(parser);
21601 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21602
21603 // By default, we're going to create a call node and then return it.
21604 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21605 pm_node_t *result = (pm_node_t *) call;
21606
21607 // If the receiver of this =~ is a regular expression node, then we
21608 // need to introduce local variables for it based on its named
21609 // capture groups.
21611 // It's possible to have an interpolated regular expression node
21612 // that only contains strings. This is because it can be split
21613 // up by a heredoc. In this case we need to concat the unescaped
21614 // strings together and then parse them as a regular expression.
21616
21617 bool interpolated = false;
21618 size_t total_length = 0;
21619
21620 pm_node_t *part;
21621 PM_NODE_LIST_FOREACH(parts, index, part) {
21622 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21623 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21624 } else {
21625 interpolated = true;
21626 break;
21627 }
21628 }
21629
21630 if (!interpolated && total_length > 0) {
21631 void *memory = xmalloc(total_length);
21632 if (!memory) abort();
21633
21634 uint8_t *cursor = memory;
21635 PM_NODE_LIST_FOREACH(parts, index, part) {
21636 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21637 size_t length = pm_string_length(unescaped);
21638
21639 memcpy(cursor, pm_string_source(unescaped), length);
21640 cursor += length;
21641 }
21642
21643 pm_string_t owned;
21644 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21645
21646 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21647 pm_string_free(&owned);
21648 }
21649 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21650 // If we have a regular expression node, then we can just parse
21651 // the named captures directly off the unescaped string.
21652 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21653 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21654 }
21655
21656 return result;
21657 }
21659 case PM_TOKEN_USTAR:
21661 // The only times this will occur are when we are in an error state,
21662 // but we'll put them in here so that errors can propagate.
21668 case PM_TOKEN_CARET:
21669 case PM_TOKEN_PIPE:
21670 case PM_TOKEN_AMPERSAND:
21672 case PM_TOKEN_LESS_LESS:
21673 case PM_TOKEN_MINUS:
21674 case PM_TOKEN_PLUS:
21675 case PM_TOKEN_PERCENT:
21676 case PM_TOKEN_SLASH:
21677 case PM_TOKEN_STAR:
21678 case PM_TOKEN_STAR_STAR: {
21679 parser_lex(parser);
21680 pm_token_t operator = parser->previous;
21681 switch (PM_NODE_TYPE(node)) {
21685 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21686 }
21687 break;
21688 }
21689 case PM_AND_NODE: {
21690 pm_and_node_t *cast = (pm_and_node_t *) node;
21692 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21693 }
21694 break;
21695 }
21696 case PM_OR_NODE: {
21697 pm_or_node_t *cast = (pm_or_node_t *) node;
21699 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21700 }
21701 break;
21702 }
21703 default:
21704 break;
21705 }
21706
21707 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21708 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21709 }
21710 case PM_TOKEN_GREATER:
21712 case PM_TOKEN_LESS:
21713 case PM_TOKEN_LESS_EQUAL: {
21714 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21715 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21716 }
21717
21718 parser_lex(parser);
21719 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21720 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21721 }
21723 case PM_TOKEN_DOT: {
21724 parser_lex(parser);
21725 pm_token_t operator = parser->previous;
21726 pm_arguments_t arguments = { 0 };
21727
21728 // This if statement handles the foo.() syntax.
21729 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21730 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21731 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21732 }
21733
21734 switch (PM_NODE_TYPE(node)) {
21738 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21739 }
21740 break;
21741 }
21742 case PM_AND_NODE: {
21743 pm_and_node_t *cast = (pm_and_node_t *) node;
21745 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21746 }
21747 break;
21748 }
21749 case PM_OR_NODE: {
21750 pm_or_node_t *cast = (pm_or_node_t *) node;
21752 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21753 }
21754 break;
21755 }
21756 default:
21757 break;
21758 }
21759
21760 pm_token_t message;
21761
21762 switch (parser->current.type) {
21763 case PM_CASE_OPERATOR:
21764 case PM_CASE_KEYWORD:
21765 case PM_TOKEN_CONSTANT:
21767 case PM_TOKEN_METHOD_NAME: {
21768 parser_lex(parser);
21769 message = parser->previous;
21770 break;
21771 }
21772 default: {
21773 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21774 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21775 }
21776 }
21777
21778 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21779 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21780
21781 if (
21782 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21783 arguments.arguments == NULL &&
21784 arguments.opening_loc.start == NULL &&
21785 match1(parser, PM_TOKEN_COMMA)
21786 ) {
21787 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21788 } else {
21789 return (pm_node_t *) call;
21790 }
21791 }
21792 case PM_TOKEN_DOT_DOT:
21793 case PM_TOKEN_DOT_DOT_DOT: {
21794 parser_lex(parser);
21795
21796 pm_node_t *right = NULL;
21797 if (token_begins_expression_p(parser->current.type)) {
21798 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21799 }
21800
21801 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21802 }
21804 pm_token_t keyword = parser->current;
21805 parser_lex(parser);
21806
21807 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21808 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21809 }
21811 pm_token_t keyword = parser->current;
21812 parser_lex(parser);
21813
21814 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21815 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21816 }
21818 parser_lex(parser);
21819 pm_statements_node_t *statements = pm_statements_node_create(parser);
21820 pm_statements_node_body_append(parser, statements, node, true);
21821
21822 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21823 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21824 }
21826 parser_lex(parser);
21827 pm_statements_node_t *statements = pm_statements_node_create(parser);
21828 pm_statements_node_body_append(parser, statements, node, true);
21829
21830 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21831 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21832 }
21834 context_push(parser, PM_CONTEXT_TERNARY);
21835 pm_node_list_t current_block_exits = { 0 };
21836 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21837
21838 pm_token_t qmark = parser->current;
21839 parser_lex(parser);
21840
21841 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21842
21843 if (parser->recovering) {
21844 // If parsing the true expression of this ternary resulted in a syntax
21845 // error that we can recover from, then we're going to put missing nodes
21846 // and tokens into the remaining places. We want to be sure to do this
21847 // before the `expect` function call to make sure it doesn't
21848 // accidentally move past a ':' token that occurs after the syntax
21849 // error.
21850 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21851 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21852
21853 context_pop(parser);
21854 pop_block_exits(parser, previous_block_exits);
21855 pm_node_list_free(&current_block_exits);
21856
21857 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21858 }
21859
21860 accept1(parser, PM_TOKEN_NEWLINE);
21861 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21862
21863 pm_token_t colon = parser->previous;
21864 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21865
21866 context_pop(parser);
21867 pop_block_exits(parser, previous_block_exits);
21868 pm_node_list_free(&current_block_exits);
21869
21870 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21871 }
21872 case PM_TOKEN_COLON_COLON: {
21873 parser_lex(parser);
21874 pm_token_t delimiter = parser->previous;
21875
21876 switch (parser->current.type) {
21877 case PM_TOKEN_CONSTANT: {
21878 parser_lex(parser);
21879 pm_node_t *path;
21880
21881 if (
21882 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21883 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21884 ) {
21885 // If we have a constant immediately following a '::' operator, then
21886 // this can either be a constant path or a method call, depending on
21887 // what follows the constant.
21888 //
21889 // If we have parentheses, then this is a method call. That would
21890 // look like Foo::Bar().
21891 pm_token_t message = parser->previous;
21892 pm_arguments_t arguments = { 0 };
21893
21894 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21895 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21896 } else {
21897 // Otherwise, this is a constant path. That would look like Foo::Bar.
21898 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21899 }
21900
21901 // If this is followed by a comma then it is a multiple assignment.
21902 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21903 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21904 }
21905
21906 return path;
21907 }
21908 case PM_CASE_OPERATOR:
21909 case PM_CASE_KEYWORD:
21911 case PM_TOKEN_METHOD_NAME: {
21912 parser_lex(parser);
21913 pm_token_t message = parser->previous;
21914
21915 // If we have an identifier following a '::' operator, then it is for
21916 // sure a method call.
21917 pm_arguments_t arguments = { 0 };
21918 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21919 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21920
21921 // If this is followed by a comma then it is a multiple assignment.
21922 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21923 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21924 }
21925
21926 return (pm_node_t *) call;
21927 }
21929 // If we have a parenthesis following a '::' operator, then it is the
21930 // method call shorthand. That would look like Foo::(bar).
21931 pm_arguments_t arguments = { 0 };
21932 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21933
21934 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21935 }
21936 default: {
21937 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21938 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21939 }
21940 }
21941 }
21943 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21944 parser_lex(parser);
21945 accept1(parser, PM_TOKEN_NEWLINE);
21946
21947 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21948 context_pop(parser);
21949
21950 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
21951 }
21952 case PM_TOKEN_BRACKET_LEFT: {
21953 parser_lex(parser);
21954
21955 pm_arguments_t arguments = { 0 };
21956 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21957
21958 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21959 pm_accepts_block_stack_push(parser, true);
21960 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21961 pm_accepts_block_stack_pop(parser);
21962 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21963 }
21964
21965 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21966
21967 // If we have a comma after the closing bracket then this is a multiple
21968 // assignment and we should parse the targets.
21969 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21970 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21971 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21972 }
21973
21974 // If we're at the end of the arguments, we can now check if there is a
21975 // block node that starts with a {. If there is, then we can parse it and
21976 // add it to the arguments.
21977 pm_block_node_t *block = NULL;
21978 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21979 block = parse_block(parser, (uint16_t) (depth + 1));
21980 pm_arguments_validate_block(parser, &arguments, block);
21981 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21982 block = parse_block(parser, (uint16_t) (depth + 1));
21983 }
21984
21985 if (block != NULL) {
21986 if (arguments.block != NULL) {
21987 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
21988 if (arguments.arguments == NULL) {
21989 arguments.arguments = pm_arguments_node_create(parser);
21990 }
21991 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21992 }
21993
21994 arguments.block = (pm_node_t *) block;
21995 }
21996
21997 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
21998 }
21999 case PM_TOKEN_KEYWORD_IN: {
22000 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22001 parser->pattern_matching_newlines = true;
22002
22003 pm_token_t operator = parser->current;
22004 parser->command_start = false;
22005 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22006 parser_lex(parser);
22007
22008 pm_constant_id_list_t captures = { 0 };
22009 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
22010
22011 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22012 pm_constant_id_list_free(&captures);
22013
22014 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
22015 }
22017 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22018 parser->pattern_matching_newlines = true;
22019
22020 pm_token_t operator = parser->current;
22021 parser->command_start = false;
22022 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22023 parser_lex(parser);
22024
22025 pm_constant_id_list_t captures = { 0 };
22026 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
22027
22028 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22029 pm_constant_id_list_free(&captures);
22030
22031 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
22032 }
22033 default:
22034 assert(false && "unreachable");
22035 return NULL;
22036 }
22037}
22038
// Undefine the flag macros that are passed to parse_pattern so that they are
// not visible to the remainder of the file.
22039#undef PM_PARSE_PATTERN_SINGLE
22040#undef PM_PARSE_PATTERN_TOP
22041#undef PM_PARSE_PATTERN_MULTI
22042
22047static inline bool
22048pm_call_node_command_p(const pm_call_node_t *node) {
22049 return (
22050 (node->opening_loc.start == NULL) &&
22051 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
22052 (node->arguments != NULL || node->block != NULL)
22053 );
22054}
22055
/**
 * Parse an expression: first a prefix expression, then as many infix
 * operators as the given binding power allows.
 *
 * - binding_power: an infix operator is only consumed while binding_power is
 *   less than or equal to the operator's left binding power.
 * - accepts_command_call, accepts_label, diag_id: forwarded to
 *   parse_expression_prefix; accepts_command_call is also forwarded to
 *   parse_expression_infix and may be cleared while looping.
 * - depth: the current recursion depth; once it reaches PRISM_DEPTH_MAXIMUM
 *   an error is recorded and a missing node is returned instead.
 *
 * NOTE(review): the embedded line numbers in this listing have gaps inside
 * this function, so some lines (mostly case labels) appear to be absent.
 * Each gap is flagged below; confirm against the upstream file.
 */
22064static pm_node_t *
22065parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
      // Guard against unbounded recursion: report PM_ERR_NESTING_TOO_DEEP on
      // the current token and return a missing node covering that token.
22066 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
22067 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
22068 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
22069 }
22070
22071 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
22072
      // Some prefix results must be returned as-is, without looking for an
      // infix operator after them.
22073 switch (PM_NODE_TYPE(node)) {
22074 case PM_MISSING_NODE:
22075 // If we found a syntax error, then the type of node returned by
22076 // parse_expression_prefix is going to be a missing node.
22077 return node;
      // NOTE(review): embedded numbering jumps from 22077 to 22083 here, so
      // further case labels sharing this branch are probably missing.
22083 case PM_UNDEF_NODE:
22084 // These expressions are statements, and cannot be followed by
22085 // operators (except modifiers).
22086 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22087 return node;
22088 }
22089 break;
22090 case PM_CALL_NODE:
22091 // If we have a call node, then we need to check if it looks like a
22092 // method call without parentheses that contains arguments. If it
22093 // does, then it has different rules for parsing infix operators,
22094 // namely that it only accepts composition (and/or) and modifiers
22095 // (if/unless/etc.).
22096 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
22097 return node;
22098 }
22099 break;
22100 case PM_SYMBOL_NODE:
22101 // If we have a symbol node that is being parsed as a label, then we
22102 // need to immediately return, because there should never be an
22103 // infix operator following this node.
22104 if (pm_symbol_node_label_p(node)) {
22105 return node;
22106 }
22107 break;
22108 default:
22109 break;
22110 }
22111
22112 // Otherwise we'll look and see if the next token can be parsed as an infix
22113 // operator. If it can, then we'll parse it using parse_expression_infix.
22114 pm_binding_powers_t current_binding_powers;
22115 pm_token_type_t current_token_type;
22116
      // The loop condition uses the comma operator: it first snapshots the
      // current token type and its binding powers, then tests whether that
      // token is a binary operator that binds at least as tightly as
      // binding_power.
22117 while (
22118 current_token_type = parser->current.type,
22119 current_binding_powers = pm_binding_powers[current_token_type],
22120 binding_power <= current_binding_powers.left &&
22121 current_binding_powers.binary
22122 ) {
22123 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
22124
22125 switch (PM_NODE_TYPE(node)) {
      // NOTE(review): embedded numbering jumps from 22125 to 22127; the case
      // label for this branch is missing from the listing.
22127 // Multi-write nodes are statements, and cannot be followed by
22128 // operators except modifiers.
22129 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22130 return node;
22131 }
22132 break;
      // NOTE(review): embedded numbering jumps from 22132 to 22139; the case
      // labels for this branch are missing from the listing.
22139 // These expressions are statements, by virtue of the right-hand
22140 // side of their write being an implicit array.
22141 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22142 return node;
22143 }
22144 break;
22145 case PM_CALL_NODE:
22146 // These expressions are also statements, by virtue of the
22147 // right-hand side of the expression (i.e., the last argument to
22148 // the call node) being an implicit array.
22149 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22150 return node;
22151 }
22152 break;
22153 default:
22154 break;
22155 }
22156
22157 // If the operator is nonassoc and we should not be able to parse the
22158 // upcoming infix operator, break.
22159 if (current_binding_powers.nonassoc) {
22160 // If this is a non-assoc operator and we are about to parse the
22161 // exact same operator, then we need to add an error.
22162 if (match1(parser, current_token_type)) {
22163 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22164 break;
22165 }
22166
22167 // If this is an endless range, then we need to reject a couple of
22168 // additional operators because it violates the normal operator
22169 // precedence rules. Those patterns are:
22170 //
22171 // 1.. & 2
22172 // 1.. * 2
22173 //
22174 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
      // NOTE(review): embedded numbering jumps from 22174 to 22176; a line is
      // missing here, presumably the check for the rejected operator tokens.
22176 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22177 break;
22178 }
22179
22180 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22181 break;
22182 }
22183 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22184 break;
22185 }
22186 }
22187
22188 if (accepts_command_call) {
22189 // A command-style method call is only accepted on method chains.
22190 // Thus, we check whether the parsed node can continue method chains.
22191 // The method chain can continue if the parsed node is one of the following five kinds:
22192 // (1) index access: foo[1]
22193 // (2) attribute access: foo.bar
22194 // (3) method call with parenthesis: foo.bar(1)
22195 // (4) method call with a block: foo.bar do end
22196 // (5) constant path: foo::Bar
22197 switch (node->type) {
22198 case PM_CALL_NODE: {
22199 pm_call_node_t *cast = (pm_call_node_t *)node;
      // Command calls are disabled when the call matches none of forms
      // (1) through (4) below.
22200 if (
22201 // (1) foo[1]
22202 !(
22203 cast->call_operator_loc.start == NULL &&
22204 cast->message_loc.start != NULL &&
22205 cast->message_loc.start[0] == '[' &&
22206 cast->message_loc.end[-1] == ']'
22207 ) &&
22208 // (2) foo.bar
22209 !(
22210 cast->call_operator_loc.start != NULL &&
22211 cast->arguments == NULL &&
22212 cast->block == NULL &&
22213 cast->opening_loc.start == NULL
22214 ) &&
22215 // (3) foo.bar(1)
22216 !(
22217 cast->call_operator_loc.start != NULL &&
22218 cast->opening_loc.start != NULL
22219 ) &&
22220 // (4) foo.bar do end
22221 !(
22222 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22223 )
22224 ) {
22225 accepts_command_call = false;
22226 }
22227 break;
22228 }
22229 // (5) foo::Bar
      // NOTE(review): embedded numbering jumps from 22229 to 22231; the case
      // label for the constant path form is missing from the listing.
22231 break;
22232 default:
22233 accepts_command_call = false;
22234 break;
22235 }
22236 }
22237 }
22238
22239 return node;
22240}
22241
22246static pm_statements_node_t *
22247wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22248 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22249 if (statements == NULL) {
22250 statements = pm_statements_node_create(parser);
22251 }
22252
22253 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22254 pm_arguments_node_arguments_append(
22255 arguments,
22256 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22257 );
22258
22259 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22260 parser,
22261 arguments,
22262 pm_parser_constant_id_constant(parser, "print", 5)
22263 ), true);
22264 }
22265
22266 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22267 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22268 if (statements == NULL) {
22269 statements = pm_statements_node_create(parser);
22270 }
22271
22272 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22273 pm_arguments_node_arguments_append(
22274 arguments,
22275 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22276 );
22277
22278 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22279 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22280
22281 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22282 parser,
22283 pm_parser_constant_id_constant(parser, "$F", 2),
22284 (pm_node_t *) call
22285 );
22286
22287 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22288 }
22289
22290 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22291 pm_arguments_node_arguments_append(
22292 arguments,
22293 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22294 );
22295
22296 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22297 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22298 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22299 parser,
22300 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22301 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22302 (pm_node_t *) pm_true_node_synthesized_create(parser)
22303 ));
22304
22305 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22306 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22307 }
22308
22309 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22310 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22311 parser,
22312 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22313 statements
22314 ), true);
22315
22316 statements = wrapped_statements;
22317 }
22318
22319 return statements;
22320}
22321
/**
 * Parse the top-level program and return the resulting program node.
 *
 * Pushes a top-level scope if none is active, lexes the first token, parses
 * the statements in the PM_CONTEXT_MAIN context, orders the locals of the
 * current scope, pops that scope, and finally builds the program node from
 * the ordered locals and the statements. When no statements were parsed, an
 * empty statements node located at the start of the source is used instead.
 *
 * NOTE(review): the embedded line numbers skip 22354 inside this function,
 * so the line that opens the `if` around the wrap_statements call appears to
 * be missing from this listing; confirm against the upstream file.
 */
22325static pm_node_t *
22326parse_program(pm_parser_t *parser) {
22327 // If the current scope is NULL, then we want to push a new top level scope.
22328 // The current scope could exist in the event that we are parsing an eval
22329 // and the user has passed into scopes that already exist.
22330 if (parser->current_scope == NULL) {
22331 pm_parser_scope_push(parser, true);
22332 }
22333
      // Install a fresh block-exits list for the duration of the parse; the
      // previous list is handed to flush_block_exits further down.
22334 pm_node_list_t current_block_exits = { 0 };
22335 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22336
      // Lex the first token, then parse every top-level statement.
22337 parser_lex(parser);
22338 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22339
22340 if (statements != NULL && !parser->parsing_eval) {
22341 // If we have statements, then the top-level statement should be
22342 // explicitly checked as well. We have to do this here because
22343 // everywhere else we check all but the last statement.
22344 assert(statements->body.size > 0);
22345 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22346 }
22347
      // Order the locals of the current scope into `locals` before popping
      // the scope; `locals` is later passed to pm_program_node_create.
22348 pm_constant_id_list_t locals;
22349 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22350 pm_parser_scope_pop(parser);
22351
22352 // At the top level, see if we need to wrap the statements in a program
22353 // node with a while loop based on the options.
      // NOTE(review): the condition line (embedded number 22354) is missing
      // from this listing at this point.
22355 statements = wrap_statements(parser, statements);
22356 } else {
22357 flush_block_exits(parser, previous_block_exits);
22358 pm_node_list_free(&current_block_exits);
22359 }
22360
22361 // If this is an empty file, then we're still going to parse all of the
22362 // statements in order to gather up all of the comments and such. Here we'll
22363 // correct the location information.
22364 if (statements == NULL) {
22365 statements = pm_statements_node_create(parser);
22366 pm_statements_node_location_set(statements, parser->start, parser->start);
22367 }
22368
22369 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22370}
22371
22372/******************************************************************************/
22373/* External functions */
22374/******************************************************************************/
22375
/**
 * Locate the first occurrence of the NUL-terminated string `little` within
 * the first `big_length` bytes of `big`. `big` does not need to be
 * NUL-terminated.
 *
 * A match is only reported when it lies entirely within the first
 * `big_length` bytes; no byte at or beyond `big + big_length` is ever read.
 * As with strstr, an empty `little` matches at the start of `big`.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the match, or NULL if there is none.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle trivially matches at the start of the haystack.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match, and bailing out here
    // also keeps the subtraction below from wrapping around.
    if (little_length > big_length) return NULL;

    // Only consider start positions that leave room for the whole needle, so
    // that memcmp never reads past the end of the searchable region.
    const char *last_start = big + (big_length - little_length);

    for (; big <= last_start; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22395
22396#ifdef _WIN32
22397#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22398#else
22404static void
22405pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22406 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22407 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22408 }
22409}
22410#endif
22411
22416static void
22417pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22418 const char *switches = pm_strnstr(engine, " -", length);
22419 if (switches == NULL) return;
22420
22421 pm_options_t next_options = *options;
22422 options->shebang_callback(
22423 &next_options,
22424 (const uint8_t *) (switches + 1),
22425 length - ((size_t) (switches - engine)) - 1,
22426 options->shebang_callback_data
22427 );
22428
22429 size_t encoding_length;
22430 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22431 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22432 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22433 }
22434
22435 parser->command_line = next_options.command_line;
22436 parser->frozen_string_literal = next_options.frozen_string_literal;
22437}
22438
22443pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22444 assert(source != NULL);
22445
22446 *parser = (pm_parser_t) {
22447 .node_id = 0,
22448 .lex_state = PM_LEX_STATE_BEG,
22449 .enclosure_nesting = 0,
22450 .lambda_enclosure_nesting = -1,
22451 .brace_nesting = 0,
22452 .do_loop_stack = 0,
22453 .accepts_block_stack = 0,
22454 .lex_modes = {
22455 .index = 0,
22456 .stack = {{ .mode = PM_LEX_DEFAULT }},
22457 .current = &parser->lex_modes.stack[0],
22458 },
22459 .start = source,
22460 .end = source + size,
22461 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22462 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22463 .next_start = NULL,
22464 .heredoc_end = NULL,
22465 .data_loc = { .start = NULL, .end = NULL },
22466 .comment_list = { 0 },
22467 .magic_comment_list = { 0 },
22468 .warning_list = { 0 },
22469 .error_list = { 0 },
22470 .current_scope = NULL,
22471 .current_context = NULL,
22472 .encoding = PM_ENCODING_UTF_8_ENTRY,
22473 .encoding_changed_callback = NULL,
22474 .encoding_comment_start = source,
22475 .lex_callback = NULL,
22476 .filepath = { 0 },
22477 .constant_pool = { 0 },
22478 .newline_list = { 0 },
22479 .integer_base = 0,
22480 .current_string = PM_STRING_EMPTY,
22481 .start_line = 1,
22482 .explicit_encoding = NULL,
22483 .command_line = 0,
22484 .parsing_eval = false,
22485 .partial_script = false,
22486 .command_start = true,
22487 .recovering = false,
22488 .encoding_locked = false,
22489 .encoding_changed = false,
22490 .pattern_matching_newlines = false,
22491 .in_keyword_arg = false,
22492 .current_block_exits = NULL,
22493 .semantic_token_seen = false,
22494 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22495 .current_regular_expression_ascii_only = false,
22496 .warn_mismatched_indentation = true
22497 };
22498
22499 // Initialize the constant pool. We're going to completely guess as to the
22500 // number of constants that we'll need based on the size of the input. The
22501 // ratio we chose here is actually less arbitrary than you might think.
22502 //
22503 // We took ~50K Ruby files and measured the size of the file versus the
22504 // number of constants that were found in those files. Then we found the
22505 // average and standard deviation of the ratios of constants/bytesize. Then
22506 // we added 1.34 standard deviations to the average to get a ratio that
22507 // would fit 75% of the files (for a two-tailed distribution). This works
22508 // because there was about a 0.77 correlation and the distribution was
22509 // roughly normal.
22510 //
22511 // This ratio will need to change if we add more constants to the constant
22512 // pool for another node type.
22513 uint32_t constant_size = ((uint32_t) size) / 95;
22514 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22515
22516 // Initialize the newline list. Similar to the constant pool, we're going to
22517 // guess at the number of newlines that we'll need based on the size of the
22518 // input.
22519 size_t newline_size = size / 22;
22520 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22521
22522 // If options were provided to this parse, establish them here.
22523 if (options != NULL) {
22524 // filepath option
22525 parser->filepath = options->filepath;
22526
22527 // line option
22528 parser->start_line = options->line;
22529
22530 // encoding option
22531 size_t encoding_length = pm_string_length(&options->encoding);
22532 if (encoding_length > 0) {
22533 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22534 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22535 }
22536
22537 // encoding_locked option
22538 parser->encoding_locked = options->encoding_locked;
22539
22540 // frozen_string_literal option
22542
22543 // command_line option
22544 parser->command_line = options->command_line;
22545
22546 // version option
22547 parser->version = options->version;
22548
22549 // partial_script
22550 parser->partial_script = options->partial_script;
22551
22552 // scopes option
22553 parser->parsing_eval = options->scopes_count > 0;
22554 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22555
22556 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22557 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22558 pm_parser_scope_push(parser, scope_index == 0);
22559
22560 // Scopes given from the outside are not allowed to have numbered
22561 // parameters.
22562 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22563
22564 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22565 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22566
22567 const uint8_t *source = pm_string_source(local);
22568 size_t length = pm_string_length(local);
22569
22570 void *allocated = xmalloc(length);
22571 if (allocated == NULL) continue;
22572
22573 memcpy(allocated, source, length);
22574 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22575 }
22576 }
22577 }
22578
22579 pm_accepts_block_stack_push(parser, true);
22580
22581 // Skip past the UTF-8 BOM if it exists.
22582 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22583 parser->current.end += 3;
22584 parser->encoding_comment_start += 3;
22585
22586 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22588 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22589 }
22590 }
22591
22592 // If the -x command line flag is set, or the first shebang of the file does
22593 // not include "ruby", then we'll search for a shebang that does include
22594 // "ruby" and start parsing from there.
22595 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22596
22597 // If the first two bytes of the source are a shebang, then we will do a bit
22598 // of extra processing.
22599 //
22600 // First, we'll indicate that the encoding comment is at the end of the
22601 // shebang. This means that when a shebang is present the encoding comment
22602 // can begin on the second line.
22603 //
22604 // Second, we will check if the shebang includes "ruby". If it does, then
22605 // we will start parsing from there. We will also potentially warn the
22606 // user if there is a carriage return at the end of the shebang. We will
22607 // also potentially call the shebang callback if this is the main script to
22608 // allow the caller to parse the shebang and find any command-line options.
22609 // If the shebang does not include "ruby" and this is the main script being
22610 // parsed, then we will start searching the file for a shebang that does
22611 // contain "ruby" as if -x were passed on the command line.
22612 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22613 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22614
22615 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22616 const char *engine;
22617
22618 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22619 if (newline != NULL) {
22620 parser->encoding_comment_start = newline + 1;
22621
22622 if (options == NULL || options->main_script) {
22623 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22624 }
22625 }
22626
22627 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22628 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22629 }
22630
22631 search_shebang = false;
22632 } else if (options->main_script && !parser->parsing_eval) {
22633 search_shebang = true;
22634 }
22635 }
22636
22637 // Here we're going to find the first shebang that includes "ruby" and start
22638 // parsing from there.
22639 if (search_shebang) {
22640 // If a shebang that includes "ruby" is not found, then we're going to add
22641 // a load error to the list of errors on the parser.
22642 bool found_shebang = false;
22643
22644 // This is going to point to the start of each line as we check it.
22645 // We'll maintain a moving window looking at each line as they come.
22646 const uint8_t *cursor = parser->start;
22647
22648 // The newline pointer points to the end of the current line that we're
22649 // considering. If it is NULL, then we're at the end of the file.
22650 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22651
22652 while (newline != NULL) {
22653 pm_newline_list_append(&parser->newline_list, newline);
22654
22655 cursor = newline + 1;
22656 newline = next_newline(cursor, parser->end - cursor);
22657
22658 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22659 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22660 const char *engine;
22661 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22662 found_shebang = true;
22663
22664 if (newline != NULL) {
22665 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22666 parser->encoding_comment_start = newline + 1;
22667 }
22668
22669 if (options != NULL && options->shebang_callback != NULL) {
22670 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22671 }
22672
22673 break;
22674 }
22675 }
22676 }
22677
22678 if (found_shebang) {
22679 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22680 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22681 } else {
22682 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22683 pm_newline_list_clear(&parser->newline_list);
22684 }
22685 }
22686
22687 // The encoding comment can start after any amount of inline whitespace, so
22688 // here we'll advance it to the first non-inline-whitespace character so
22689 // that it is ready for future comparisons.
22690 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22691}
22692
22698pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
22699 parser->encoding_changed_callback = callback;
22700}
22701
22705static inline void
22706pm_comment_list_free(pm_list_t *list) {
22707 pm_list_node_t *node, *next;
22708
22709 for (node = list->head; node != NULL; node = next) {
22710 next = node->next;
22711
22712 pm_comment_t *comment = (pm_comment_t *) node;
22713 xfree(comment);
22714 }
22715}
22716
22720static inline void
22721pm_magic_comment_list_free(pm_list_t *list) {
22722 pm_list_node_t *node, *next;
22723
22724 for (node = list->head; node != NULL; node = next) {
22725 next = node->next;
22726
22729 }
22730}
22731
22736pm_parser_free(pm_parser_t *parser) {
22737 pm_string_free(&parser->filepath);
22738 pm_diagnostic_list_free(&parser->error_list);
22739 pm_diagnostic_list_free(&parser->warning_list);
22740 pm_comment_list_free(&parser->comment_list);
22741 pm_magic_comment_list_free(&parser->magic_comment_list);
22742 pm_constant_pool_free(&parser->constant_pool);
22743 pm_newline_list_free(&parser->newline_list);
22744
22745 while (parser->current_scope != NULL) {
22746 // Normally, popping the scope doesn't free the locals since it is
22747 // assumed that ownership has transferred to the AST. However if we have
22748 // scopes while we're freeing the parser, it's likely they came from
22749 // eval scopes and we need to free them explicitly here.
22750 pm_parser_scope_pop(parser);
22751 }
22752
22753 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22754 lex_mode_pop(parser);
22755 }
22756}
22757
22762pm_parse(pm_parser_t *parser) {
22763 return parse_program(parser);
22764}
22765
22771static bool
22772pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
22773#define LINE_SIZE 4096
22774 char line[LINE_SIZE];
22775
22776 while (memset(line, '\n', LINE_SIZE), fgets(line, LINE_SIZE, stream) != NULL) {
22777 size_t length = LINE_SIZE;
22778 while (length > 0 && line[length - 1] == '\n') length--;
22779
22780 if (length == LINE_SIZE) {
22781 // If we read a line that is the maximum size and it doesn't end
22782 // with a newline, then we'll just append it to the buffer and
22783 // continue reading.
22784 length--;
22785 pm_buffer_append_string(buffer, line, length);
22786 continue;
22787 }
22788
22789 // Append the line to the buffer.
22790 length--;
22791 pm_buffer_append_string(buffer, line, length);
22792
22793 // Check if the line matches the __END__ marker. If it does, then stop
22794 // reading and return false. In most circumstances, this means we should
22795 // stop reading from the stream so that the DATA constant can pick it
22796 // up.
22797 switch (length) {
22798 case 7:
22799 if (strncmp(line, "__END__", 7) == 0) return false;
22800 break;
22801 case 8:
22802 if (strncmp(line, "__END__\n", 8) == 0) return false;
22803 break;
22804 case 9:
22805 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22806 break;
22807 }
22808 }
22809
22810 return true;
22811#undef LINE_SIZE
22812}
22813
22823static bool
22824pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22825 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22826
22827 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22828 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22829 return true;
22830 }
22831 }
22832
22833 return false;
22834}
22835
22843pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
22844 pm_buffer_init(buffer);
22845
22846 bool eof = pm_parse_stream_read(buffer, stream, fgets);
22847 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22848 pm_node_t *node = pm_parse(parser);
22849
22850 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22851 pm_node_destroy(parser, node);
22852 eof = pm_parse_stream_read(buffer, stream, fgets);
22853
22854 pm_parser_free(parser);
22855 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22856 node = pm_parse(parser);
22857 }
22858
22859 return node;
22860}
22861
22866pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22867 pm_options_t options = { 0 };
22868 pm_options_read(&options, data);
22869
22870 pm_parser_t parser;
22871 pm_parser_init(&parser, source, size, &options);
22872
22873 pm_node_t *node = pm_parse(&parser);
22874 pm_node_destroy(&parser, node);
22875
22876 bool result = parser.error_list.size == 0;
22877 pm_parser_free(&parser);
22878 pm_options_free(&options);
22879
22880 return result;
22881}
22882
22883#undef PM_CASE_KEYWORD
22884#undef PM_CASE_OPERATOR
22885#undef PM_CASE_WRITABLE
22886#undef PM_STRING_EMPTY
22887#undef PM_LOCATION_NODE_BASE_VALUE
22888#undef PM_LOCATION_NODE_VALUE
22889#undef PM_LOCATION_NULL_VALUE
22890#undef PM_LOCATION_TOKEN_VALUE
22891
22892// We optionally support serializing to a binary string. For systems that don't
22893// want or need this functionality, it can be turned off with the
22894// PRISM_EXCLUDE_SERIALIZATION define.
22895#ifndef PRISM_EXCLUDE_SERIALIZATION
22896
22897static inline void
22898pm_serialize_header(pm_buffer_t *buffer) {
22899 pm_buffer_append_string(buffer, "PRISM", 5);
22900 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22901 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22902 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22903 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22904}
22905
22910pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22911 pm_serialize_header(buffer);
22912 pm_serialize_content(parser, node, buffer);
22913 pm_buffer_append_byte(buffer, '\0');
22914}
22915
22921pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22922 pm_options_t options = { 0 };
22923 pm_options_read(&options, data);
22924
22925 pm_parser_t parser;
22926 pm_parser_init(&parser, source, size, &options);
22927
22928 pm_node_t *node = pm_parse(&parser);
22929
22930 pm_serialize_header(buffer);
22931 pm_serialize_content(&parser, node, buffer);
22932 pm_buffer_append_byte(buffer, '\0');
22933
22934 pm_node_destroy(&parser, node);
22935 pm_parser_free(&parser);
22936 pm_options_free(&options);
22937}
22938
22944pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
22945 pm_parser_t parser;
22946 pm_options_t options = { 0 };
22947 pm_options_read(&options, data);
22948
22949 pm_buffer_t parser_buffer;
22950 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
22951 pm_serialize_header(buffer);
22952 pm_serialize_content(&parser, node, buffer);
22953 pm_buffer_append_byte(buffer, '\0');
22954
22955 pm_node_destroy(&parser, node);
22956 pm_buffer_free(&parser_buffer);
22957 pm_parser_free(&parser);
22958 pm_options_free(&options);
22959}
22960
22965pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22966 pm_options_t options = { 0 };
22967 pm_options_read(&options, data);
22968
22969 pm_parser_t parser;
22970 pm_parser_init(&parser, source, size, &options);
22971
22972 pm_node_t *node = pm_parse(&parser);
22973 pm_serialize_header(buffer);
22974 pm_serialize_encoding(parser.encoding, buffer);
22975 pm_buffer_append_varsint(buffer, parser.start_line);
22976 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22977
22978 pm_node_destroy(&parser, node);
22979 pm_parser_free(&parser);
22980 pm_options_free(&options);
22981}
22982
22983#endif
22984
22985/******************************************************************************/
22986/* Slice queries for the Ruby API */
22987/******************************************************************************/
22988
/** The category that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** Returned when the given encoding name could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice fits none of the other categories. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier that does not start with an uppercase character. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier that starts with an uppercase character. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by a method name suffix. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
23006
23010pm_slice_type_t
23011pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
23012 // first, get the right encoding object
23013 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
23014 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
23015
23016 // check that there is at least one character
23017 if (length == 0) return PM_SLICE_TYPE_NONE;
23018
23019 size_t width;
23020 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
23021 // valid because alphabetical
23022 } else if (*source == '_') {
23023 // valid because underscore
23024 width = 1;
23025 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
23026 // valid because multibyte
23027 } else {
23028 // invalid because no match
23029 return PM_SLICE_TYPE_NONE;
23030 }
23031
23032 // determine the type of the slice based on the first character
23033 const uint8_t *end = source + length;
23034 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
23035
23036 // next, iterate through all of the bytes of the string to ensure that they
23037 // are all valid identifier characters
23038 source += width;
23039
23040 while (source < end) {
23041 if ((width = encoding->alnum_char(source, end - source)) != 0) {
23042 // valid because alphanumeric
23043 source += width;
23044 } else if (*source == '_') {
23045 // valid because underscore
23046 source++;
23047 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
23048 // valid because multibyte
23049 source += width;
23050 } else {
23051 // invalid because no match
23052 break;
23053 }
23054 }
23055
23056 // accept a ! or ? at the end of the slice as a method name
23057 if (*source == '!' || *source == '?' || *source == '=') {
23058 source++;
23059 result = PM_SLICE_TYPE_METHOD_NAME;
23060 }
23061
23062 // valid if we are at the end of the slice
23063 return source == end ? result : PM_SLICE_TYPE_NONE;
23064}
23065
23070pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
23071 switch (pm_slice_type(source, length, encoding_name)) {
23072 case PM_SLICE_TYPE_ERROR:
23073 return PM_STRING_QUERY_ERROR;
23074 case PM_SLICE_TYPE_NONE:
23075 case PM_SLICE_TYPE_CONSTANT:
23076 case PM_SLICE_TYPE_METHOD_NAME:
23077 return PM_STRING_QUERY_FALSE;
23078 case PM_SLICE_TYPE_LOCAL:
23079 return PM_STRING_QUERY_TRUE;
23080 }
23081
23082 assert(false && "unreachable");
23083 return PM_STRING_QUERY_FALSE;
23084}
23085
23090pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
23091 switch (pm_slice_type(source, length, encoding_name)) {
23092 case PM_SLICE_TYPE_ERROR:
23093 return PM_STRING_QUERY_ERROR;
23094 case PM_SLICE_TYPE_NONE:
23095 case PM_SLICE_TYPE_LOCAL:
23096 case PM_SLICE_TYPE_METHOD_NAME:
23097 return PM_STRING_QUERY_FALSE;
23098 case PM_SLICE_TYPE_CONSTANT:
23099 return PM_STRING_QUERY_TRUE;
23100 }
23101
23102 assert(false && "unreachable");
23103 return PM_STRING_QUERY_FALSE;
23104}
23105
23110pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
23111#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
23112#define C1(c) (*source == c)
23113#define C2(s) (memcmp(source, s, 2) == 0)
23114#define C3(s) (memcmp(source, s, 3) == 0)
23115
23116 switch (pm_slice_type(source, length, encoding_name)) {
23117 case PM_SLICE_TYPE_ERROR:
23118 return PM_STRING_QUERY_ERROR;
23119 case PM_SLICE_TYPE_NONE:
23120 break;
23121 case PM_SLICE_TYPE_LOCAL:
23122 // numbered parameters are not valid method names
23123 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
23124 case PM_SLICE_TYPE_CONSTANT:
23125 // all constants are valid method names
23126 case PM_SLICE_TYPE_METHOD_NAME:
23127 // all method names are valid method names
23128 return PM_STRING_QUERY_TRUE;
23129 }
23130
23131 switch (length) {
23132 case 1:
23133 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
23134 case 2:
23135 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
23136 case 3:
23137 return B(C3("===") || C3("<=>") || C3("[]="));
23138 default:
23139 return PM_STRING_QUERY_FALSE;
23140 }
23141
23142#undef B
23143#undef C1
23144#undef C2
23145#undef C3
23146}
@ PM_RANGE_FLAGS_EXCLUDE_END
... operator
Definition ast.h:7854
@ PM_DEFINED_NODE
DefinedNode.
Definition ast.h:709
@ PM_PRE_EXECUTION_NODE
PreExecutionNode.
Definition ast.h:931
@ PM_RETRY_NODE
RetryNode.
Definition ast.h:964
@ PM_REDO_NODE
RedoNode.
Definition ast.h:943
@ PM_CONSTANT_PATH_WRITE_NODE
ConstantPathWriteNode.
Definition ast.h:694
@ PM_SOURCE_LINE_NODE
SourceLineNode.
Definition ast.h:985
@ PM_UNLESS_NODE
UnlessNode.
Definition ast.h:1009
@ PM_CALL_NODE
CallNode.
Definition ast.h:628
@ PM_NIL_NODE
NilNode.
Definition ast.h:895
@ PM_GLOBAL_VARIABLE_READ_NODE
GlobalVariableReadNode.
Definition ast.h:757
@ PM_RATIONAL_NODE
RationalNode.
Definition ast.h:940
@ PM_FIND_PATTERN_NODE
FindPatternNode.
Definition ast.h:727
@ PM_ARRAY_NODE
ArrayNode.
Definition ast.h:589
@ PM_CONSTANT_PATH_TARGET_NODE
ConstantPathTargetNode.
Definition ast.h:691
@ PM_OR_NODE
OrNode.
Definition ast.h:913
@ PM_MULTI_WRITE_NODE
MultiWriteNode.
Definition ast.h:889
@ PM_IF_NODE
IfNode.
Definition ast.h:772
@ PM_INTERPOLATED_STRING_NODE
InterpolatedStringNode.
Definition ast.h:826
@ PM_FALSE_NODE
FalseNode.
Definition ast.h:724
@ PM_HASH_NODE
HashNode.
Definition ast.h:766
@ PM_MATCH_PREDICATE_NODE
MatchPredicateNode.
Definition ast.h:871
@ PM_X_STRING_NODE
XStringNode.
Definition ast.h:1021
@ PM_GLOBAL_VARIABLE_TARGET_NODE
GlobalVariableTargetNode.
Definition ast.h:760
@ PM_AND_NODE
AndNode.
Definition ast.h:583
@ PM_CONSTANT_TARGET_NODE
ConstantTargetNode.
Definition ast.h:700
@ PM_IT_LOCAL_VARIABLE_READ_NODE
ItLocalVariableReadNode.
Definition ast.h:835
@ PM_SOURCE_FILE_NODE
SourceFileNode.
Definition ast.h:982
@ PM_NO_KEYWORDS_PARAMETER_NODE
NoKeywordsParameterNode.
Definition ast.h:898
@ PM_MULTI_TARGET_NODE
MultiTargetNode.
Definition ast.h:886
@ PM_SPLAT_NODE
SplatNode.
Definition ast.h:988
@ PM_CLASS_VARIABLE_READ_NODE
ClassVariableReadNode.
Definition ast.h:661
@ PM_ELSE_NODE
ElseNode.
Definition ast.h:712
@ PM_INTERPOLATED_MATCH_LAST_LINE_NODE
InterpolatedMatchLastLineNode.
Definition ast.h:820
@ PM_SYMBOL_NODE
SymbolNode.
Definition ast.h:1000
@ PM_RESCUE_MODIFIER_NODE
RescueModifierNode.
Definition ast.h:955
@ PM_ALIAS_METHOD_NODE
AliasMethodNode.
Definition ast.h:577
@ PM_MATCH_REQUIRED_NODE
MatchRequiredNode.
Definition ast.h:874
@ PM_BACK_REFERENCE_READ_NODE
BackReferenceReadNode.
Definition ast.h:601
@ PM_BLOCK_ARGUMENT_NODE
BlockArgumentNode.
Definition ast.h:607
@ PM_MISSING_NODE
MissingNode.
Definition ast.h:880
@ PM_SELF_NODE
SelfNode.
Definition ast.h:970
@ PM_TRUE_NODE
TrueNode.
Definition ast.h:1003
@ PM_ASSOC_SPLAT_NODE
AssocSplatNode.
Definition ast.h:598
@ PM_RANGE_NODE
RangeNode.
Definition ast.h:937
@ PM_LOCAL_VARIABLE_READ_NODE
LocalVariableReadNode.
Definition ast.h:859
@ PM_NEXT_NODE
NextNode.
Definition ast.h:892
@ PM_REGULAR_EXPRESSION_NODE
RegularExpressionNode.
Definition ast.h:946
@ PM_CONSTANT_WRITE_NODE
ConstantWriteNode.
Definition ast.h:703
@ PM_HASH_PATTERN_NODE
HashPatternNode.
Definition ast.h:769
@ PM_UNDEF_NODE
UndefNode.
Definition ast.h:1006
@ PM_ENSURE_NODE
EnsureNode.
Definition ast.h:721
@ PM_LOCAL_VARIABLE_WRITE_NODE
LocalVariableWriteNode.
Definition ast.h:865
@ PM_KEYWORD_HASH_NODE
KeywordHashNode.
Definition ast.h:841
@ PM_PARENTHESES_NODE
ParenthesesNode.
Definition ast.h:919
@ PM_CLASS_VARIABLE_WRITE_NODE
ClassVariableWriteNode.
Definition ast.h:667
@ PM_POST_EXECUTION_NODE
PostExecutionNode.
Definition ast.h:928
@ PM_RETURN_NODE
ReturnNode.
Definition ast.h:967
@ PM_ARRAY_PATTERN_NODE
ArrayPatternNode.
Definition ast.h:592
@ PM_MATCH_LAST_LINE_NODE
MatchLastLineNode.
Definition ast.h:868
@ PM_CONSTANT_PATH_NODE
ConstantPathNode.
Definition ast.h:682
@ PM_INTERPOLATED_SYMBOL_NODE
InterpolatedSymbolNode.
Definition ast.h:829
@ PM_CLASS_VARIABLE_TARGET_NODE
ClassVariableTargetNode.
Definition ast.h:664
@ PM_BREAK_NODE
BreakNode.
Definition ast.h:622
@ PM_IMAGINARY_NODE
ImaginaryNode.
Definition ast.h:775
@ PM_CONSTANT_READ_NODE
ConstantReadNode.
Definition ast.h:697
@ PM_GLOBAL_VARIABLE_WRITE_NODE
GlobalVariableWriteNode.
Definition ast.h:763
@ PM_SOURCE_ENCODING_NODE
SourceEncodingNode.
Definition ast.h:979
@ PM_BEGIN_NODE
BeginNode.
Definition ast.h:604
@ PM_INSTANCE_VARIABLE_READ_NODE
InstanceVariableReadNode.
Definition ast.h:808
@ PM_FLIP_FLOP_NODE
FlipFlopNode.
Definition ast.h:730
@ PM_INSTANCE_VARIABLE_WRITE_NODE
InstanceVariableWriteNode.
Definition ast.h:814
@ PM_INSTANCE_VARIABLE_TARGET_NODE
InstanceVariableTargetNode.
Definition ast.h:811
@ PM_CASE_NODE
CaseNode.
Definition ast.h:646
@ PM_FLOAT_NODE
FloatNode.
Definition ast.h:733
@ PM_ASSOC_NODE
AssocNode.
Definition ast.h:595
@ PM_INTEGER_NODE
IntegerNode.
Definition ast.h:817
@ PM_LOCAL_VARIABLE_TARGET_NODE
LocalVariableTargetNode.
Definition ast.h:862
@ PM_STRING_NODE
StringNode.
Definition ast.h:994
@ PM_ALIAS_GLOBAL_VARIABLE_NODE
AliasGlobalVariableNode.
Definition ast.h:574
@ PM_NUMBERED_REFERENCE_READ_NODE
NumberedReferenceReadNode.
Definition ast.h:904
@ PM_STATEMENTS_NODE
StatementsNode.
Definition ast.h:991
@ PM_BLOCK_NODE
BlockNode.
Definition ast.h:613
@ PM_INTERPOLATED_REGULAR_EXPRESSION_NODE
InterpolatedRegularExpressionNode.
Definition ast.h:823
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE
We store the flags enum in every node in the tree.
Definition ast.h:1046
@ PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition ast.h:7937
@ PM_STRING_FLAGS_FROZEN
frozen by virtue of a frozen_string_literal: true comment or --enable-frozen-string-literal
Definition ast.h:7920
@ PM_STRING_FLAGS_FORCED_BINARY_ENCODING
internal bytes forced the encoding to binary
Definition ast.h:7917
@ PM_STRING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:7914
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING
if the arguments contain forwarding
Definition ast.h:7746
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS
if the arguments contain keywords
Definition ast.h:7749
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT
if the arguments contain a keyword splat
Definition ast.h:7752
#define PM_NODE_FLAG_P(node, flag)
Return true if the given flag is set on the given node.
Definition ast.h:1063
#define PM_NODE_TYPE_P(node, type)
Return true if the type of the given node matches the given type.
Definition ast.h:1058
#define PM_NODE_TYPE(node)
Cast the type to an enum to allow the compiler to provide exhaustiveness checking.
Definition ast.h:1053
@ PM_INTEGER_BASE_FLAGS_HEXADECIMAL
0x prefix
Definition ast.h:7811
@ PM_INTEGER_BASE_FLAGS_OCTAL
0o or 0 prefix
Definition ast.h:7808
@ PM_INTEGER_BASE_FLAGS_DECIMAL
0d or no prefix
Definition ast.h:7805
@ PM_INTEGER_BASE_FLAGS_BINARY
0b prefix
Definition ast.h:7802
enum pm_token_type pm_token_type_t
This enum represents every type of token in the Ruby source.
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
When we're serializing to Java, we want to skip serializing the location fields as they won't be used...
Definition ast.h:7946
@ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
&.
Definition ast.h:7774
@ PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE
a call that is an attribute write, so the value being written should be returned
Definition ast.h:7780
@ PM_CALL_NODE_FLAGS_VARIABLE_CALL
a call that could have been a local variable
Definition ast.h:7777
@ PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition ast.h:7892
@ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
x - ignores whitespace and allows comments in regular expressions
Definition ast.h:7865
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition ast.h:1040
@ PM_TOKEN_STAR_STAR
**
Definition ast.h:469
@ PM_TOKEN_DOT_DOT_DOT
the ... range operator or forwarding parameter
Definition ast.h:124
@ PM_TOKEN_MINUS_EQUAL
-=
Definition ast.h:385
@ PM_TOKEN_IGNORED_NEWLINE
an ignored newline
Definition ast.h:196
@ PM_TOKEN_BANG_EQUAL
!=
Definition ast.h:64
@ PM_TOKEN_KEYWORD___FILE__
__FILE__
Definition ast.h:349
@ PM_TOKEN_KEYWORD_WHEN
when
Definition ast.h:334
@ PM_TOKEN_FLOAT
a floating point number
Definition ast.h:160
@ PM_TOKEN_PLUS_EQUAL
+=
Definition ast.h:442
@ PM_TOKEN_DOT_DOT
the .. range operator
Definition ast.h:121
@ PM_TOKEN_UDOT_DOT
unary .. operator
Definition ast.h:496
@ PM_TOKEN_AMPERSAND_DOT
&.
Definition ast.h:49
@ PM_TOKEN_NEWLINE
a newline character outside of other tokens
Definition ast.h:391
@ PM_TOKEN_NUMBERED_REFERENCE
a numbered reference to a capture group in the previous regular expression match
Definition ast.h:394
@ PM_TOKEN_AMPERSAND
&
Definition ast.h:40
@ PM_TOKEN_KEYWORD_YIELD
yield
Definition ast.h:343
@ PM_TOKEN_KEYWORD_END
end
Definition ast.h:253
@ PM_TOKEN_LAMBDA_BEGIN
{
Definition ast.h:361
@ PM_TOKEN_KEYWORD_UNTIL_MODIFIER
until in the modifier form
Definition ast.h:331
@ PM_TOKEN_EQUAL_EQUAL_EQUAL
===
Definition ast.h:151
@ PM_TOKEN_INTEGER_RATIONAL
an integer with a rational suffix
Definition ast.h:208
@ PM_TOKEN_USTAR
unary *
Definition ast.h:511
@ PM_TOKEN_TILDE
~ or ~@
Definition ast.h:487
@ PM_TOKEN_KEYWORD___ENCODING__
__ENCODING__
Definition ast.h:346
@ PM_TOKEN_REGEXP_END
the end of a regular expression
Definition ast.h:451
@ PM_TOKEN_KEYWORD_UNTIL
until
Definition ast.h:328
@ PM_TOKEN_COMMA
,
Definition ast.h:109
@ PM_TOKEN_MAXIMUM
The maximum token value.
Definition ast.h:523
@ PM_TOKEN_GREATER
>
Definition ast.h:175
@ PM_TOKEN_INTEGER
an integer (any base)
Definition ast.h:202
@ PM_TOKEN_SLASH_EQUAL
/=
Definition ast.h:460
@ PM_TOKEN_UMINUS_NUM
-@ for a number
Definition ast.h:505
@ PM_TOKEN_EMBVAR
#
Definition ast.h:142
@ PM_TOKEN_KEYWORD_UNLESS_MODIFIER
unless in the modifier form
Definition ast.h:325
@ PM_TOKEN_INTEGER_RATIONAL_IMAGINARY
an integer with a rational and imaginary suffix
Definition ast.h:211
@ PM_TOKEN_FLOAT_RATIONAL_IMAGINARY
a floating pointer number with a rational and imaginary suffix
Definition ast.h:169
@ PM_TOKEN_BRACKET_LEFT_RIGHT
[]
Definition ast.h:82
@ PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
&&=
Definition ast.h:46
@ PM_TOKEN_KEYWORD_CLASS
class
Definition ast.h:232
@ PM_TOKEN_KEYWORD_BEGIN
begin
Definition ast.h:220
@ PM_TOKEN_NOT_PROVIDED
a token that was not present but it is okay
Definition ast.h:37
@ PM_TOKEN_USTAR_STAR
unary **
Definition ast.h:514
@ PM_TOKEN_GREATER_GREATER_EQUAL
>>=
Definition ast.h:184
@ PM_TOKEN_PERCENT_EQUAL
%=
Definition ast.h:409
@ PM_TOKEN_PERCENT
%
Definition ast.h:406
@ PM_TOKEN_KEYWORD_IN
in
Definition ast.h:274
@ PM_TOKEN_BANG
! or !@
Definition ast.h:61
@ PM_TOKEN_KEYWORD_NOT
not
Definition ast.h:286
@ PM_TOKEN_BRACKET_LEFT_ARRAY
[ for the beginning of an array
Definition ast.h:79
@ PM_TOKEN_HEREDOC_END
the end of a heredoc
Definition ast.h:187
@ PM_TOKEN_HEREDOC_START
the start of a heredoc
Definition ast.h:190
@ PM_TOKEN_KEYWORD_DEFINED
defined?
Definition ast.h:238
@ PM_TOKEN_UCOLON_COLON
unary ::
Definition ast.h:493
@ PM_TOKEN_LABEL_END
the end of a label
Definition ast.h:358
@ PM_TOKEN_EQUAL_GREATER
=>
Definition ast.h:154
@ PM_TOKEN_KEYWORD_UNLESS
unless
Definition ast.h:322
@ PM_TOKEN_KEYWORD_ENSURE
ensure
Definition ast.h:259
@ PM_TOKEN_AMPERSAND_EQUAL
&=
Definition ast.h:52
@ PM_TOKEN_EQUAL_EQUAL
==
Definition ast.h:148
@ PM_TOKEN_UPLUS
+@
Definition ast.h:508
@ PM_TOKEN_FLOAT_IMAGINARY
a floating pointer number with an imaginary suffix
Definition ast.h:163
@ PM_TOKEN_KEYWORD_BEGIN_UPCASE
BEGIN.
Definition ast.h:223
@ PM_TOKEN_LESS_EQUAL_GREATER
<=>
Definition ast.h:370
@ PM_TOKEN_KEYWORD_RESCUE_MODIFIER
rescue in the modifier form
Definition ast.h:298
@ PM_TOKEN_MISSING
a token that was expected but not found
Definition ast.h:34
@ PM_TOKEN_MINUS_GREATER
->
Definition ast.h:388
@ PM_TOKEN_KEYWORD_FALSE
false
Definition ast.h:262
@ PM_TOKEN_PIPE_PIPE_EQUAL
||=
Definition ast.h:436
@ PM_TOKEN_KEYWORD_IF
if
Definition ast.h:268
@ PM_TOKEN_EMBEXPR_BEGIN
#{
Definition ast.h:136
@ PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES
( for a parentheses node
Definition ast.h:400
@ PM_TOKEN_EMBDOC_END
=end
Definition ast.h:130
@ PM_TOKEN_KEYWORD_ELSE
else
Definition ast.h:247
@ PM_TOKEN_BACK_REFERENCE
a back reference
Definition ast.h:58
@ PM_TOKEN_BRACKET_LEFT
[
Definition ast.h:76
@ PM_TOKEN_EOF
final token in the file
Definition ast.h:31
@ PM_TOKEN_PIPE_PIPE
||
Definition ast.h:433
@ PM_TOKEN_KEYWORD_NIL
nil
Definition ast.h:283
@ PM_TOKEN_PERCENT_UPPER_W
%W
Definition ast.h:424
@ PM_TOKEN_KEYWORD_RETURN
return
Definition ast.h:304
@ PM_TOKEN_CLASS_VARIABLE
a class variable
Definition ast.h:100
@ PM_TOKEN_PIPE
|
Definition ast.h:427
@ PM_TOKEN_PARENTHESIS_LEFT
(
Definition ast.h:397
@ PM_TOKEN_BANG_TILDE
!~
Definition ast.h:67
@ PM_TOKEN_DOT
the .
Definition ast.h:118
@ PM_TOKEN_PARENTHESIS_RIGHT
)
Definition ast.h:403
@ PM_TOKEN_KEYWORD_RESCUE
rescue
Definition ast.h:295
@ PM_TOKEN_INSTANCE_VARIABLE
an instance variable
Definition ast.h:199
@ PM_TOKEN_PIPE_EQUAL
|=
Definition ast.h:430
@ PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL
[]=
Definition ast.h:85
@ PM_TOKEN_UAMPERSAND
unary &
Definition ast.h:490
@ PM_TOKEN_MINUS
-
Definition ast.h:382
@ PM_TOKEN_CONSTANT
a constant
Definition ast.h:115
@ PM_TOKEN_IDENTIFIER
an identifier
Definition ast.h:193
@ PM_TOKEN_EMBDOC_BEGIN
=begin
Definition ast.h:127
@ PM_TOKEN_STAR_EQUAL
*=
Definition ast.h:466
@ PM_TOKEN_KEYWORD_OR
or
Definition ast.h:289
@ PM_TOKEN_KEYWORD_AND
and
Definition ast.h:217
@ PM_TOKEN_LESS
<
Definition ast.h:364
@ PM_TOKEN_KEYWORD_BREAK
break
Definition ast.h:226
@ PM_TOKEN_PERCENT_LOWER_W
%w
Definition ast.h:415
@ PM_TOKEN_SYMBOL_BEGIN
the beginning of a symbol
Definition ast.h:484
@ PM_TOKEN_METHOD_NAME
a method name
Definition ast.h:379
@ PM_TOKEN_KEYWORD_CASE
case
Definition ast.h:229
@ PM_TOKEN_WORDS_SEP
a separator between words in a list
Definition ast.h:517
@ PM_TOKEN_FLOAT_RATIONAL
a floating-point number with a rational suffix
Definition ast.h:166
@ PM_TOKEN_LESS_LESS_EQUAL
<<=
Definition ast.h:376
@ PM_TOKEN_EMBDOC_LINE
a line inside of embedded documentation
Definition ast.h:133
@ PM_TOKEN_KEYWORD_SUPER
super
Definition ast.h:310
@ PM_TOKEN_KEYWORD_DO
do
Definition ast.h:241
@ PM_TOKEN_KEYWORD_REDO
redo
Definition ast.h:292
@ PM_TOKEN_EQUAL_TILDE
=~
Definition ast.h:157
@ PM_TOKEN_EMBEXPR_END
}
Definition ast.h:139
@ PM_TOKEN_KEYWORD_END_UPCASE
END.
Definition ast.h:256
@ PM_TOKEN_KEYWORD___LINE__
__LINE__
Definition ast.h:352
@ PM_TOKEN_STRING_END
the end of a string
Definition ast.h:481
@ PM_TOKEN_STRING_CONTENT
the contents of a string
Definition ast.h:478
@ PM_TOKEN_BRACE_LEFT
{
Definition ast.h:70
@ PM_TOKEN_COLON_COLON
::
Definition ast.h:106
@ PM_TOKEN_GREATER_GREATER
>>
Definition ast.h:181
@ PM_TOKEN_PERCENT_LOWER_X
%x
Definition ast.h:418
@ PM_TOKEN_KEYWORD_SELF
self
Definition ast.h:307
@ PM_TOKEN_PERCENT_LOWER_I
%i
Definition ast.h:412
@ PM_TOKEN_KEYWORD_ALIAS
alias
Definition ast.h:214
@ PM_TOKEN_GLOBAL_VARIABLE
a global variable
Definition ast.h:172
@ PM_TOKEN_KEYWORD_IF_MODIFIER
if in the modifier form
Definition ast.h:271
@ PM_TOKEN_SLASH
/
Definition ast.h:457
@ PM_TOKEN_KEYWORD_RETRY
retry
Definition ast.h:301
@ PM_TOKEN_COLON
:
Definition ast.h:103
@ PM_TOKEN_KEYWORD_UNDEF
undef
Definition ast.h:319
@ PM_TOKEN_BRACKET_RIGHT
]
Definition ast.h:88
@ PM_TOKEN_KEYWORD_FOR
for
Definition ast.h:265
@ PM_TOKEN_KEYWORD_THEN
then
Definition ast.h:313
@ PM_TOKEN_QUESTION_MARK
?
Definition ast.h:445
@ PM_TOKEN___END__
marker for the point in the file at which the parser should stop
Definition ast.h:520
@ PM_TOKEN_KEYWORD_WHILE
while
Definition ast.h:337
@ PM_TOKEN_EQUAL
=
Definition ast.h:145
@ PM_TOKEN_KEYWORD_DEF
def
Definition ast.h:235
@ PM_TOKEN_UDOT_DOT_DOT
unary ... operator
Definition ast.h:499
@ PM_TOKEN_STAR
*
Definition ast.h:463
@ PM_TOKEN_KEYWORD_WHILE_MODIFIER
while in the modifier form
Definition ast.h:340
@ PM_TOKEN_KEYWORD_TRUE
true
Definition ast.h:316
@ PM_TOKEN_BRACE_RIGHT
}
Definition ast.h:73
@ PM_TOKEN_SEMICOLON
;
Definition ast.h:454
@ PM_TOKEN_REGEXP_BEGIN
the beginning of a regular expression
Definition ast.h:448
@ PM_TOKEN_CARET
^
Definition ast.h:91
@ PM_TOKEN_PERCENT_UPPER_I
%I
Definition ast.h:421
@ PM_TOKEN_KEYWORD_DO_LOOP
do keyword for a predicate in a while, until, or for loop
Definition ast.h:244
@ PM_TOKEN_KEYWORD_MODULE
module
Definition ast.h:277
@ PM_TOKEN_PLUS
+
Definition ast.h:439
@ PM_TOKEN_KEYWORD_NEXT
next
Definition ast.h:280
@ PM_TOKEN_BACKTICK
`
Definition ast.h:55
@ PM_TOKEN_INTEGER_IMAGINARY
an integer with an imaginary suffix
Definition ast.h:205
@ PM_TOKEN_LABEL
a label
Definition ast.h:355
@ PM_TOKEN_STAR_STAR_EQUAL
**=
Definition ast.h:472
@ PM_TOKEN_CHARACTER_LITERAL
a character literal
Definition ast.h:97
@ PM_TOKEN_AMPERSAND_AMPERSAND
&&
Definition ast.h:43
@ PM_TOKEN_UMINUS
-@
Definition ast.h:502
@ PM_TOKEN_LESS_LESS
<<
Definition ast.h:373
@ PM_TOKEN_GREATER_EQUAL
>=
Definition ast.h:178
@ PM_TOKEN_COMMENT
a comment
Definition ast.h:112
@ PM_TOKEN_CARET_EQUAL
^=
Definition ast.h:94
@ PM_TOKEN_KEYWORD_ELSIF
elsif
Definition ast.h:250
@ PM_TOKEN_STRING_BEGIN
the beginning of a string
Definition ast.h:475
@ PM_TOKEN_LESS_EQUAL
<=
Definition ast.h:367
@ PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:7791
@ PM_LOOP_FLAGS_BEGIN_MODIFIER
a loop after a begin statement, so the body is executed first before the condition
Definition ast.h:7838
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:29
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:213
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:219
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:566
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:245
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2133
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition prism.h:88
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2110
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2040
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:362
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17994
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17996
const uint8_t * start
The start of the regular expression.
Definition prism.c:17999
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:18010
const uint8_t * end
The end of the regular expression.
Definition prism.c:18002
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20853
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20864
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20855
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20861
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20858
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20871
AndNode.
Definition ast.h:1263
struct pm_node * left
AndNode::left.
Definition ast.h:1279
struct pm_node * right
AndNode::right.
Definition ast.h:1292
ArgumentsNode.
Definition ast.h:1324
pm_node_t base
The embedded base node.
Definition ast.h:1326
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1337
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
ArrayNode.
Definition ast.h:1355
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1365
ArrayPatternNode.
Definition ast.h:1416
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1424
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1464
pm_node_t base
The embedded base node.
Definition ast.h:1418
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1474
AssocNode.
Definition ast.h:1489
struct pm_node * value
AssocNode::value.
Definition ast.h:1521
struct pm_node * key
AssocNode::key.
Definition ast.h:1508
BeginNode.
Definition ast.h:1615
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1668
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1648
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1638
pm_node_t base
The embedded base node.
Definition ast.h:1617
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1658
This struct represents a set of binding powers used for a given token.
Definition prism.c:12955
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12963
pm_binding_power_t left
The left binding power.
Definition prism.c:12957
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:12969
pm_binding_power_t right
The right binding power.
Definition prism.c:12960
BlockLocalVariableNode.
Definition ast.h:1734
BlockNode.
Definition ast.h:1762
BlockParameterNode.
Definition ast.h:1838
BlockParametersNode.
Definition ast.h:1892
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2119
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2180
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2200
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2138
pm_constant_id_t name
CallNode::name.
Definition ast.h:2161
pm_node_t base
The embedded base node.
Definition ast.h:2121
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2151
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2171
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2190
struct pm_node * block
CallNode::block.
Definition ast.h:2210
CaseMatchNode.
Definition ast.h:2545
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2568
CaseNode.
Definition ast.h:2615
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2638
ClassVariableReadNode.
Definition ast.h:2880
ClassVariableTargetNode.
Definition ast.h:2909
ClassVariableWriteNode.
Definition ast.h:2932
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:466
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition ast.h:3146
ConstantPathTargetNode.
Definition ast.h:3284
ConstantReadNode.
Definition ast.h:3379
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3408
ConstantWriteNode.
Definition ast.h:3431
This is a node in a linked list of contexts.
Definition parser.h:439
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:359
ElseNode.
Definition ast.h:3610
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3623
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3708
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3721
FindPatternNode.
Definition ast.h:3765
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3773
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3793
pm_node_t base
The embedded base node.
Definition ast.h:3767
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3798
FlipFlopNode.
Definition ast.h:3816
FloatNode.
Definition ast.h:3849
double value
FloatNode::value.
Definition ast.h:3859
pm_node_t base
The embedded base node.
Definition ast.h:3851
ForwardingParameterNode.
Definition ast.h:3985
GlobalVariableReadNode.
Definition ast.h:4145
GlobalVariableTargetNode.
Definition ast.h:4174
GlobalVariableWriteNode.
Definition ast.h:4197
HashNode.
Definition ast.h:4259
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4285
HashPatternNode.
Definition ast.h:4313
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4336
pm_node_t base
The embedded base node.
Definition ast.h:4315
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4341
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4321
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4362
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4422
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4441
ImaginaryNode.
Definition ast.h:4468
InstanceVariableReadNode.
Definition ast.h:4958
InstanceVariableTargetNode.
Definition ast.h:4987
InstanceVariableWriteNode.
Definition ast.h:5010
IntegerNode.
Definition ast.h:5078
pm_integer_t value
IntegerNode::value.
Definition ast.h:5088
pm_node_t base
The embedded base node.
Definition ast.h:5080
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5116
InterpolatedRegularExpressionNode.
Definition ast.h:5162
InterpolatedStringNode.
Definition ast.h:5199
pm_node_t base
The embedded base node.
Definition ast.h:5201
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5207
InterpolatedSymbolNode.
Definition ast.h:5232
pm_node_t base
The embedded base node.
Definition ast.h:5234
InterpolatedXStringNode.
Definition ast.h:5265
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5273
pm_node_t base
The embedded base node.
Definition ast.h:5267
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5278
KeywordHashNode.
Definition ast.h:5337
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
enum pm_lex_mode::@91 mode
The type of this lex mode.
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
union pm_lex_mode::@92 as
The data associated with this type of lex mode.
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
LocalVariableReadNode.
Definition ast.h:5579
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5610
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5597
LocalVariableTargetNode.
Definition ast.h:5625
LocalVariableWriteNode.
Definition ast.h:5653
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5680
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5667
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:545
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:547
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:550
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
MatchLastLineNode.
Definition ast.h:5745
MatchWriteNode.
Definition ast.h:5849
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:5862
MultiTargetNode.
Definition ast.h:5945
pm_node_t base
The embedded base node.
Definition ast.h:5947
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6003
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:5963
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6013
MultiWriteNode.
Definition ast.h:6028
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:558
size_t size
The number of nodes in the list.
Definition ast.h:560
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:566
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1069
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1074
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1080
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1092
OptionalParameterNode.
Definition ast.h:6301
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:98
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:147
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:109
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:163
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:170
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:124
int32_t line
The line within the file that the parse starts on.
Definition options.h:118
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:103
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:156
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:180
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:129
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:112
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:144
OrNode.
Definition ast.h:6339
struct pm_node * left
OrNode::left.
Definition ast.h:6355
struct pm_node * right
OrNode::right.
Definition ast.h:6368
ParametersNode.
Definition ast.h:6394
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6412
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6432
pm_node_t base
The embedded base node.
Definition ast.h:6396
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6427
ParenthesesNode.
Definition ast.h:6447
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6455
This struct represents the overall parser.
Definition parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:909
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
struct pm_parser::@97 lex_modes
A stack of lex modes.
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:646
RangeNode.
Definition ast.h:6653
struct pm_node * right
RangeNode::right.
Definition ast.h:6683
struct pm_node * left
RangeNode::left.
Definition ast.h:6669
RationalNode.
Definition ast.h:6711
pm_node_t base
The embedded base node.
Definition ast.h:6713
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6723
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:10381
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10386
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10383
RegularExpressionNode.
Definition ast.h:6778
pm_node_t base
The embedded base node.
Definition ast.h:6780
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:6801
RequiredParameterNode.
Definition ast.h:6852
RescueModifierNode.
Definition ast.h:6875
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:6893
RescueNode.
Definition ast.h:6913
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:6946
pm_node_t base
The embedded base node.
Definition ast.h:6915
This struct represents a node in a linked list of scopes.
Definition parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
SplatNode.
Definition ast.h:7208
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7221
StatementsNode.
Definition ast.h:7236
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7244
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7271
pm_node_t base
The embedded base node.
Definition ast.h:7273
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7294
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7289
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7279
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@98 type
The type of the string.
SymbolNode.
Definition ast.h:7363
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7376
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7386
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:10355
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10360
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10366
This struct represents a token in the Ruby source.
Definition ast.h:530
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:538
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:535
pm_token_type_t type
The type of the token.
Definition ast.h:532
UndefNode.
Definition ast.h:7419
UnlessNode.
Definition ast.h:7450
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7500
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7510
WhenNode.
Definition ast.h:7586
XStringNode.
Definition ast.h:7677