Ruby 3.5.0dev (2025-11-03 revision 4a3d8346a6d0e068508631541f6bc43e8b154ea1)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * The number of columns that a tab character is treated as spanning.
 * NOTE(review): the consumers of this constant are outside this section --
 * confirm exactly where tab expansion is applied.
 */
#define PM_TAB_WHITESPACE_SIZE 8

// Minimum and maximum of two values. These are plain macros, so each argument
// can be evaluated more than once: never pass expressions with side effects.
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
20
21/******************************************************************************/
22/* Lex mode manipulations */
23/******************************************************************************/
24
/**
 * Return the character that increases nesting for the given opening
 * delimiter. For the four bracket-style openers -- (, [, {, and < -- that is
 * the delimiter itself. Every other delimiter has no incrementor, which is
 * signalled by returning '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
41
/**
 * Return the character that closes a literal opened with the given delimiter.
 * Bracket-style openers map to their closing counterpart; any other delimiter
 * closes itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * The classification that lex_state_ignored_p assigns to the current lex
 * state.
 */
typedef enum {
    /** Neither of the conditions below holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** The state has BEG, CLASS, FNAME, or DOT set and LABELED is not set. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** The state, ignoring the LABEL bit, is exactly ARG|LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
/**
 * Lex state debug logging is off unless the build defines PM_DEBUG_LOGGING to
 * a non-zero value itself.
 */
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323
324#if PM_DEBUG_LOGGING
325PRISM_ATTRIBUTE_UNUSED static void
326debug_state(pm_parser_t *parser) {
327 fprintf(stderr, "STATE: ");
328 bool first = true;
329
330 if (parser->lex_state == PM_LEX_STATE_NONE) {
331 fprintf(stderr, "NONE\n");
332 return;
333 }
334
335#define CHECK_STATE(state) \
336 if (parser->lex_state & state) { \
337 if (!first) fprintf(stderr, "|"); \
338 fprintf(stderr, "%s", #state); \
339 first = false; \
340 }
341
342 CHECK_STATE(PM_LEX_STATE_BEG)
343 CHECK_STATE(PM_LEX_STATE_END)
344 CHECK_STATE(PM_LEX_STATE_ENDARG)
345 CHECK_STATE(PM_LEX_STATE_ENDFN)
346 CHECK_STATE(PM_LEX_STATE_ARG)
347 CHECK_STATE(PM_LEX_STATE_CMDARG)
348 CHECK_STATE(PM_LEX_STATE_MID)
349 CHECK_STATE(PM_LEX_STATE_FNAME)
350 CHECK_STATE(PM_LEX_STATE_DOT)
351 CHECK_STATE(PM_LEX_STATE_CLASS)
352 CHECK_STATE(PM_LEX_STATE_LABEL)
353 CHECK_STATE(PM_LEX_STATE_LABELED)
354 CHECK_STATE(PM_LEX_STATE_FITEM)
355
356#undef CHECK_STATE
357
358 fprintf(stderr, "\n");
359}
360
361static void
362debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364 debug_state(parser);
365 lex_state_set(parser, state);
366 fprintf(stderr, "Now: ");
367 debug_state(parser);
368 fprintf(stderr, "\n");
369}
370
371#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
372#endif
373
374/******************************************************************************/
375/* Command-line macro helpers */
376/******************************************************************************/
377
/**
 * Evaluates to a non-zero value if the given option bit(s) are set in the
 * parser's command_line field.
 */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_A bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_E bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_L bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_N bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_P bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_X bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
399/******************************************************************************/
400/* Diagnostic-related functions */
401/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append a formatted diagnostic with the given id, covering the bytes from
 * `start` to `end`, to the parser's list of errors. The variadic arguments
 * are forwarded to pm_diagnostic_list_append_format; at least one must be
 * given.
 *
 * The `parser` argument is parenthesised so that any pointer expression can
 * be passed, not just a plain identifier.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error with the given id that spans the location pointed
 * to by `location` (its `start` and `end` members are used).
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/**
 * Append a formatted error with the given id that spans the location of the
 * given node.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)

/**
 * Append a formatted error that spans the given node, passing the length of
 * the node's source (as an int) and a pointer to its first byte as the format
 * arguments.
 * NOTE(review): presumably the diagnostic's format string consumes the pair
 * with a `%.*s` conversion -- confirm against the diagnostic definitions.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error with the given id that spans the given token.
 * The token is passed by value (or as an lvalue), not as a pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted error that spans the given token, passing the length of
 * the token (as an int) and a pointer to its first byte as the format
 * arguments.
 * NOTE(review): presumably the diagnostic's format string consumes the pair
 * with a `%.*s` conversion -- confirm against the diagnostic definitions.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a formatted diagnostic with the given id, covering the bytes from
 * `start` to `end`, to the parser's list of warnings. The variadic arguments
 * are forwarded to pm_diagnostic_list_append_format; at least one must be
 * given.
 *
 * The `parser` argument is parenthesised so that any pointer expression can
 * be passed, not just a plain identifier.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning with the given id that spans the given token.
 * The token is passed by value (or as an lvalue), not as a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning that spans the given token, passing the length
 * of the token (as an int) and a pointer to its first byte as the format
 * arguments.
 * NOTE(review): presumably the diagnostic's format string consumes the pair
 * with a `%.*s` conversion -- confirm against the diagnostic definitions.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)

/**
 * Append a formatted warning with the given id that spans the location of
 * the given node.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
558/******************************************************************************/
559/* Scope-related functions */
560/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/**
 * The possible outcomes of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** The closed scope has the parameter and no scope before it did. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /** The closed scope has the parameter, but so did a scope before it. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** No closed scope with the parameter was found. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
729/******************************************************************************/
730/* Local variable-related functions */
731/******************************************************************************/
732
/**
 * Below this capacity a set of locals is stored as a list that is scanned
 * linearly; at or above it, the set is stored as an open-addressed hash.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
982/******************************************************************************/
983/* Node-related functions */
984/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
1043 case PM_MATCH_REQUIRED_NODE:
1044 return void_node != NULL ? void_node : node;
1045 case PM_MATCH_PREDICATE_NODE:
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
1148 node = cast->left;
1149 break;
1150 }
1151 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1153
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
1185 case PM_BACK_REFERENCE_READ_NODE:
1186 case PM_CLASS_VARIABLE_READ_NODE:
1187 case PM_GLOBAL_VARIABLE_READ_NODE:
1188 case PM_INSTANCE_VARIABLE_READ_NODE:
1189 case PM_LOCAL_VARIABLE_READ_NODE:
1190 case PM_NUMBERED_REFERENCE_READ_NODE:
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
1249 case PM_CONSTANT_PATH_NODE:
1250 type = "::";
1251 length = 2;
1252 break;
1253 case PM_CONSTANT_READ_NODE:
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
1268 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1269 case PM_INTERPOLATED_STRING_NODE:
1270 case PM_RATIONAL_NODE:
1271 case PM_REGULAR_EXPRESSION_NODE:
1272 case PM_SOURCE_ENCODING_NODE:
1273 case PM_SOURCE_FILE_NODE:
1274 case PM_SOURCE_LINE_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
1287 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The context in which a node is being checked as a conditional predicate.
 */
typedef enum {
    /** The node is being checked as a condition. */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,

    /** The node is being checked as a bound of a flip-flop. */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,

    /** The node is being checked as the operand of a not. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT
} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
1377 const pm_node_t *element = cast->elements.nodes[index];
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1392 case PM_REGULAR_EXPRESSION_NODE:
1393 case PM_SOURCE_ENCODING_NODE:
1394 case PM_SOURCE_FILE_NODE:
1395 case PM_SOURCE_LINE_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
1445
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
1477 case PM_REGULAR_EXPRESSION_NODE:
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1482 node->type = PM_MATCH_LAST_LINE_NODE;
1483
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
1489 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1494 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
1510 case PM_STRING_NODE:
1511 case PM_SOURCE_FILE_NODE:
1512 case PM_INTERPOLATED_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
1515 case PM_SYMBOL_NODE:
1516 case PM_INTERPOLATED_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
1519 case PM_SOURCE_LINE_NODE:
1520 case PM_SOURCE_ENCODING_NODE:
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
1526 case PM_CLASS_VARIABLE_WRITE_NODE:
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1529 case PM_CONSTANT_WRITE_NODE:
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1532 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1535 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1538 case PM_LOCAL_VARIABLE_WRITE_NODE:
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1541 case PM_MULTI_WRITE_NODE:
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
// An empty location whose start and end both point at the parser's start.
#define PM_LOCATION_NULL_VALUE(parser) \
    ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })

// The location covered by a token.
#define PM_LOCATION_TOKEN_VALUE(token) \
    ((pm_location_t) { .start = (token)->start, .end = (token)->end })

// The location of a node, given a pointer to the base node type.
#define PM_LOCATION_NODE_VALUE(node) \
    ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })

// The location of a node, given a pointer to a specific node type.
#define PM_LOCATION_NODE_BASE_VALUE(node) \
    ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })

// The value used for an optional location that is absent.
#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE \
    ((pm_location_t) { .start = NULL, .end = NULL })

// The location of a token, or the absent value for a not-provided token.
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) \
    ((token)->type != PM_TOKEN_NOT_PROVIDED ? PM_LOCATION_TOKEN_VALUE(token) : PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE)
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1653 if (n <= 0) return 0;
1654
1655 if (parser->encoding_changed) {
1656 size_t width;
1657
1658 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1659 return width;
1660 } else if (*b == '_') {
1661 return 1;
1662 } else if (*b >= 0x80) {
1663 return parser->encoding->char_width(b, n);
1664 } else {
1665 return 0;
1666 }
1667 } else if (*b < 0x80) {
1668 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1669 } else {
1670 return pm_encoding_utf_8_char_width(b, n);
1671 }
1672}
1673
1678static inline size_t
1679char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1680 if (n <= 0) {
1681 return 0;
1682 } else if (*b < 0x80) {
1683 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1684 } else {
1685 return pm_encoding_utf_8_char_width(b, n);
1686 }
1687}
1688
1694static inline size_t
1695char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1696 if (n <= 0) {
1697 return 0;
1698 } else if (parser->encoding_changed) {
1699 size_t width;
1700
1701 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1702 return width;
1703 } else if (*b == '_') {
1704 return 1;
1705 } else if (*b >= 0x80) {
1706 return parser->encoding->char_width(b, n);
1707 } else {
1708 return 0;
1709 }
1710 } else {
1711 return char_is_identifier_utf8(b, n);
1712 }
1713}
1714
// Here we're defining a bitmap of the punctuation characters that are allowed
// in global names. It is used to quickly check whether the character after a $
// is a valid character for a global name. The bytes 0x20 through 0x7e are
// spread over three 32-bit words: word (c / 32 - 1), bit (c % 32).
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    // Only bytes strictly after the space character and up to '~' are covered
    // by the bitmap.
    if (b <= 0x20 || b > 0x7e) return false;

    const unsigned int code = (unsigned int) b;
    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];

    return ((word >> (code % 32)) & 1) != 0;
}
1739
1740static inline bool
1741token_is_setter_name(pm_token_t *token) {
1742 return (
1743 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1744 ((token->type == PM_TOKEN_IDENTIFIER) &&
1745 (token->end - token->start >= 2) &&
1746 (token->end[-1] == '='))
1747 );
1748}
1749
/**
 * Return true if the first `length` bytes of `source` spell one of the
 * keywords listed below. Candidates are grouped by length so that each
 * comparison covers exactly `length` bytes.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define IS(name) (memcmp(source, name, length) == 0)

    bool keyword;

    switch (length) {
        case 2:
            keyword = IS("do") || IS("if") || IS("in") || IS("or");
            break;
        case 3:
            keyword = IS("and") || IS("def") || IS("end") || IS("for") || IS("nil") || IS("not");
            break;
        case 4:
            keyword =
                IS("case") || IS("else") || IS("next") || IS("redo") ||
                IS("self") || IS("then") || IS("true") || IS("when");
            break;
        case 5:
            keyword =
                IS("alias") || IS("begin") || IS("break") || IS("class") ||
                IS("elsif") || IS("false") || IS("retry") || IS("super") ||
                IS("undef") || IS("until") || IS("while") || IS("yield");
            break;
        case 6:
            keyword = IS("ensure") || IS("module") || IS("rescue") || IS("return") || IS("unless");
            break;
        case 8:
            keyword = IS("__LINE__") || IS("__FILE__");
            break;
        case 12:
            keyword = IS("__ENCODING__");
            break;
        default:
            keyword = false;
            break;
    }

#undef IS

    return keyword;
}
1820
1821/******************************************************************************/
1822/* Node flag handling functions */
1823/******************************************************************************/
1824
1828static inline void
1829pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1830 node->flags |= flag;
1831}
1832
1836static inline void
1837pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1838 node->flags &= (pm_node_flags_t) ~flag;
1839}
1840
1844static inline void
1845pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1846 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1847 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1848 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1849 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1850 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1851 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1852 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1853 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1854
1855 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1856}
1857
1858/******************************************************************************/
1859/* Node creation functions */
1860/******************************************************************************/
1861
1867#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1868
1872static inline pm_node_flags_t
1873pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1874 pm_node_flags_t flags = 0;
1875
1876 if (closing->type == PM_TOKEN_REGEXP_END) {
1877 pm_buffer_t unknown_flags = { 0 };
1878
1879 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1880 switch (*flag) {
1881 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1882 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1883 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1884 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1885
1886 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1887 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1888 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1889 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1890
1891 default: pm_buffer_append_byte(&unknown_flags, *flag);
1892 }
1893 }
1894
1895 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1896 if (unknown_flags_length != 0) {
1897 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1898 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1899 }
1900 pm_buffer_free(&unknown_flags);
1901 }
1902
1903 return flags;
1904}
1905
1906#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1907
1908static pm_statements_node_t *
1909pm_statements_node_create(pm_parser_t *parser);
1910
1911static void
1912pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1913
1914static size_t
1915pm_statements_node_body_length(pm_statements_node_t *node);
1916
1921static inline void *
1922pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1923 void *memory = xcalloc(1, size);
1924 if (memory == NULL) {
1925 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1926 abort();
1927 }
1928 return memory;
1929}
1930
// Allocate zeroed storage for a node of the given type and cast the result to
// a pointer to that type. The whole expansion is parenthesized so that it can
// be used safely inside larger expressions.
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

// Increment the parser's node counter and yield the new value as the
// identifier for a freshly created node. The argument is parenthesized so that
// any pointer-valued expression can be passed.
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1933
1937static pm_missing_node_t *
1938pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1939 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1940
1941 *node = (pm_missing_node_t) {{
1942 .type = PM_MISSING_NODE,
1943 .node_id = PM_NODE_IDENTIFY(parser),
1944 .location = { .start = start, .end = end }
1945 }};
1946
1947 return node;
1948}
1949
1953static pm_alias_global_variable_node_t *
1954pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1955 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1956 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1957
1958 *node = (pm_alias_global_variable_node_t) {
1959 {
1960 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1961 .node_id = PM_NODE_IDENTIFY(parser),
1962 .location = {
1963 .start = keyword->start,
1964 .end = old_name->location.end
1965 },
1966 },
1967 .new_name = new_name,
1968 .old_name = old_name,
1969 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1970 };
1971
1972 return node;
1973}
1974
1978static pm_alias_method_node_t *
1979pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1980 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1981 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1982
1983 *node = (pm_alias_method_node_t) {
1984 {
1985 .type = PM_ALIAS_METHOD_NODE,
1986 .node_id = PM_NODE_IDENTIFY(parser),
1987 .location = {
1988 .start = keyword->start,
1989 .end = old_name->location.end
1990 },
1991 },
1992 .new_name = new_name,
1993 .old_name = old_name,
1994 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1995 };
1996
1997 return node;
1998}
1999
2003static pm_alternation_pattern_node_t *
2004pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2005 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2006
2007 *node = (pm_alternation_pattern_node_t) {
2008 {
2009 .type = PM_ALTERNATION_PATTERN_NODE,
2010 .node_id = PM_NODE_IDENTIFY(parser),
2011 .location = {
2012 .start = left->location.start,
2013 .end = right->location.end
2014 },
2015 },
2016 .left = left,
2017 .right = right,
2018 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2019 };
2020
2021 return node;
2022}
2023
2027static pm_and_node_t *
2028pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2029 pm_assert_value_expression(parser, left);
2030
2031 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2032
2033 *node = (pm_and_node_t) {
2034 {
2035 .type = PM_AND_NODE,
2036 .node_id = PM_NODE_IDENTIFY(parser),
2037 .location = {
2038 .start = left->location.start,
2039 .end = right->location.end
2040 },
2041 },
2042 .left = left,
2043 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2044 .right = right
2045 };
2046
2047 return node;
2048}
2049
2053static pm_arguments_node_t *
2054pm_arguments_node_create(pm_parser_t *parser) {
2055 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2056
2057 *node = (pm_arguments_node_t) {
2058 {
2059 .type = PM_ARGUMENTS_NODE,
2060 .node_id = PM_NODE_IDENTIFY(parser),
2061 .location = PM_LOCATION_NULL_VALUE(parser)
2062 },
2063 .arguments = { 0 }
2064 };
2065
2066 return node;
2067}
2068
2072static size_t
2073pm_arguments_node_size(pm_arguments_node_t *node) {
2074 return node->arguments.size;
2075}
2076
2080static void
2081pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2082 if (pm_arguments_node_size(node) == 0) {
2083 node->base.location.start = argument->location.start;
2084 }
2085
2086 node->base.location.end = argument->location.end;
2087 pm_node_list_append(&node->arguments, argument);
2088
2089 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2090 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2091 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2092 } else {
2093 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2094 }
2095 }
2096}
2097
2101static pm_array_node_t *
2102pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2103 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2104
2105 *node = (pm_array_node_t) {
2106 {
2107 .type = PM_ARRAY_NODE,
2108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2109 .node_id = PM_NODE_IDENTIFY(parser),
2110 .location = PM_LOCATION_TOKEN_VALUE(opening)
2111 },
2112 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2113 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2114 .elements = { 0 }
2115 };
2116
2117 return node;
2118}
2119
2123static inline void
2124pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2125 if (!node->elements.size && !node->opening_loc.start) {
2126 node->base.location.start = element->location.start;
2127 }
2128
2129 pm_node_list_append(&node->elements, element);
2130 node->base.location.end = element->location.end;
2131
2132 // If the element is not a static literal, then the array is not a static
2133 // literal. Turn that flag off.
2134 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2135 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2136 }
2137
2138 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2139 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2140 }
2141}
2142
2146static void
2147pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2148 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2149 node->base.location.end = closing->end;
2150 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2151}
2152
2157static pm_array_pattern_node_t *
2158pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2159 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2160
2161 *node = (pm_array_pattern_node_t) {
2162 {
2163 .type = PM_ARRAY_PATTERN_NODE,
2164 .node_id = PM_NODE_IDENTIFY(parser),
2165 .location = {
2166 .start = nodes->nodes[0]->location.start,
2167 .end = nodes->nodes[nodes->size - 1]->location.end
2168 },
2169 },
2170 .constant = NULL,
2171 .rest = NULL,
2172 .requireds = { 0 },
2173 .posts = { 0 },
2174 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2175 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2176 };
2177
2178 // For now we're going to just copy over each pointer manually. This could be
2179 // much more efficient, as we could instead resize the node list.
2180 bool found_rest = false;
2181 pm_node_t *child;
2182
2183 PM_NODE_LIST_FOREACH(nodes, index, child) {
2184 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2185 node->rest = child;
2186 found_rest = true;
2187 } else if (found_rest) {
2188 pm_node_list_append(&node->posts, child);
2189 } else {
2190 pm_node_list_append(&node->requireds, child);
2191 }
2192 }
2193
2194 return node;
2195}
2196
2200static pm_array_pattern_node_t *
2201pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2202 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2203
2204 *node = (pm_array_pattern_node_t) {
2205 {
2206 .type = PM_ARRAY_PATTERN_NODE,
2207 .node_id = PM_NODE_IDENTIFY(parser),
2208 .location = rest->location,
2209 },
2210 .constant = NULL,
2211 .rest = rest,
2212 .requireds = { 0 },
2213 .posts = { 0 },
2214 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2215 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 {
2231 .type = PM_ARRAY_PATTERN_NODE,
2232 .node_id = PM_NODE_IDENTIFY(parser),
2233 .location = {
2234 .start = constant->location.start,
2235 .end = closing->end
2236 },
2237 },
2238 .constant = constant,
2239 .rest = NULL,
2240 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2241 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2242 .requireds = { 0 },
2243 .posts = { 0 }
2244 };
2245
2246 return node;
2247}
2248
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2255 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2256
2257 *node = (pm_array_pattern_node_t) {
2258 {
2259 .type = PM_ARRAY_PATTERN_NODE,
2260 .node_id = PM_NODE_IDENTIFY(parser),
2261 .location = {
2262 .start = opening->start,
2263 .end = closing->end
2264 },
2265 },
2266 .constant = NULL,
2267 .rest = NULL,
2268 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2269 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2270 .requireds = { 0 },
2271 .posts = { 0 }
2272 };
2273
2274 return node;
2275}
2276
2277static inline void
2278pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2279 pm_node_list_append(&node->requireds, inner);
2280}
2281
2285static pm_assoc_node_t *
2286pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2287 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2288 const uint8_t *end;
2289
2290 if (value != NULL && value->location.end > key->location.end) {
2291 end = value->location.end;
2292 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2293 end = operator->end;
2294 } else {
2295 end = key->location.end;
2296 }
2297
2298 // Hash string keys will be frozen, so we can mark them as frozen here so
2299 // that the compiler picks them up and also when we check for static literal
2300 // on the keys it gets factored in.
2301 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2302 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2303 }
2304
2305 // If the key and value of this assoc node are both static literals, then
2306 // we can mark this node as a static literal.
2307 pm_node_flags_t flags = 0;
2308 if (
2309 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2310 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2311 ) {
2312 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2313 }
2314
2315 *node = (pm_assoc_node_t) {
2316 {
2317 .type = PM_ASSOC_NODE,
2318 .flags = flags,
2319 .node_id = PM_NODE_IDENTIFY(parser),
2320 .location = {
2321 .start = key->location.start,
2322 .end = end
2323 },
2324 },
2325 .key = key,
2326 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2327 .value = value
2328 };
2329
2330 return node;
2331}
2332
2336static pm_assoc_splat_node_t *
2337pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2338 assert(operator->type == PM_TOKEN_USTAR_STAR);
2339 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2340
2341 *node = (pm_assoc_splat_node_t) {
2342 {
2343 .type = PM_ASSOC_SPLAT_NODE,
2344 .node_id = PM_NODE_IDENTIFY(parser),
2345 .location = {
2346 .start = operator->start,
2347 .end = value == NULL ? operator->end : value->location.end
2348 },
2349 },
2350 .value = value,
2351 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2352 };
2353
2354 return node;
2355}
2356
2360static pm_back_reference_read_node_t *
2361pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2362 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2363 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2364
2365 *node = (pm_back_reference_read_node_t) {
2366 {
2367 .type = PM_BACK_REFERENCE_READ_NODE,
2368 .node_id = PM_NODE_IDENTIFY(parser),
2369 .location = PM_LOCATION_TOKEN_VALUE(name),
2370 },
2371 .name = pm_parser_constant_id_token(parser, name)
2372 };
2373
2374 return node;
2375}
2376
2380static pm_begin_node_t *
2381pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2382 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2383
2384 *node = (pm_begin_node_t) {
2385 {
2386 .type = PM_BEGIN_NODE,
2387 .node_id = PM_NODE_IDENTIFY(parser),
2388 .location = {
2389 .start = begin_keyword->start,
2390 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2391 },
2392 },
2393 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2394 .statements = statements,
2395 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2396 };
2397
2398 return node;
2399}
2400
2404static void
2405pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2406 // If the begin keyword doesn't exist, we set the start on the begin_node
2407 if (!node->begin_keyword_loc.start) {
2408 node->base.location.start = rescue_clause->base.location.start;
2409 }
2410 node->base.location.end = rescue_clause->base.location.end;
2411 node->rescue_clause = rescue_clause;
2412}
2413
2417static void
2418pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2419 node->base.location.end = else_clause->base.location.end;
2420 node->else_clause = else_clause;
2421}
2422
2426static void
2427pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2428 node->base.location.end = ensure_clause->base.location.end;
2429 node->ensure_clause = ensure_clause;
2430}
2431
2435static void
2436pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2437 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2438
2439 node->base.location.end = end_keyword->end;
2440 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2441}
2442
2446static pm_block_argument_node_t *
2447pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2448 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2449
2450 *node = (pm_block_argument_node_t) {
2451 {
2452 .type = PM_BLOCK_ARGUMENT_NODE,
2453 .node_id = PM_NODE_IDENTIFY(parser),
2454 .location = {
2455 .start = operator->start,
2456 .end = expression == NULL ? operator->end : expression->location.end
2457 },
2458 },
2459 .expression = expression,
2460 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2461 };
2462
2463 return node;
2464}
2465
2469static pm_block_node_t *
2470pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2471 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2472
2473 *node = (pm_block_node_t) {
2474 {
2475 .type = PM_BLOCK_NODE,
2476 .node_id = PM_NODE_IDENTIFY(parser),
2477 .location = { .start = opening->start, .end = closing->end },
2478 },
2479 .locals = *locals,
2480 .parameters = parameters,
2481 .body = body,
2482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2484 };
2485
2486 return node;
2487}
2488
2492static pm_block_parameter_node_t *
2493pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2494 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2495 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2496
2497 *node = (pm_block_parameter_node_t) {
2498 {
2499 .type = PM_BLOCK_PARAMETER_NODE,
2500 .node_id = PM_NODE_IDENTIFY(parser),
2501 .location = {
2502 .start = operator->start,
2503 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2504 },
2505 },
2506 .name = pm_parser_optional_constant_id_token(parser, name),
2507 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2508 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2509 };
2510
2511 return node;
2512}
2513
2517static pm_block_parameters_node_t *
2518pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2519 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2520
2521 const uint8_t *start;
2522 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2523 start = opening->start;
2524 } else if (parameters != NULL) {
2525 start = parameters->base.location.start;
2526 } else {
2527 start = NULL;
2528 }
2529
2530 const uint8_t *end;
2531 if (parameters != NULL) {
2532 end = parameters->base.location.end;
2533 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2534 end = opening->end;
2535 } else {
2536 end = NULL;
2537 }
2538
2539 *node = (pm_block_parameters_node_t) {
2540 {
2541 .type = PM_BLOCK_PARAMETERS_NODE,
2542 .node_id = PM_NODE_IDENTIFY(parser),
2543 .location = {
2544 .start = start,
2545 .end = end
2546 }
2547 },
2548 .parameters = parameters,
2549 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2550 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2551 .locals = { 0 }
2552 };
2553
2554 return node;
2555}
2556
2560static void
2561pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2562 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2563
2564 node->base.location.end = closing->end;
2565 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2566}
2567
2571static pm_block_local_variable_node_t *
2572pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2573 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2574
2575 *node = (pm_block_local_variable_node_t) {
2576 {
2577 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2578 .node_id = PM_NODE_IDENTIFY(parser),
2579 .location = PM_LOCATION_TOKEN_VALUE(name),
2580 },
2581 .name = pm_parser_constant_id_token(parser, name)
2582 };
2583
2584 return node;
2585}
2586
2590static void
2591pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2592 pm_node_list_append(&node->locals, (pm_node_t *) local);
2593
2594 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2595 node->base.location.end = local->base.location.end;
2596}
2597
2601static pm_break_node_t *
2602pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2603 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2604 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2605
2606 *node = (pm_break_node_t) {
2607 {
2608 .type = PM_BREAK_NODE,
2609 .node_id = PM_NODE_IDENTIFY(parser),
2610 .location = {
2611 .start = keyword->start,
2612 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2613 },
2614 },
2615 .arguments = arguments,
2616 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2617 };
2618
2619 return node;
2620}
2621
2622// There are certain flags that we want to use internally but don't want to
2623// expose because they are not relevant beyond parsing. Therefore we'll define
2624// them here and not define them in config.yml/a header file.
2625static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2626
2627static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2628static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2629static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2630
2636static pm_call_node_t *
2637pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2638 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2639
2640 *node = (pm_call_node_t) {
2641 {
2642 .type = PM_CALL_NODE,
2643 .flags = flags,
2644 .node_id = PM_NODE_IDENTIFY(parser),
2645 .location = PM_LOCATION_NULL_VALUE(parser),
2646 },
2647 .receiver = NULL,
2648 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2649 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2650 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2651 .arguments = NULL,
2652 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2653 .equal_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2654 .block = NULL,
2655 .name = 0
2656 };
2657
2658 return node;
2659}
2660
2665static inline pm_node_flags_t
2666pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2667 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2668}
2669
2674static pm_call_node_t *
2675pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2676 pm_assert_value_expression(parser, receiver);
2677
2678 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2679 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2680 flags |= PM_CALL_NODE_FLAGS_INDEX;
2681 }
2682
2683 pm_call_node_t *node = pm_call_node_create(parser, flags);
2684
2685 node->base.location.start = receiver->location.start;
2686 node->base.location.end = pm_arguments_end(arguments);
2687
2688 node->receiver = receiver;
2689 node->message_loc.start = arguments->opening_loc.start;
2690 node->message_loc.end = arguments->closing_loc.end;
2691
2692 node->opening_loc = arguments->opening_loc;
2693 node->arguments = arguments->arguments;
2694 node->closing_loc = arguments->closing_loc;
2695 node->block = arguments->block;
2696
2697 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2698 return node;
2699}
2700
2704static pm_call_node_t *
2705pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2706 pm_assert_value_expression(parser, receiver);
2707 pm_assert_value_expression(parser, argument);
2708
2709 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2710
2711 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2712 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2713
2714 node->receiver = receiver;
2715 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2716
2717 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2718 pm_arguments_node_arguments_append(arguments, argument);
2719 node->arguments = arguments;
2720
2721 node->name = pm_parser_constant_id_token(parser, operator);
2722 return node;
2723}
2724
2725static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2726
2730static pm_call_node_t *
2731pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2732 pm_assert_value_expression(parser, receiver);
2733
2734 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2735
2736 node->base.location.start = receiver->location.start;
2737 const uint8_t *end = pm_arguments_end(arguments);
2738 if (end == NULL) {
2739 end = message->end;
2740 }
2741 node->base.location.end = end;
2742
2743 node->receiver = receiver;
2744 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2745 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2746 node->opening_loc = arguments->opening_loc;
2747 node->arguments = arguments->arguments;
2748 node->closing_loc = arguments->closing_loc;
2749 node->block = arguments->block;
2750
2751 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2752 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2753 }
2754
2759 node->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message));
2760 return node;
2761}
2762
2766static pm_call_node_t *
2767pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2768 pm_call_node_t *node = pm_call_node_create(parser, 0);
2769 node->base.location.start = parser->start;
2770 node->base.location.end = parser->end;
2771
2772 node->receiver = receiver;
2773 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2774 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2775 node->arguments = arguments;
2776
2777 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2778 return node;
2779}
2780
2785static pm_call_node_t *
2786pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2787 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2788
2789 node->base.location.start = message->start;
2790 node->base.location.end = pm_arguments_end(arguments);
2791
2792 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2793 node->opening_loc = arguments->opening_loc;
2794 node->arguments = arguments->arguments;
2795 node->closing_loc = arguments->closing_loc;
2796 node->block = arguments->block;
2797
2798 node->name = pm_parser_constant_id_token(parser, message);
2799 return node;
2800}
2801
2806static pm_call_node_t *
2807pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2808 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2809
2810 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2811 node->arguments = arguments;
2812
2813 node->name = name;
2814 return node;
2815}
2816
2820static pm_call_node_t *
2821pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2822 pm_assert_value_expression(parser, receiver);
2823 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2824
2825 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2826
2827 node->base.location.start = message->start;
2828 if (arguments->closing_loc.start != NULL) {
2829 node->base.location.end = arguments->closing_loc.end;
2830 } else {
2831 assert(receiver != NULL);
2832 node->base.location.end = receiver->location.end;
2833 }
2834
2835 node->receiver = receiver;
2836 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2837 node->opening_loc = arguments->opening_loc;
2838 node->arguments = arguments->arguments;
2839 node->closing_loc = arguments->closing_loc;
2840
2841 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2842 return node;
2843}
2844
2848static pm_call_node_t *
2849pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2850 pm_assert_value_expression(parser, receiver);
2851
2852 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2853
2854 node->base.location.start = receiver->location.start;
2855 node->base.location.end = pm_arguments_end(arguments);
2856
2857 node->receiver = receiver;
2858 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2859 node->opening_loc = arguments->opening_loc;
2860 node->arguments = arguments->arguments;
2861 node->closing_loc = arguments->closing_loc;
2862 node->block = arguments->block;
2863
2864 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2865 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2866 }
2867
2868 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2869 return node;
2870}
2871
2875static pm_call_node_t *
2876pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2877 pm_assert_value_expression(parser, receiver);
2878
2879 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2880
2881 node->base.location.start = operator->start;
2882 node->base.location.end = receiver->location.end;
2883
2884 node->receiver = receiver;
2885 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2886
2887 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2888 return node;
2889}
2890
2895static pm_call_node_t *
2896pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2897 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2898
2899 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2900 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2901
2902 node->name = pm_parser_constant_id_token(parser, message);
2903 return node;
2904}
2905
2910static inline bool
2911pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2912 return (
2913 (node->message_loc.start != NULL) &&
2914 (node->message_loc.end[-1] != '!') &&
2915 (node->message_loc.end[-1] != '?') &&
2916 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2917 (node->opening_loc.start == NULL) &&
2918 (node->arguments == NULL) &&
2919 (node->block == NULL)
2920 );
2921}
2922
2926static void
2927pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2928 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2929
2930 if (write_constant->length > 0) {
2931 size_t length = write_constant->length - 1;
2932
2933 void *memory = xmalloc(length);
2934 memcpy(memory, write_constant->start, length);
2935
2936 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2937 } else {
2938 // We can get here if the message was missing because of a syntax error.
2939 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2940 }
2941}
2942
2946static pm_call_and_write_node_t *
2947pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2948 assert(target->block == NULL);
2949 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2950 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2951
2952 *node = (pm_call_and_write_node_t) {
2953 {
2954 .type = PM_CALL_AND_WRITE_NODE,
2955 .flags = target->base.flags,
2956 .node_id = PM_NODE_IDENTIFY(parser),
2957 .location = {
2958 .start = target->base.location.start,
2959 .end = value->location.end
2960 }
2961 },
2962 .receiver = target->receiver,
2963 .call_operator_loc = target->call_operator_loc,
2964 .message_loc = target->message_loc,
2965 .read_name = 0,
2966 .write_name = target->name,
2967 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2968 .value = value
2969 };
2970
2971 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2972
2973 // Here we're going to free the target, since it is no longer necessary.
2974 // However, we don't want to call `pm_node_destroy` because we want to keep
2975 // around all of its children since we just reused them.
2976 xfree(target);
2977
2978 return node;
2979}
2980
2985static void
2986pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2987 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2988 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2989 pm_node_t *node;
2990 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2991 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2992 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2993 break;
2994 }
2995 }
2996 }
2997
2998 if (block != NULL) {
2999 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
3000 }
3001 }
3002}
3003
3007static pm_index_and_write_node_t *
3008pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3009 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3010 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
3011
3012 pm_index_arguments_check(parser, target->arguments, target->block);
3013
3014 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3015 *node = (pm_index_and_write_node_t) {
3016 {
3017 .type = PM_INDEX_AND_WRITE_NODE,
3018 .flags = target->base.flags,
3019 .node_id = PM_NODE_IDENTIFY(parser),
3020 .location = {
3021 .start = target->base.location.start,
3022 .end = value->location.end
3023 }
3024 },
3025 .receiver = target->receiver,
3026 .call_operator_loc = target->call_operator_loc,
3027 .opening_loc = target->opening_loc,
3028 .arguments = target->arguments,
3029 .closing_loc = target->closing_loc,
3030 .block = (pm_block_argument_node_t *) target->block,
3031 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3032 .value = value
3033 };
3034
3035 // Here we're going to free the target, since it is no longer necessary.
3036 // However, we don't want to call `pm_node_destroy` because we want to keep
3037 // around all of its children since we just reused them.
3038 xfree(target);
3039
3040 return node;
3041}
3042
3046static pm_call_operator_write_node_t *
3047pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3048 assert(target->block == NULL);
3049 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3050
3051 *node = (pm_call_operator_write_node_t) {
3052 {
3053 .type = PM_CALL_OPERATOR_WRITE_NODE,
3054 .flags = target->base.flags,
3055 .node_id = PM_NODE_IDENTIFY(parser),
3056 .location = {
3057 .start = target->base.location.start,
3058 .end = value->location.end
3059 }
3060 },
3061 .receiver = target->receiver,
3062 .call_operator_loc = target->call_operator_loc,
3063 .message_loc = target->message_loc,
3064 .read_name = 0,
3065 .write_name = target->name,
3066 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3067 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3068 .value = value
3069 };
3070
3071 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3072
3073 // Here we're going to free the target, since it is no longer necessary.
3074 // However, we don't want to call `pm_node_destroy` because we want to keep
3075 // around all of its children since we just reused them.
3076 xfree(target);
3077
3078 return node;
3079}
3080
3084static pm_index_operator_write_node_t *
3085pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3086 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3087
3088 pm_index_arguments_check(parser, target->arguments, target->block);
3089
3090 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3091 *node = (pm_index_operator_write_node_t) {
3092 {
3093 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3094 .flags = target->base.flags,
3095 .node_id = PM_NODE_IDENTIFY(parser),
3096 .location = {
3097 .start = target->base.location.start,
3098 .end = value->location.end
3099 }
3100 },
3101 .receiver = target->receiver,
3102 .call_operator_loc = target->call_operator_loc,
3103 .opening_loc = target->opening_loc,
3104 .arguments = target->arguments,
3105 .closing_loc = target->closing_loc,
3106 .block = (pm_block_argument_node_t *) target->block,
3107 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3108 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3109 .value = value
3110 };
3111
3112 // Here we're going to free the target, since it is no longer necessary.
3113 // However, we don't want to call `pm_node_destroy` because we want to keep
3114 // around all of its children since we just reused them.
3115 xfree(target);
3116
3117 return node;
3118}
3119
3123static pm_call_or_write_node_t *
3124pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3125 assert(target->block == NULL);
3126 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3127 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3128
3129 *node = (pm_call_or_write_node_t) {
3130 {
3131 .type = PM_CALL_OR_WRITE_NODE,
3132 .flags = target->base.flags,
3133 .node_id = PM_NODE_IDENTIFY(parser),
3134 .location = {
3135 .start = target->base.location.start,
3136 .end = value->location.end
3137 }
3138 },
3139 .receiver = target->receiver,
3140 .call_operator_loc = target->call_operator_loc,
3141 .message_loc = target->message_loc,
3142 .read_name = 0,
3143 .write_name = target->name,
3144 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3145 .value = value
3146 };
3147
3148 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3149
3150 // Here we're going to free the target, since it is no longer necessary.
3151 // However, we don't want to call `pm_node_destroy` because we want to keep
3152 // around all of its children since we just reused them.
3153 xfree(target);
3154
3155 return node;
3156}
3157
3161static pm_index_or_write_node_t *
3162pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3163 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3164 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3165
3166 pm_index_arguments_check(parser, target->arguments, target->block);
3167
3168 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3169 *node = (pm_index_or_write_node_t) {
3170 {
3171 .type = PM_INDEX_OR_WRITE_NODE,
3172 .flags = target->base.flags,
3173 .node_id = PM_NODE_IDENTIFY(parser),
3174 .location = {
3175 .start = target->base.location.start,
3176 .end = value->location.end
3177 }
3178 },
3179 .receiver = target->receiver,
3180 .call_operator_loc = target->call_operator_loc,
3181 .opening_loc = target->opening_loc,
3182 .arguments = target->arguments,
3183 .closing_loc = target->closing_loc,
3184 .block = (pm_block_argument_node_t *) target->block,
3185 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3186 .value = value
3187 };
3188
3189 // Here we're going to free the target, since it is no longer necessary.
3190 // However, we don't want to call `pm_node_destroy` because we want to keep
3191 // around all of its children since we just reused them.
3192 xfree(target);
3193
3194 return node;
3195}
3196
3201static pm_call_target_node_t *
3202pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3203 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3204
3205 *node = (pm_call_target_node_t) {
3206 {
3207 .type = PM_CALL_TARGET_NODE,
3208 .flags = target->base.flags,
3209 .node_id = PM_NODE_IDENTIFY(parser),
3210 .location = target->base.location
3211 },
3212 .receiver = target->receiver,
3213 .call_operator_loc = target->call_operator_loc,
3214 .name = target->name,
3215 .message_loc = target->message_loc
3216 };
3217
3218 // Here we're going to free the target, since it is no longer necessary.
3219 // However, we don't want to call `pm_node_destroy` because we want to keep
3220 // around all of its children since we just reused them.
3221 xfree(target);
3222
3223 return node;
3224}
3225
3230static pm_index_target_node_t *
3231pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3232 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3233 pm_node_flags_t flags = target->base.flags;
3234
3235 pm_index_arguments_check(parser, target->arguments, target->block);
3236
3237 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3238 *node = (pm_index_target_node_t) {
3239 {
3240 .type = PM_INDEX_TARGET_NODE,
3241 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3242 .node_id = PM_NODE_IDENTIFY(parser),
3243 .location = target->base.location
3244 },
3245 .receiver = target->receiver,
3246 .opening_loc = target->opening_loc,
3247 .arguments = target->arguments,
3248 .closing_loc = target->closing_loc,
3249 .block = (pm_block_argument_node_t *) target->block,
3250 };
3251
3252 // Here we're going to free the target, since it is no longer necessary.
3253 // However, we don't want to call `pm_node_destroy` because we want to keep
3254 // around all of its children since we just reused them.
3255 xfree(target);
3256
3257 return node;
3258}
3259
3263static pm_capture_pattern_node_t *
3264pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3265 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3266
3267 *node = (pm_capture_pattern_node_t) {
3268 {
3269 .type = PM_CAPTURE_PATTERN_NODE,
3270 .node_id = PM_NODE_IDENTIFY(parser),
3271 .location = {
3272 .start = value->location.start,
3273 .end = target->base.location.end
3274 },
3275 },
3276 .value = value,
3277 .target = target,
3278 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3279 };
3280
3281 return node;
3282}
3283
3287static pm_case_node_t *
3288pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3289 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3290
3291 *node = (pm_case_node_t) {
3292 {
3293 .type = PM_CASE_NODE,
3294 .node_id = PM_NODE_IDENTIFY(parser),
3295 .location = {
3296 .start = case_keyword->start,
3297 .end = end_keyword->end
3298 },
3299 },
3300 .predicate = predicate,
3301 .else_clause = NULL,
3302 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3303 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3304 .conditions = { 0 }
3305 };
3306
3307 return node;
3308}
3309
3313static void
3314pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3315 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3316
3317 pm_node_list_append(&node->conditions, condition);
3318 node->base.location.end = condition->location.end;
3319}
3320
3324static void
3325pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3326 node->else_clause = else_clause;
3327 node->base.location.end = else_clause->base.location.end;
3328}
3329
3333static void
3334pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3335 node->base.location.end = end_keyword->end;
3336 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3337}
3338
3342static pm_case_match_node_t *
3343pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3344 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3345
3346 *node = (pm_case_match_node_t) {
3347 {
3348 .type = PM_CASE_MATCH_NODE,
3349 .node_id = PM_NODE_IDENTIFY(parser),
3350 .location = {
3351 .start = case_keyword->start,
3352 .end = end_keyword->end
3353 },
3354 },
3355 .predicate = predicate,
3356 .else_clause = NULL,
3357 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3358 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3359 .conditions = { 0 }
3360 };
3361
3362 return node;
3363}
3364
3368static void
3369pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3370 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3371
3372 pm_node_list_append(&node->conditions, condition);
3373 node->base.location.end = condition->location.end;
3374}
3375
3379static void
3380pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3381 node->else_clause = else_clause;
3382 node->base.location.end = else_clause->base.location.end;
3383}
3384
3388static void
3389pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3390 node->base.location.end = end_keyword->end;
3391 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3392}
3393
3397static pm_class_node_t *
3398pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3399 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3400
3401 *node = (pm_class_node_t) {
3402 {
3403 .type = PM_CLASS_NODE,
3404 .node_id = PM_NODE_IDENTIFY(parser),
3405 .location = { .start = class_keyword->start, .end = end_keyword->end },
3406 },
3407 .locals = *locals,
3408 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3409 .constant_path = constant_path,
3410 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3411 .superclass = superclass,
3412 .body = body,
3413 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3414 .name = pm_parser_constant_id_token(parser, name)
3415 };
3416
3417 return node;
3418}
3419
3423static pm_class_variable_and_write_node_t *
3424pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3425 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3426 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3427
3428 *node = (pm_class_variable_and_write_node_t) {
3429 {
3430 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3431 .node_id = PM_NODE_IDENTIFY(parser),
3432 .location = {
3433 .start = target->base.location.start,
3434 .end = value->location.end
3435 }
3436 },
3437 .name = target->name,
3438 .name_loc = target->base.location,
3439 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3440 .value = value
3441 };
3442
3443 return node;
3444}
3445
3449static pm_class_variable_operator_write_node_t *
3450pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3451 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3452
3453 *node = (pm_class_variable_operator_write_node_t) {
3454 {
3455 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3456 .node_id = PM_NODE_IDENTIFY(parser),
3457 .location = {
3458 .start = target->base.location.start,
3459 .end = value->location.end
3460 }
3461 },
3462 .name = target->name,
3463 .name_loc = target->base.location,
3464 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3465 .value = value,
3466 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3467 };
3468
3469 return node;
3470}
3471
3475static pm_class_variable_or_write_node_t *
3476pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3477 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3478 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3479
3480 *node = (pm_class_variable_or_write_node_t) {
3481 {
3482 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3483 .node_id = PM_NODE_IDENTIFY(parser),
3484 .location = {
3485 .start = target->base.location.start,
3486 .end = value->location.end
3487 }
3488 },
3489 .name = target->name,
3490 .name_loc = target->base.location,
3491 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3492 .value = value
3493 };
3494
3495 return node;
3496}
3497
3501static pm_class_variable_read_node_t *
3502pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3503 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3504 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3505
3506 *node = (pm_class_variable_read_node_t) {
3507 {
3508 .type = PM_CLASS_VARIABLE_READ_NODE,
3509 .node_id = PM_NODE_IDENTIFY(parser),
3510 .location = PM_LOCATION_TOKEN_VALUE(token)
3511 },
3512 .name = pm_parser_constant_id_token(parser, token)
3513 };
3514
3515 return node;
3516}
3517
3524static inline pm_node_flags_t
3525pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3526 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3527 return flags;
3528 }
3529 return 0;
3530}
3531
3535static pm_class_variable_write_node_t *
3536pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3537 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3538
3539 *node = (pm_class_variable_write_node_t) {
3540 {
3541 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3542 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3543 .node_id = PM_NODE_IDENTIFY(parser),
3544 .location = {
3545 .start = read_node->base.location.start,
3546 .end = value->location.end
3547 },
3548 },
3549 .name = read_node->name,
3550 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3551 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3552 .value = value
3553 };
3554
3555 return node;
3556}
3557
3561static pm_constant_path_and_write_node_t *
3562pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3563 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3564 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3565
3566 *node = (pm_constant_path_and_write_node_t) {
3567 {
3568 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3569 .node_id = PM_NODE_IDENTIFY(parser),
3570 .location = {
3571 .start = target->base.location.start,
3572 .end = value->location.end
3573 }
3574 },
3575 .target = target,
3576 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3577 .value = value
3578 };
3579
3580 return node;
3581}
3582
3586static pm_constant_path_operator_write_node_t *
3587pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3588 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3589
3590 *node = (pm_constant_path_operator_write_node_t) {
3591 {
3592 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3593 .node_id = PM_NODE_IDENTIFY(parser),
3594 .location = {
3595 .start = target->base.location.start,
3596 .end = value->location.end
3597 }
3598 },
3599 .target = target,
3600 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3601 .value = value,
3602 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3603 };
3604
3605 return node;
3606}
3607
3611static pm_constant_path_or_write_node_t *
3612pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3613 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3614 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3615
3616 *node = (pm_constant_path_or_write_node_t) {
3617 {
3618 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3619 .node_id = PM_NODE_IDENTIFY(parser),
3620 .location = {
3621 .start = target->base.location.start,
3622 .end = value->location.end
3623 }
3624 },
3625 .target = target,
3626 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3627 .value = value
3628 };
3629
3630 return node;
3631}
3632
3636static pm_constant_path_node_t *
3637pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3638 pm_assert_value_expression(parser, parent);
3639 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3640
3641 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3642 if (name_token->type == PM_TOKEN_CONSTANT) {
3643 name = pm_parser_constant_id_token(parser, name_token);
3644 }
3645
3646 *node = (pm_constant_path_node_t) {
3647 {
3648 .type = PM_CONSTANT_PATH_NODE,
3649 .node_id = PM_NODE_IDENTIFY(parser),
3650 .location = {
3651 .start = parent == NULL ? delimiter->start : parent->location.start,
3652 .end = name_token->end
3653 },
3654 },
3655 .parent = parent,
3656 .name = name,
3657 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3658 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3659 };
3660
3661 return node;
3662}
3663
3667static pm_constant_path_write_node_t *
3668pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3669 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3670
3671 *node = (pm_constant_path_write_node_t) {
3672 {
3673 .type = PM_CONSTANT_PATH_WRITE_NODE,
3674 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3675 .node_id = PM_NODE_IDENTIFY(parser),
3676 .location = {
3677 .start = target->base.location.start,
3678 .end = value->location.end
3679 },
3680 },
3681 .target = target,
3682 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3683 .value = value
3684 };
3685
3686 return node;
3687}
3688
3692static pm_constant_and_write_node_t *
3693pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3694 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3695 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3696
3697 *node = (pm_constant_and_write_node_t) {
3698 {
3699 .type = PM_CONSTANT_AND_WRITE_NODE,
3700 .node_id = PM_NODE_IDENTIFY(parser),
3701 .location = {
3702 .start = target->base.location.start,
3703 .end = value->location.end
3704 }
3705 },
3706 .name = target->name,
3707 .name_loc = target->base.location,
3708 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3709 .value = value
3710 };
3711
3712 return node;
3713}
3714
3718static pm_constant_operator_write_node_t *
3719pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3720 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3721
3722 *node = (pm_constant_operator_write_node_t) {
3723 {
3724 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3725 .node_id = PM_NODE_IDENTIFY(parser),
3726 .location = {
3727 .start = target->base.location.start,
3728 .end = value->location.end
3729 }
3730 },
3731 .name = target->name,
3732 .name_loc = target->base.location,
3733 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3734 .value = value,
3735 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3736 };
3737
3738 return node;
3739}
3740
3744static pm_constant_or_write_node_t *
3745pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3746 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3747 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3748
3749 *node = (pm_constant_or_write_node_t) {
3750 {
3751 .type = PM_CONSTANT_OR_WRITE_NODE,
3752 .node_id = PM_NODE_IDENTIFY(parser),
3753 .location = {
3754 .start = target->base.location.start,
3755 .end = value->location.end
3756 }
3757 },
3758 .name = target->name,
3759 .name_loc = target->base.location,
3760 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3761 .value = value
3762 };
3763
3764 return node;
3765}
3766
3770static pm_constant_read_node_t *
3771pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3772 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3773 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3774
3775 *node = (pm_constant_read_node_t) {
3776 {
3777 .type = PM_CONSTANT_READ_NODE,
3778 .node_id = PM_NODE_IDENTIFY(parser),
3779 .location = PM_LOCATION_TOKEN_VALUE(name)
3780 },
3781 .name = pm_parser_constant_id_token(parser, name)
3782 };
3783
3784 return node;
3785}
3786
3790static pm_constant_write_node_t *
3791pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3792 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3793
3794 *node = (pm_constant_write_node_t) {
3795 {
3796 .type = PM_CONSTANT_WRITE_NODE,
3797 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3798 .node_id = PM_NODE_IDENTIFY(parser),
3799 .location = {
3800 .start = target->base.location.start,
3801 .end = value->location.end
3802 }
3803 },
3804 .name = target->name,
3805 .name_loc = target->base.location,
3806 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3807 .value = value
3808 };
3809
3810 return node;
3811}
3812
3816static void
3817pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3818 switch (PM_NODE_TYPE(node)) {
3819 case PM_BEGIN_NODE: {
3820 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3821 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3822 break;
3823 }
3824 case PM_PARENTHESES_NODE: {
3825 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3826 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3827 break;
3828 }
3829 case PM_STATEMENTS_NODE: {
3830 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3831 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3832 break;
3833 }
3834 case PM_ARRAY_NODE:
3835 case PM_FLOAT_NODE:
3836 case PM_IMAGINARY_NODE:
3837 case PM_INTEGER_NODE:
3838 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3839 case PM_INTERPOLATED_STRING_NODE:
3840 case PM_INTERPOLATED_SYMBOL_NODE:
3841 case PM_INTERPOLATED_X_STRING_NODE:
3842 case PM_RATIONAL_NODE:
3843 case PM_REGULAR_EXPRESSION_NODE:
3844 case PM_SOURCE_ENCODING_NODE:
3845 case PM_SOURCE_FILE_NODE:
3846 case PM_SOURCE_LINE_NODE:
3847 case PM_STRING_NODE:
3848 case PM_SYMBOL_NODE:
3849 case PM_X_STRING_NODE:
3850 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3851 break;
3852 default:
3853 break;
3854 }
3855}
3856
3860static pm_def_node_t *
3861pm_def_node_create(
3862 pm_parser_t *parser,
3863 pm_constant_id_t name,
3864 const pm_token_t *name_loc,
3865 pm_node_t *receiver,
3866 pm_parameters_node_t *parameters,
3867 pm_node_t *body,
3868 pm_constant_id_list_t *locals,
3869 const pm_token_t *def_keyword,
3870 const pm_token_t *operator,
3871 const pm_token_t *lparen,
3872 const pm_token_t *rparen,
3873 const pm_token_t *equal,
3874 const pm_token_t *end_keyword
3875) {
3876 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3877 const uint8_t *end;
3878
3879 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3880 end = body->location.end;
3881 } else {
3882 end = end_keyword->end;
3883 }
3884
3885 if (receiver != NULL) {
3886 pm_def_node_receiver_check(parser, receiver);
3887 }
3888
3889 *node = (pm_def_node_t) {
3890 {
3891 .type = PM_DEF_NODE,
3892 .node_id = PM_NODE_IDENTIFY(parser),
3893 .location = { .start = def_keyword->start, .end = end },
3894 },
3895 .name = name,
3896 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3897 .receiver = receiver,
3898 .parameters = parameters,
3899 .body = body,
3900 .locals = *locals,
3901 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3902 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3903 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3904 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3905 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3906 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3907 };
3908
3909 return node;
3910}
3911
3915static pm_defined_node_t *
3916pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3917 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3918
3919 *node = (pm_defined_node_t) {
3920 {
3921 .type = PM_DEFINED_NODE,
3922 .node_id = PM_NODE_IDENTIFY(parser),
3923 .location = {
3924 .start = keyword_loc->start,
3925 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3926 },
3927 },
3928 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3929 .value = value,
3930 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3931 .keyword_loc = *keyword_loc
3932 };
3933
3934 return node;
3935}
3936
3940static pm_else_node_t *
3941pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3942 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3943 const uint8_t *end = NULL;
3944 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3945 end = statements->base.location.end;
3946 } else {
3947 end = end_keyword->end;
3948 }
3949
3950 *node = (pm_else_node_t) {
3951 {
3952 .type = PM_ELSE_NODE,
3953 .node_id = PM_NODE_IDENTIFY(parser),
3954 .location = {
3955 .start = else_keyword->start,
3956 .end = end,
3957 },
3958 },
3959 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3960 .statements = statements,
3961 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3962 };
3963
3964 return node;
3965}
3966
3970static pm_embedded_statements_node_t *
3971pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3972 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3973
3974 *node = (pm_embedded_statements_node_t) {
3975 {
3976 .type = PM_EMBEDDED_STATEMENTS_NODE,
3977 .node_id = PM_NODE_IDENTIFY(parser),
3978 .location = {
3979 .start = opening->start,
3980 .end = closing->end
3981 }
3982 },
3983 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3984 .statements = statements,
3985 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3986 };
3987
3988 return node;
3989}
3990
3994static pm_embedded_variable_node_t *
3995pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3996 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3997
3998 *node = (pm_embedded_variable_node_t) {
3999 {
4000 .type = PM_EMBEDDED_VARIABLE_NODE,
4001 .node_id = PM_NODE_IDENTIFY(parser),
4002 .location = {
4003 .start = operator->start,
4004 .end = variable->location.end
4005 }
4006 },
4007 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4008 .variable = variable
4009 };
4010
4011 return node;
4012}
4013
4017static pm_ensure_node_t *
4018pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4019 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4020
4021 *node = (pm_ensure_node_t) {
4022 {
4023 .type = PM_ENSURE_NODE,
4024 .node_id = PM_NODE_IDENTIFY(parser),
4025 .location = {
4026 .start = ensure_keyword->start,
4027 .end = end_keyword->end
4028 },
4029 },
4030 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4031 .statements = statements,
4032 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4033 };
4034
4035 return node;
4036}
4037
4041static pm_false_node_t *
4042pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4043 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4044 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4045
4046 *node = (pm_false_node_t) {{
4047 .type = PM_FALSE_NODE,
4048 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4049 .node_id = PM_NODE_IDENTIFY(parser),
4050 .location = PM_LOCATION_TOKEN_VALUE(token)
4051 }};
4052
4053 return node;
4054}
4055
4060static pm_find_pattern_node_t *
4061pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4062 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4063
4064 pm_node_t *left = nodes->nodes[0];
4065 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4066 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4067
4068 pm_node_t *right;
4069
4070 if (nodes->size == 1) {
4071 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4072 } else {
4073 right = nodes->nodes[nodes->size - 1];
4074 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4075 }
4076
4077#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4078 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4079 // The resulting AST will anyway be ignored, but this file still needs to compile.
4080 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4081#else
4082 pm_node_t *right_splat_node = right;
4083#endif
4084 *node = (pm_find_pattern_node_t) {
4085 {
4086 .type = PM_FIND_PATTERN_NODE,
4087 .node_id = PM_NODE_IDENTIFY(parser),
4088 .location = {
4089 .start = left->location.start,
4090 .end = right->location.end,
4091 },
4092 },
4093 .constant = NULL,
4094 .left = left_splat_node,
4095 .right = right_splat_node,
4096 .requireds = { 0 },
4097 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4098 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4099 };
4100
4101 // For now we're going to just copy over each pointer manually. This could be
4102 // much more efficient, as we could instead resize the node list to only point
4103 // to 1...-1.
4104 for (size_t index = 1; index < nodes->size - 1; index++) {
4105 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4106 }
4107
4108 return node;
4109}
4110
4115static double
4116pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4117 ptrdiff_t diff = token->end - token->start;
4118 if (diff <= 0) return 0.0;
4119
4120 // First, get a buffer of the content.
4121 size_t length = (size_t) diff;
4122 char *buffer = xmalloc(sizeof(char) * (length + 1));
4123 memcpy((void *) buffer, token->start, length);
4124
4125 // Next, determine if we need to replace the decimal point because of
4126 // locale-specific options, and then normalize them if we have to.
4127 char decimal_point = *localeconv()->decimal_point;
4128 if (decimal_point != '.') {
4129 for (size_t index = 0; index < length; index++) {
4130 if (buffer[index] == '.') buffer[index] = decimal_point;
4131 }
4132 }
4133
4134 // Next, handle underscores by removing them from the buffer.
4135 for (size_t index = 0; index < length; index++) {
4136 if (buffer[index] == '_') {
4137 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4138 length--;
4139 }
4140 }
4141
4142 // Null-terminate the buffer so that strtod cannot read off the end.
4143 buffer[length] = '\0';
4144
4145 // Now, call strtod to parse the value. Note that CRuby has their own
4146 // version of strtod which avoids locales. We're okay using the locale-aware
4147 // version because we've already validated through the parser that the token
4148 // is in a valid format.
4149 errno = 0;
4150 char *eptr;
4151 double value = strtod(buffer, &eptr);
4152
4153 // This should never happen, because we've already checked that the token
4154 // is in a valid format. However it's good to be safe.
4155 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4156 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4157 xfree((void *) buffer);
4158 return 0.0;
4159 }
4160
4161 // If errno is set, then it should only be ERANGE. At this point we need to
4162 // check if it's infinity (it should be).
4163 if (errno == ERANGE && PRISM_ISINF(value)) {
4164 int warn_width;
4165 const char *ellipsis;
4166
4167 if (length > 20) {
4168 warn_width = 20;
4169 ellipsis = "...";
4170 } else {
4171 warn_width = (int) length;
4172 ellipsis = "";
4173 }
4174
4175 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4176 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4177 }
4178
4179 // Finally we can free the buffer and return the value.
4180 xfree((void *) buffer);
4181 return value;
4182}
4183
4187static pm_float_node_t *
4188pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4189 assert(token->type == PM_TOKEN_FLOAT);
4190 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4191
4192 *node = (pm_float_node_t) {
4193 {
4194 .type = PM_FLOAT_NODE,
4195 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4196 .node_id = PM_NODE_IDENTIFY(parser),
4197 .location = PM_LOCATION_TOKEN_VALUE(token)
4198 },
4199 .value = pm_double_parse(parser, token)
4200 };
4201
4202 return node;
4203}
4204
4208static pm_imaginary_node_t *
4209pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4210 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4211
4212 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4213 *node = (pm_imaginary_node_t) {
4214 {
4215 .type = PM_IMAGINARY_NODE,
4216 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4217 .node_id = PM_NODE_IDENTIFY(parser),
4218 .location = PM_LOCATION_TOKEN_VALUE(token)
4219 },
4220 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4221 .type = PM_TOKEN_FLOAT,
4222 .start = token->start,
4223 .end = token->end - 1
4224 }))
4225 };
4226
4227 return node;
4228}
4229
4233static pm_rational_node_t *
4234pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4235 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4236
4237 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4238 *node = (pm_rational_node_t) {
4239 {
4240 .type = PM_RATIONAL_NODE,
4241 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4242 .node_id = PM_NODE_IDENTIFY(parser),
4243 .location = PM_LOCATION_TOKEN_VALUE(token)
4244 },
4245 .numerator = { 0 },
4246 .denominator = { 0 }
4247 };
4248
4249 const uint8_t *start = token->start;
4250 const uint8_t *end = token->end - 1; // r
4251
4252 while (start < end && *start == '0') start++; // 0.1 -> .1
4253 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4254
4255 size_t length = (size_t) (end - start);
4256 if (length == 1) {
4257 node->denominator.value = 1;
4258 return node;
4259 }
4260
4261 const uint8_t *point = memchr(start, '.', length);
4262 assert(point && "should have a decimal point");
4263
4264 uint8_t *digits = xmalloc(length);
4265 if (digits == NULL) {
4266 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4267 abort();
4268 }
4269
4270 memcpy(digits, start, (unsigned long) (point - start));
4271 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4272 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4273
4274 digits[0] = '1';
4275 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4276 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4277 xfree(digits);
4278
4279 pm_integers_reduce(&node->numerator, &node->denominator);
4280 return node;
4281}
4282
4287static pm_imaginary_node_t *
4288pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4289 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4290
4291 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4292 *node = (pm_imaginary_node_t) {
4293 {
4294 .type = PM_IMAGINARY_NODE,
4295 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4296 .node_id = PM_NODE_IDENTIFY(parser),
4297 .location = PM_LOCATION_TOKEN_VALUE(token)
4298 },
4299 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4300 .type = PM_TOKEN_FLOAT_RATIONAL,
4301 .start = token->start,
4302 .end = token->end - 1
4303 }))
4304 };
4305
4306 return node;
4307}
4308
4312static pm_for_node_t *
4313pm_for_node_create(
4314 pm_parser_t *parser,
4315 pm_node_t *index,
4316 pm_node_t *collection,
4317 pm_statements_node_t *statements,
4318 const pm_token_t *for_keyword,
4319 const pm_token_t *in_keyword,
4320 const pm_token_t *do_keyword,
4321 const pm_token_t *end_keyword
4322) {
4323 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4324
4325 *node = (pm_for_node_t) {
4326 {
4327 .type = PM_FOR_NODE,
4328 .node_id = PM_NODE_IDENTIFY(parser),
4329 .location = {
4330 .start = for_keyword->start,
4331 .end = end_keyword->end
4332 },
4333 },
4334 .index = index,
4335 .collection = collection,
4336 .statements = statements,
4337 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4338 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4339 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4340 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4341 };
4342
4343 return node;
4344}
4345
4349static pm_forwarding_arguments_node_t *
4350pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4351 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4352 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4353
4354 *node = (pm_forwarding_arguments_node_t) {{
4355 .type = PM_FORWARDING_ARGUMENTS_NODE,
4356 .node_id = PM_NODE_IDENTIFY(parser),
4357 .location = PM_LOCATION_TOKEN_VALUE(token)
4358 }};
4359
4360 return node;
4361}
4362
4366static pm_forwarding_parameter_node_t *
4367pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4368 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4369 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4370
4371 *node = (pm_forwarding_parameter_node_t) {{
4372 .type = PM_FORWARDING_PARAMETER_NODE,
4373 .node_id = PM_NODE_IDENTIFY(parser),
4374 .location = PM_LOCATION_TOKEN_VALUE(token)
4375 }};
4376
4377 return node;
4378}
4379
4383static pm_forwarding_super_node_t *
4384pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4385 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4386 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4387 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4388
4389 pm_block_node_t *block = NULL;
4390 if (arguments->block != NULL) {
4391 block = (pm_block_node_t *) arguments->block;
4392 }
4393
4394 *node = (pm_forwarding_super_node_t) {
4395 {
4396 .type = PM_FORWARDING_SUPER_NODE,
4397 .node_id = PM_NODE_IDENTIFY(parser),
4398 .location = {
4399 .start = token->start,
4400 .end = block != NULL ? block->base.location.end : token->end
4401 },
4402 },
4403 .block = block
4404 };
4405
4406 return node;
4407}
4408
4413static pm_hash_pattern_node_t *
4414pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4415 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4416
4417 *node = (pm_hash_pattern_node_t) {
4418 {
4419 .type = PM_HASH_PATTERN_NODE,
4420 .node_id = PM_NODE_IDENTIFY(parser),
4421 .location = {
4422 .start = opening->start,
4423 .end = closing->end
4424 },
4425 },
4426 .constant = NULL,
4427 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4428 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4429 .elements = { 0 },
4430 .rest = NULL
4431 };
4432
4433 return node;
4434}
4435
4439static pm_hash_pattern_node_t *
4440pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4441 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4442
4443 const uint8_t *start;
4444 const uint8_t *end;
4445
4446 if (elements->size > 0) {
4447 if (rest) {
4448 start = elements->nodes[0]->location.start;
4449 end = rest->location.end;
4450 } else {
4451 start = elements->nodes[0]->location.start;
4452 end = elements->nodes[elements->size - 1]->location.end;
4453 }
4454 } else {
4455 assert(rest != NULL);
4456 start = rest->location.start;
4457 end = rest->location.end;
4458 }
4459
4460 *node = (pm_hash_pattern_node_t) {
4461 {
4462 .type = PM_HASH_PATTERN_NODE,
4463 .node_id = PM_NODE_IDENTIFY(parser),
4464 .location = {
4465 .start = start,
4466 .end = end
4467 },
4468 },
4469 .constant = NULL,
4470 .elements = { 0 },
4471 .rest = rest,
4472 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4473 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4474 };
4475
4476 pm_node_t *element;
4477 PM_NODE_LIST_FOREACH(elements, index, element) {
4478 pm_node_list_append(&node->elements, element);
4479 }
4480
4481 return node;
4482}
4483
4487static pm_constant_id_t
4488pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4489 switch (PM_NODE_TYPE(target)) {
4490 case PM_GLOBAL_VARIABLE_READ_NODE:
4491 return ((pm_global_variable_read_node_t *) target)->name;
4492 case PM_BACK_REFERENCE_READ_NODE:
4493 return ((pm_back_reference_read_node_t *) target)->name;
4494 case PM_NUMBERED_REFERENCE_READ_NODE:
4495 // This will only ever happen in the event of a syntax error, but we
4496 // still need to provide something for the node.
4497 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4498 default:
4499 assert(false && "unreachable");
4500 return (pm_constant_id_t) -1;
4501 }
4502}
4503
4507static pm_global_variable_and_write_node_t *
4508pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4509 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4510 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4511
4512 *node = (pm_global_variable_and_write_node_t) {
4513 {
4514 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4515 .node_id = PM_NODE_IDENTIFY(parser),
4516 .location = {
4517 .start = target->location.start,
4518 .end = value->location.end
4519 }
4520 },
4521 .name = pm_global_variable_write_name(parser, target),
4522 .name_loc = target->location,
4523 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4524 .value = value
4525 };
4526
4527 return node;
4528}
4529
4533static pm_global_variable_operator_write_node_t *
4534pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4535 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4536
4537 *node = (pm_global_variable_operator_write_node_t) {
4538 {
4539 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4540 .node_id = PM_NODE_IDENTIFY(parser),
4541 .location = {
4542 .start = target->location.start,
4543 .end = value->location.end
4544 }
4545 },
4546 .name = pm_global_variable_write_name(parser, target),
4547 .name_loc = target->location,
4548 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4549 .value = value,
4550 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4551 };
4552
4553 return node;
4554}
4555
4559static pm_global_variable_or_write_node_t *
4560pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4561 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4562 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4563
4564 *node = (pm_global_variable_or_write_node_t) {
4565 {
4566 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4567 .node_id = PM_NODE_IDENTIFY(parser),
4568 .location = {
4569 .start = target->location.start,
4570 .end = value->location.end
4571 }
4572 },
4573 .name = pm_global_variable_write_name(parser, target),
4574 .name_loc = target->location,
4575 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4576 .value = value
4577 };
4578
4579 return node;
4580}
4581
4585static pm_global_variable_read_node_t *
4586pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4587 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4588
4589 *node = (pm_global_variable_read_node_t) {
4590 {
4591 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4592 .node_id = PM_NODE_IDENTIFY(parser),
4593 .location = PM_LOCATION_TOKEN_VALUE(name),
4594 },
4595 .name = pm_parser_constant_id_token(parser, name)
4596 };
4597
4598 return node;
4599}
4600
4604static pm_global_variable_read_node_t *
4605pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4606 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4607
4608 *node = (pm_global_variable_read_node_t) {
4609 {
4610 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4611 .node_id = PM_NODE_IDENTIFY(parser),
4612 .location = PM_LOCATION_NULL_VALUE(parser)
4613 },
4614 .name = name
4615 };
4616
4617 return node;
4618}
4619
4623static pm_global_variable_write_node_t *
4624pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4625 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4626
4627 *node = (pm_global_variable_write_node_t) {
4628 {
4629 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4630 .node_id = PM_NODE_IDENTIFY(parser),
4631 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4632 .location = {
4633 .start = target->location.start,
4634 .end = value->location.end
4635 },
4636 },
4637 .name = pm_global_variable_write_name(parser, target),
4638 .name_loc = PM_LOCATION_NODE_VALUE(target),
4639 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4640 .value = value
4641 };
4642
4643 return node;
4644}
4645
4649static pm_global_variable_write_node_t *
4650pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4651 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4652
4653 *node = (pm_global_variable_write_node_t) {
4654 {
4655 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4656 .node_id = PM_NODE_IDENTIFY(parser),
4657 .location = PM_LOCATION_NULL_VALUE(parser)
4658 },
4659 .name = name,
4660 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4661 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4662 .value = value
4663 };
4664
4665 return node;
4666}
4667
4671static pm_hash_node_t *
4672pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4673 assert(opening != NULL);
4674 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4675
4676 *node = (pm_hash_node_t) {
4677 {
4678 .type = PM_HASH_NODE,
4679 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4680 .node_id = PM_NODE_IDENTIFY(parser),
4681 .location = PM_LOCATION_TOKEN_VALUE(opening)
4682 },
4683 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4684 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4685 .elements = { 0 }
4686 };
4687
4688 return node;
4689}
4690
4694static inline void
4695pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4696 pm_node_list_append(&hash->elements, element);
4697
4698 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4699 if (static_literal) {
4700 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4701 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4702 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4703 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4704 }
4705
4706 if (!static_literal) {
4707 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4708 }
4709}
4710
4711static inline void
4712pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4713 hash->base.location.end = token->end;
4714 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4715}
4716
4720static pm_if_node_t *
4721pm_if_node_create(pm_parser_t *parser,
4722 const pm_token_t *if_keyword,
4723 pm_node_t *predicate,
4724 const pm_token_t *then_keyword,
4725 pm_statements_node_t *statements,
4726 pm_node_t *subsequent,
4727 const pm_token_t *end_keyword
4728) {
4729 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4730 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4731
4732 const uint8_t *end;
4733 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4734 end = end_keyword->end;
4735 } else if (subsequent != NULL) {
4736 end = subsequent->location.end;
4737 } else if (pm_statements_node_body_length(statements) != 0) {
4738 end = statements->base.location.end;
4739 } else {
4740 end = predicate->location.end;
4741 }
4742
4743 *node = (pm_if_node_t) {
4744 {
4745 .type = PM_IF_NODE,
4746 .flags = PM_NODE_FLAG_NEWLINE,
4747 .node_id = PM_NODE_IDENTIFY(parser),
4748 .location = {
4749 .start = if_keyword->start,
4750 .end = end
4751 },
4752 },
4753 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4754 .predicate = predicate,
4755 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4756 .statements = statements,
4757 .subsequent = subsequent,
4758 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4759 };
4760
4761 return node;
4762}
4763
4767static pm_if_node_t *
4768pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4769 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4770 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4771
4772 pm_statements_node_t *statements = pm_statements_node_create(parser);
4773 pm_statements_node_body_append(parser, statements, statement, true);
4774
4775 *node = (pm_if_node_t) {
4776 {
4777 .type = PM_IF_NODE,
4778 .flags = PM_NODE_FLAG_NEWLINE,
4779 .node_id = PM_NODE_IDENTIFY(parser),
4780 .location = {
4781 .start = statement->location.start,
4782 .end = predicate->location.end
4783 },
4784 },
4785 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4786 .predicate = predicate,
4787 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4788 .statements = statements,
4789 .subsequent = NULL,
4790 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4791 };
4792
4793 return node;
4794}
4795
4799static pm_if_node_t *
4800pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4801 pm_assert_value_expression(parser, predicate);
4802 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4803
4804 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4805 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4806
4807 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4808 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4809
4810 pm_token_t end_keyword = not_provided(parser);
4811 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4812
4813 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4814
4815 *node = (pm_if_node_t) {
4816 {
4817 .type = PM_IF_NODE,
4818 .flags = PM_NODE_FLAG_NEWLINE,
4819 .node_id = PM_NODE_IDENTIFY(parser),
4820 .location = {
4821 .start = predicate->location.start,
4822 .end = false_expression->location.end,
4823 },
4824 },
4825 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4826 .predicate = predicate,
4827 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4828 .statements = if_statements,
4829 .subsequent = (pm_node_t *) else_node,
4830 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4831 };
4832
4833 return node;
4834
4835}
4836
4837static inline void
4838pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4839 node->base.location.end = keyword->end;
4840 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4841}
4842
4843static inline void
4844pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4845 node->base.location.end = keyword->end;
4846 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4847}
4848
4852static pm_implicit_node_t *
4853pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4854 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4855
4856 *node = (pm_implicit_node_t) {
4857 {
4858 .type = PM_IMPLICIT_NODE,
4859 .node_id = PM_NODE_IDENTIFY(parser),
4860 .location = value->location
4861 },
4862 .value = value
4863 };
4864
4865 return node;
4866}
4867
4871static pm_implicit_rest_node_t *
4872pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4873 assert(token->type == PM_TOKEN_COMMA);
4874
4875 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4876
4877 *node = (pm_implicit_rest_node_t) {
4878 {
4879 .type = PM_IMPLICIT_REST_NODE,
4880 .node_id = PM_NODE_IDENTIFY(parser),
4881 .location = PM_LOCATION_TOKEN_VALUE(token)
4882 }
4883 };
4884
4885 return node;
4886}
4887
4891static pm_integer_node_t *
4892pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4893 assert(token->type == PM_TOKEN_INTEGER);
4894 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4895
4896 *node = (pm_integer_node_t) {
4897 {
4898 .type = PM_INTEGER_NODE,
4899 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4900 .node_id = PM_NODE_IDENTIFY(parser),
4901 .location = PM_LOCATION_TOKEN_VALUE(token)
4902 },
4903 .value = { 0 }
4904 };
4905
4906 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4907 switch (base) {
4908 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4909 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4910 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4911 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4912 default: assert(false && "unreachable"); break;
4913 }
4914
4915 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4916 return node;
4917}
4918
4923static pm_imaginary_node_t *
4924pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4925 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4926
4927 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4928 *node = (pm_imaginary_node_t) {
4929 {
4930 .type = PM_IMAGINARY_NODE,
4931 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4932 .node_id = PM_NODE_IDENTIFY(parser),
4933 .location = PM_LOCATION_TOKEN_VALUE(token)
4934 },
4935 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4936 .type = PM_TOKEN_INTEGER,
4937 .start = token->start,
4938 .end = token->end - 1
4939 }))
4940 };
4941
4942 return node;
4943}
4944
4949static pm_rational_node_t *
4950pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4951 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4952
4953 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4954 *node = (pm_rational_node_t) {
4955 {
4956 .type = PM_RATIONAL_NODE,
4957 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4958 .node_id = PM_NODE_IDENTIFY(parser),
4959 .location = PM_LOCATION_TOKEN_VALUE(token)
4960 },
4961 .numerator = { 0 },
4962 .denominator = { .value = 1, 0 }
4963 };
4964
4965 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4966 switch (base) {
4967 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4968 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4969 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4970 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4971 default: assert(false && "unreachable"); break;
4972 }
4973
4974 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4975
4976 return node;
4977}
4978
4983static pm_imaginary_node_t *
4984pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4985 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4986
4987 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4988 *node = (pm_imaginary_node_t) {
4989 {
4990 .type = PM_IMAGINARY_NODE,
4991 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4992 .node_id = PM_NODE_IDENTIFY(parser),
4993 .location = PM_LOCATION_TOKEN_VALUE(token)
4994 },
4995 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4996 .type = PM_TOKEN_INTEGER_RATIONAL,
4997 .start = token->start,
4998 .end = token->end - 1
4999 }))
5000 };
5001
5002 return node;
5003}
5004
5008static pm_in_node_t *
5009pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
5010 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
5011
5012 const uint8_t *end;
5013 if (statements != NULL) {
5014 end = statements->base.location.end;
5015 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
5016 end = then_keyword->end;
5017 } else {
5018 end = pattern->location.end;
5019 }
5020
5021 *node = (pm_in_node_t) {
5022 {
5023 .type = PM_IN_NODE,
5024 .node_id = PM_NODE_IDENTIFY(parser),
5025 .location = {
5026 .start = in_keyword->start,
5027 .end = end
5028 },
5029 },
5030 .pattern = pattern,
5031 .statements = statements,
5032 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5033 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5034 };
5035
5036 return node;
5037}
5038
5042static pm_instance_variable_and_write_node_t *
5043pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5044 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5045 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5046
5047 *node = (pm_instance_variable_and_write_node_t) {
5048 {
5049 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5050 .node_id = PM_NODE_IDENTIFY(parser),
5051 .location = {
5052 .start = target->base.location.start,
5053 .end = value->location.end
5054 }
5055 },
5056 .name = target->name,
5057 .name_loc = target->base.location,
5058 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5059 .value = value
5060 };
5061
5062 return node;
5063}
5064
5068static pm_instance_variable_operator_write_node_t *
5069pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5070 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5071
5072 *node = (pm_instance_variable_operator_write_node_t) {
5073 {
5074 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5075 .node_id = PM_NODE_IDENTIFY(parser),
5076 .location = {
5077 .start = target->base.location.start,
5078 .end = value->location.end
5079 }
5080 },
5081 .name = target->name,
5082 .name_loc = target->base.location,
5083 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5084 .value = value,
5085 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5086 };
5087
5088 return node;
5089}
5090
5094static pm_instance_variable_or_write_node_t *
5095pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5096 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5097 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5098
5099 *node = (pm_instance_variable_or_write_node_t) {
5100 {
5101 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5102 .node_id = PM_NODE_IDENTIFY(parser),
5103 .location = {
5104 .start = target->base.location.start,
5105 .end = value->location.end
5106 }
5107 },
5108 .name = target->name,
5109 .name_loc = target->base.location,
5110 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5111 .value = value
5112 };
5113
5114 return node;
5115}
5116
5120static pm_instance_variable_read_node_t *
5121pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5122 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5123 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5124
5125 *node = (pm_instance_variable_read_node_t) {
5126 {
5127 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5128 .node_id = PM_NODE_IDENTIFY(parser),
5129 .location = PM_LOCATION_TOKEN_VALUE(token)
5130 },
5131 .name = pm_parser_constant_id_token(parser, token)
5132 };
5133
5134 return node;
5135}
5136
5141static pm_instance_variable_write_node_t *
5142pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5143 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5144 *node = (pm_instance_variable_write_node_t) {
5145 {
5146 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5147 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5148 .node_id = PM_NODE_IDENTIFY(parser),
5149 .location = {
5150 .start = read_node->base.location.start,
5151 .end = value->location.end
5152 }
5153 },
5154 .name = read_node->name,
5155 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5156 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5157 .value = value
5158 };
5159
5160 return node;
5161}
5162
5168static void
5169pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5170 switch (PM_NODE_TYPE(part)) {
5171 case PM_STRING_NODE:
5172 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5173 break;
5174 case PM_EMBEDDED_STATEMENTS_NODE: {
5175 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5176 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5177
5178 if (embedded == NULL) {
5179 // If there are no statements or more than one statement, then
5180 // we lose the static literal flag.
5181 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5182 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5183 // If the embedded statement is a string, then we can keep the
5184 // static literal flag and mark the string as frozen.
5185 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5186 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5187 // If the embedded statement is an interpolated string and it's
5188 // a static literal, then we can keep the static literal flag.
5189 } else {
5190 // Otherwise we lose the static literal flag.
5191 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5192 }
5193
5194 break;
5195 }
5196 case PM_EMBEDDED_VARIABLE_NODE:
5197 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5198 break;
5199 default:
5200 assert(false && "unexpected node type");
5201 break;
5202 }
5203
5204 pm_node_list_append(parts, part);
5205}
5206
5210static pm_interpolated_regular_expression_node_t *
5211pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5212 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5213
5214 *node = (pm_interpolated_regular_expression_node_t) {
5215 {
5216 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5217 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5218 .node_id = PM_NODE_IDENTIFY(parser),
5219 .location = {
5220 .start = opening->start,
5221 .end = NULL,
5222 },
5223 },
5224 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5225 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5226 .parts = { 0 }
5227 };
5228
5229 return node;
5230}
5231
5232static inline void
5233pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5234 if (node->base.location.start > part->location.start) {
5235 node->base.location.start = part->location.start;
5236 }
5237 if (node->base.location.end < part->location.end) {
5238 node->base.location.end = part->location.end;
5239 }
5240
5241 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5242}
5243
5244static inline void
5245pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5246 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5247 node->base.location.end = closing->end;
5248 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5249}
5250
5274static inline void
5275pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5276#define CLEAR_FLAGS(node) \
5277 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5278
5279#define MUTABLE_FLAGS(node) \
5280 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5281
5282 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5283 node->base.location.start = part->location.start;
5284 }
5285
5286 node->base.location.end = MAX(node->base.location.end, part->location.end);
5287
5288 switch (PM_NODE_TYPE(part)) {
5289 case PM_STRING_NODE:
5290 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
5291 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
5292 // as long as this interpolation only consists of other string literals.
5293 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
5294 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5295 }
5296 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5297 break;
5298 case PM_INTERPOLATED_STRING_NODE:
5299 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5300 // If the string that we're concatenating is a static literal,
5301 // then we can keep the static literal flag for this string.
5302 } else {
5303 // Otherwise, we lose the static literal flag here and we should
5304 // also clear the mutability flags.
5305 CLEAR_FLAGS(node);
5306 }
5307 break;
5308 case PM_EMBEDDED_STATEMENTS_NODE: {
5309 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5310 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5311
5312 if (embedded == NULL) {
5313 // If we're embedding multiple statements or no statements, then
5314 // the string is not longer a static literal.
5315 CLEAR_FLAGS(node);
5316 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5317 // If the embedded statement is a string, then we can make that
5318 // string as frozen and static literal, and not touch the static
5319 // literal status of this string.
5320 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5321
5322 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5323 MUTABLE_FLAGS(node);
5324 }
5325 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5326 // If the embedded statement is an interpolated string, but that
5327 // string is marked as static literal, then we can keep our
5328 // static literal status for this string.
5329 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5330 MUTABLE_FLAGS(node);
5331 }
5332 } else {
5333 // In all other cases, we lose the static literal flag here and
5334 // become mutable.
5335 CLEAR_FLAGS(node);
5336 }
5337
5338 break;
5339 }
5340 case PM_EMBEDDED_VARIABLE_NODE:
5341 // Embedded variables clear static literal, which means we also
5342 // should clear the mutability flags.
5343 CLEAR_FLAGS(node);
5344 break;
5345 case PM_X_STRING_NODE:
5346 case PM_INTERPOLATED_X_STRING_NODE:
5347 // If this is an x string, then this is a syntax error. But we want
5348 // to handle it here so that we don't fail the assertion.
5349 CLEAR_FLAGS(node);
5350 break;
5351 default:
5352 assert(false && "unexpected node type");
5353 break;
5354 }
5355
5356 pm_node_list_append(&node->parts, part);
5357
5358#undef CLEAR_FLAGS
5359#undef MUTABLE_FLAGS
5360}
5361
5365static pm_interpolated_string_node_t *
5366pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5367 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5368 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5369
5370 switch (parser->frozen_string_literal) {
5371 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5372 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5373 break;
5374 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5375 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5376 break;
5377 }
5378
5379 *node = (pm_interpolated_string_node_t) {
5380 {
5381 .type = PM_INTERPOLATED_STRING_NODE,
5382 .flags = flags,
5383 .node_id = PM_NODE_IDENTIFY(parser),
5384 .location = {
5385 .start = opening->start,
5386 .end = closing->end,
5387 },
5388 },
5389 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5390 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5391 .parts = { 0 }
5392 };
5393
5394 if (parts != NULL) {
5395 pm_node_t *part;
5396 PM_NODE_LIST_FOREACH(parts, index, part) {
5397 pm_interpolated_string_node_append(node, part);
5398 }
5399 }
5400
5401 return node;
5402}
5403
5407static void
5408pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5409 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5410 node->base.location.end = closing->end;
5411}
5412
5413static void
5414pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5415 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5416 node->base.location.start = part->location.start;
5417 }
5418
5419 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5420 node->base.location.end = MAX(node->base.location.end, part->location.end);
5421}
5422
5423static void
5424pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5425 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5426 node->base.location.end = closing->end;
5427}
5428
5432static pm_interpolated_symbol_node_t *
5433pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5434 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5435
5436 *node = (pm_interpolated_symbol_node_t) {
5437 {
5438 .type = PM_INTERPOLATED_SYMBOL_NODE,
5439 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5440 .node_id = PM_NODE_IDENTIFY(parser),
5441 .location = {
5442 .start = opening->start,
5443 .end = closing->end,
5444 },
5445 },
5446 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5447 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5448 .parts = { 0 }
5449 };
5450
5451 if (parts != NULL) {
5452 pm_node_t *part;
5453 PM_NODE_LIST_FOREACH(parts, index, part) {
5454 pm_interpolated_symbol_node_append(node, part);
5455 }
5456 }
5457
5458 return node;
5459}
5460
5464static pm_interpolated_x_string_node_t *
5465pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5466 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5467
5468 *node = (pm_interpolated_x_string_node_t) {
5469 {
5470 .type = PM_INTERPOLATED_X_STRING_NODE,
5471 .node_id = PM_NODE_IDENTIFY(parser),
5472 .location = {
5473 .start = opening->start,
5474 .end = closing->end
5475 },
5476 },
5477 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5478 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5479 .parts = { 0 }
5480 };
5481
5482 return node;
5483}
5484
5485static inline void
5486pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5487 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5488 node->base.location.end = part->location.end;
5489}
5490
5491static inline void
5492pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5493 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5494 node->base.location.end = closing->end;
5495}
5496
5500static pm_it_local_variable_read_node_t *
5501pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5502 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5503
5504 *node = (pm_it_local_variable_read_node_t) {
5505 {
5506 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5507 .node_id = PM_NODE_IDENTIFY(parser),
5508 .location = PM_LOCATION_TOKEN_VALUE(name)
5509 }
5510 };
5511
5512 return node;
5513}
5514
5518static pm_it_parameters_node_t *
5519pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5520 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5521
5522 *node = (pm_it_parameters_node_t) {
5523 {
5524 .type = PM_IT_PARAMETERS_NODE,
5525 .node_id = PM_NODE_IDENTIFY(parser),
5526 .location = {
5527 .start = opening->start,
5528 .end = closing->end
5529 }
5530 }
5531 };
5532
5533 return node;
5534}
5535
5539static pm_keyword_hash_node_t *
5540pm_keyword_hash_node_create(pm_parser_t *parser) {
5541 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5542
5543 *node = (pm_keyword_hash_node_t) {
5544 .base = {
5545 .type = PM_KEYWORD_HASH_NODE,
5546 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5547 .node_id = PM_NODE_IDENTIFY(parser),
5548 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5549 },
5550 .elements = { 0 }
5551 };
5552
5553 return node;
5554}
5555
5559static void
5560pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5561 // If the element being added is not an AssocNode or does not have a symbol
5562 // key, then we want to turn the SYMBOL_KEYS flag off.
5563 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5564 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5565 }
5566
5567 pm_node_list_append(&hash->elements, element);
5568 if (hash->base.location.start == NULL) {
5569 hash->base.location.start = element->location.start;
5570 }
5571 hash->base.location.end = element->location.end;
5572}
5573
5577static pm_required_keyword_parameter_node_t *
5578pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5579 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5580
5581 *node = (pm_required_keyword_parameter_node_t) {
5582 {
5583 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5584 .node_id = PM_NODE_IDENTIFY(parser),
5585 .location = {
5586 .start = name->start,
5587 .end = name->end
5588 },
5589 },
5590 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5591 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5592 };
5593
5594 return node;
5595}
5596
5600static pm_optional_keyword_parameter_node_t *
5601pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5602 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5603
5604 *node = (pm_optional_keyword_parameter_node_t) {
5605 {
5606 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5607 .node_id = PM_NODE_IDENTIFY(parser),
5608 .location = {
5609 .start = name->start,
5610 .end = value->location.end
5611 },
5612 },
5613 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5614 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5615 .value = value
5616 };
5617
5618 return node;
5619}
5620
5624static pm_keyword_rest_parameter_node_t *
5625pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5626 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5627
5628 *node = (pm_keyword_rest_parameter_node_t) {
5629 {
5630 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5631 .node_id = PM_NODE_IDENTIFY(parser),
5632 .location = {
5633 .start = operator->start,
5634 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5635 },
5636 },
5637 .name = pm_parser_optional_constant_id_token(parser, name),
5638 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5639 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5640 };
5641
5642 return node;
5643}
5644
5648static pm_lambda_node_t *
5649pm_lambda_node_create(
5650 pm_parser_t *parser,
5651 pm_constant_id_list_t *locals,
5652 const pm_token_t *operator,
5653 const pm_token_t *opening,
5654 const pm_token_t *closing,
5655 pm_node_t *parameters,
5656 pm_node_t *body
5657) {
5658 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5659
5660 *node = (pm_lambda_node_t) {
5661 {
5662 .type = PM_LAMBDA_NODE,
5663 .node_id = PM_NODE_IDENTIFY(parser),
5664 .location = {
5665 .start = operator->start,
5666 .end = closing->end
5667 },
5668 },
5669 .locals = *locals,
5670 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5671 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5672 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5673 .parameters = parameters,
5674 .body = body
5675 };
5676
5677 return node;
5678}
5679
5683static pm_local_variable_and_write_node_t *
5684pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5685 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5686 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5687 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5688
5689 *node = (pm_local_variable_and_write_node_t) {
5690 {
5691 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5692 .node_id = PM_NODE_IDENTIFY(parser),
5693 .location = {
5694 .start = target->location.start,
5695 .end = value->location.end
5696 }
5697 },
5698 .name_loc = target->location,
5699 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5700 .value = value,
5701 .name = name,
5702 .depth = depth
5703 };
5704
5705 return node;
5706}
5707
5711static pm_local_variable_operator_write_node_t *
5712pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5713 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5714
5715 *node = (pm_local_variable_operator_write_node_t) {
5716 {
5717 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5718 .node_id = PM_NODE_IDENTIFY(parser),
5719 .location = {
5720 .start = target->location.start,
5721 .end = value->location.end
5722 }
5723 },
5724 .name_loc = target->location,
5725 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5726 .value = value,
5727 .name = name,
5728 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5729 .depth = depth
5730 };
5731
5732 return node;
5733}
5734
5738static pm_local_variable_or_write_node_t *
5739pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5740 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5741 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5742 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5743
5744 *node = (pm_local_variable_or_write_node_t) {
5745 {
5746 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5747 .node_id = PM_NODE_IDENTIFY(parser),
5748 .location = {
5749 .start = target->location.start,
5750 .end = value->location.end
5751 }
5752 },
5753 .name_loc = target->location,
5754 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5755 .value = value,
5756 .name = name,
5757 .depth = depth
5758 };
5759
5760 return node;
5761}
5762
5766static pm_local_variable_read_node_t *
5767pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5768 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5769
5770 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5771
5772 *node = (pm_local_variable_read_node_t) {
5773 {
5774 .type = PM_LOCAL_VARIABLE_READ_NODE,
5775 .node_id = PM_NODE_IDENTIFY(parser),
5776 .location = PM_LOCATION_TOKEN_VALUE(name)
5777 },
5778 .name = name_id,
5779 .depth = depth
5780 };
5781
5782 return node;
5783}
5784
5788static pm_local_variable_read_node_t *
5789pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5790 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5791 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5792}
5793
5798static pm_local_variable_read_node_t *
5799pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5800 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5801 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5802}
5803
5807static pm_local_variable_write_node_t *
5808pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5809 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5810
5811 *node = (pm_local_variable_write_node_t) {
5812 {
5813 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5814 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5815 .node_id = PM_NODE_IDENTIFY(parser),
5816 .location = {
5817 .start = name_loc->start,
5818 .end = value->location.end
5819 }
5820 },
5821 .name = name,
5822 .depth = depth,
5823 .value = value,
5824 .name_loc = *name_loc,
5825 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5826 };
5827
5828 return node;
5829}
5830
/**
 * Returns true if the byte range [start, end) is exactly the two bytes "it".
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Check the length first so the byte reads below are only done on a
    // two-byte range.
    if ((end - start) != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
5838
/**
 * Returns true if the byte range [start, end) is exactly two bytes long, begins
 * with an underscore, and its second byte is a decimal digit other than '0'.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if ((end - start) != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
5847
5852static inline void
5853pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5854 if (pm_token_is_numbered_parameter(start, end)) {
5855 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5856 }
5857}
5858
5863static pm_local_variable_target_node_t *
5864pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5865 pm_refute_numbered_parameter(parser, location->start, location->end);
5866 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5867
5868 *node = (pm_local_variable_target_node_t) {
5869 {
5870 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5871 .node_id = PM_NODE_IDENTIFY(parser),
5872 .location = *location
5873 },
5874 .name = name,
5875 .depth = depth
5876 };
5877
5878 return node;
5879}
5880
5884static pm_match_predicate_node_t *
5885pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5886 pm_assert_value_expression(parser, value);
5887
5888 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5889
5890 *node = (pm_match_predicate_node_t) {
5891 {
5892 .type = PM_MATCH_PREDICATE_NODE,
5893 .node_id = PM_NODE_IDENTIFY(parser),
5894 .location = {
5895 .start = value->location.start,
5896 .end = pattern->location.end
5897 }
5898 },
5899 .value = value,
5900 .pattern = pattern,
5901 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5902 };
5903
5904 return node;
5905}
5906
5910static pm_match_required_node_t *
5911pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5912 pm_assert_value_expression(parser, value);
5913
5914 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5915
5916 *node = (pm_match_required_node_t) {
5917 {
5918 .type = PM_MATCH_REQUIRED_NODE,
5919 .node_id = PM_NODE_IDENTIFY(parser),
5920 .location = {
5921 .start = value->location.start,
5922 .end = pattern->location.end
5923 }
5924 },
5925 .value = value,
5926 .pattern = pattern,
5927 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5928 };
5929
5930 return node;
5931}
5932
5936static pm_match_write_node_t *
5937pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5938 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5939
5940 *node = (pm_match_write_node_t) {
5941 {
5942 .type = PM_MATCH_WRITE_NODE,
5943 .node_id = PM_NODE_IDENTIFY(parser),
5944 .location = call->base.location
5945 },
5946 .call = call,
5947 .targets = { 0 }
5948 };
5949
5950 return node;
5951}
5952
5956static pm_module_node_t *
5957pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5958 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5959
5960 *node = (pm_module_node_t) {
5961 {
5962 .type = PM_MODULE_NODE,
5963 .node_id = PM_NODE_IDENTIFY(parser),
5964 .location = {
5965 .start = module_keyword->start,
5966 .end = end_keyword->end
5967 }
5968 },
5969 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5970 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5971 .constant_path = constant_path,
5972 .body = body,
5973 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5974 .name = pm_parser_constant_id_token(parser, name)
5975 };
5976
5977 return node;
5978}
5979
5983static pm_multi_target_node_t *
5984pm_multi_target_node_create(pm_parser_t *parser) {
5985 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5986
5987 *node = (pm_multi_target_node_t) {
5988 {
5989 .type = PM_MULTI_TARGET_NODE,
5990 .node_id = PM_NODE_IDENTIFY(parser),
5991 .location = { .start = NULL, .end = NULL }
5992 },
5993 .lefts = { 0 },
5994 .rest = NULL,
5995 .rights = { 0 },
5996 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5997 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5998 };
5999
6000 return node;
6001}
6002
6006static void
6007pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
6008 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
6009 if (node->rest == NULL) {
6010 node->rest = target;
6011 } else {
6012 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
6013 pm_node_list_append(&node->rights, target);
6014 }
6015 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
6016 if (node->rest == NULL) {
6017 node->rest = target;
6018 } else {
6019 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
6020 pm_node_list_append(&node->rights, target);
6021 }
6022 } else if (node->rest == NULL) {
6023 pm_node_list_append(&node->lefts, target);
6024 } else {
6025 pm_node_list_append(&node->rights, target);
6026 }
6027
6028 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
6029 node->base.location.start = target->location.start;
6030 }
6031
6032 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6033 node->base.location.end = target->location.end;
6034 }
6035}
6036
6040static void
6041pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6042 node->base.location.start = lparen->start;
6043 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6044}
6045
6049static void
6050pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6051 node->base.location.end = rparen->end;
6052 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6053}
6054
6058static pm_multi_write_node_t *
6059pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6060 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6061
6062 *node = (pm_multi_write_node_t) {
6063 {
6064 .type = PM_MULTI_WRITE_NODE,
6065 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6066 .node_id = PM_NODE_IDENTIFY(parser),
6067 .location = {
6068 .start = target->base.location.start,
6069 .end = value->location.end
6070 }
6071 },
6072 .lefts = target->lefts,
6073 .rest = target->rest,
6074 .rights = target->rights,
6075 .lparen_loc = target->lparen_loc,
6076 .rparen_loc = target->rparen_loc,
6077 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6078 .value = value
6079 };
6080
6081 // Explicitly do not call pm_node_destroy here because we want to keep
6082 // around all of the information within the MultiWriteNode node.
6083 xfree(target);
6084
6085 return node;
6086}
6087
6091static pm_next_node_t *
6092pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6093 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6094 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6095
6096 *node = (pm_next_node_t) {
6097 {
6098 .type = PM_NEXT_NODE,
6099 .node_id = PM_NODE_IDENTIFY(parser),
6100 .location = {
6101 .start = keyword->start,
6102 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6103 }
6104 },
6105 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6106 .arguments = arguments
6107 };
6108
6109 return node;
6110}
6111
6115static pm_nil_node_t *
6116pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6117 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6118 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6119
6120 *node = (pm_nil_node_t) {{
6121 .type = PM_NIL_NODE,
6122 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6123 .node_id = PM_NODE_IDENTIFY(parser),
6124 .location = PM_LOCATION_TOKEN_VALUE(token)
6125 }};
6126
6127 return node;
6128}
6129
6133static pm_no_keywords_parameter_node_t *
6134pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6135 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6136 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6137 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6138
6139 *node = (pm_no_keywords_parameter_node_t) {
6140 {
6141 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6142 .node_id = PM_NODE_IDENTIFY(parser),
6143 .location = {
6144 .start = operator->start,
6145 .end = keyword->end
6146 }
6147 },
6148 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6149 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6150 };
6151
6152 return node;
6153}
6154
6158static pm_numbered_parameters_node_t *
6159pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6160 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6161
6162 *node = (pm_numbered_parameters_node_t) {
6163 {
6164 .type = PM_NUMBERED_PARAMETERS_NODE,
6165 .node_id = PM_NODE_IDENTIFY(parser),
6166 .location = *location
6167 },
6168 .maximum = maximum
6169 };
6170
6171 return node;
6172}
6173
6178#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6179
6186static uint32_t
6187pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6188 const uint8_t *start = token->start + 1;
6189 const uint8_t *end = token->end;
6190
6191 ptrdiff_t diff = end - start;
6192 assert(diff > 0);
6193#if PTRDIFF_MAX > SIZE_MAX
6194 assert(diff < (ptrdiff_t) SIZE_MAX);
6195#endif
6196 size_t length = (size_t) diff;
6197
6198 char *digits = xcalloc(length + 1, sizeof(char));
6199 memcpy(digits, start, length);
6200 digits[length] = '\0';
6201
6202 char *endptr;
6203 errno = 0;
6204 unsigned long value = strtoul(digits, &endptr, 10);
6205
6206 if ((digits == endptr) || (*endptr != '\0')) {
6207 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6208 value = 0;
6209 }
6210
6211 xfree(digits);
6212
6213 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6214 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6215 value = 0;
6216 }
6217
6218 return (uint32_t) value;
6219}
6220
6221#undef NTH_REF_MAX
6222
6226static pm_numbered_reference_read_node_t *
6227pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6228 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6229 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6230
6231 *node = (pm_numbered_reference_read_node_t) {
6232 {
6233 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6234 .node_id = PM_NODE_IDENTIFY(parser),
6235 .location = PM_LOCATION_TOKEN_VALUE(name),
6236 },
6237 .number = pm_numbered_reference_read_node_number(parser, name)
6238 };
6239
6240 return node;
6241}
6242
6246static pm_optional_parameter_node_t *
6247pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6248 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6249
6250 *node = (pm_optional_parameter_node_t) {
6251 {
6252 .type = PM_OPTIONAL_PARAMETER_NODE,
6253 .node_id = PM_NODE_IDENTIFY(parser),
6254 .location = {
6255 .start = name->start,
6256 .end = value->location.end
6257 }
6258 },
6259 .name = pm_parser_constant_id_token(parser, name),
6260 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6261 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6262 .value = value
6263 };
6264
6265 return node;
6266}
6267
6271static pm_or_node_t *
6272pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6273 pm_assert_value_expression(parser, left);
6274
6275 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6276
6277 *node = (pm_or_node_t) {
6278 {
6279 .type = PM_OR_NODE,
6280 .node_id = PM_NODE_IDENTIFY(parser),
6281 .location = {
6282 .start = left->location.start,
6283 .end = right->location.end
6284 }
6285 },
6286 .left = left,
6287 .right = right,
6288 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6289 };
6290
6291 return node;
6292}
6293
6297static pm_parameters_node_t *
6298pm_parameters_node_create(pm_parser_t *parser) {
6299 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6300
6301 *node = (pm_parameters_node_t) {
6302 {
6303 .type = PM_PARAMETERS_NODE,
6304 .node_id = PM_NODE_IDENTIFY(parser),
6305 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6306 },
6307 .rest = NULL,
6308 .keyword_rest = NULL,
6309 .block = NULL,
6310 .requireds = { 0 },
6311 .optionals = { 0 },
6312 .posts = { 0 },
6313 .keywords = { 0 }
6314 };
6315
6316 return node;
6317}
6318
6322static void
6323pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6324 if (params->base.location.start == NULL) {
6325 params->base.location.start = param->location.start;
6326 } else {
6327 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6328 }
6329
6330 if (params->base.location.end == NULL) {
6331 params->base.location.end = param->location.end;
6332 } else {
6333 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6334 }
6335}
6336
6340static void
6341pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6342 pm_parameters_node_location_set(params, param);
6343 pm_node_list_append(&params->requireds, param);
6344}
6345
6349static void
6350pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6351 pm_parameters_node_location_set(params, (pm_node_t *) param);
6352 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6353}
6354
6358static void
6359pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6360 pm_parameters_node_location_set(params, param);
6361 pm_node_list_append(&params->posts, param);
6362}
6363
6367static void
6368pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6369 pm_parameters_node_location_set(params, param);
6370 params->rest = param;
6371}
6372
6376static void
6377pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6378 pm_parameters_node_location_set(params, param);
6379 pm_node_list_append(&params->keywords, param);
6380}
6381
6385static void
6386pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6387 assert(params->keyword_rest == NULL);
6388 pm_parameters_node_location_set(params, param);
6389 params->keyword_rest = param;
6390}
6391
6395static void
6396pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6397 assert(params->block == NULL);
6398 pm_parameters_node_location_set(params, (pm_node_t *) param);
6399 params->block = param;
6400}
6401
6405static pm_program_node_t *
6406pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6407 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6408
6409 *node = (pm_program_node_t) {
6410 {
6411 .type = PM_PROGRAM_NODE,
6412 .node_id = PM_NODE_IDENTIFY(parser),
6413 .location = {
6414 .start = statements == NULL ? parser->start : statements->base.location.start,
6415 .end = statements == NULL ? parser->end : statements->base.location.end
6416 }
6417 },
6418 .locals = *locals,
6419 .statements = statements
6420 };
6421
6422 return node;
6423}
6424
6428static pm_parentheses_node_t *
6429pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6430 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6431
6432 *node = (pm_parentheses_node_t) {
6433 {
6434 .type = PM_PARENTHESES_NODE,
6435 .flags = flags,
6436 .node_id = PM_NODE_IDENTIFY(parser),
6437 .location = {
6438 .start = opening->start,
6439 .end = closing->end
6440 }
6441 },
6442 .body = body,
6443 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6444 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6445 };
6446
6447 return node;
6448}
6449
6453static pm_pinned_expression_node_t *
6454pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6455 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6456
6457 *node = (pm_pinned_expression_node_t) {
6458 {
6459 .type = PM_PINNED_EXPRESSION_NODE,
6460 .node_id = PM_NODE_IDENTIFY(parser),
6461 .location = {
6462 .start = operator->start,
6463 .end = rparen->end
6464 }
6465 },
6466 .expression = expression,
6467 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6468 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6469 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6470 };
6471
6472 return node;
6473}
6474
6478static pm_pinned_variable_node_t *
6479pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6480 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6481
6482 *node = (pm_pinned_variable_node_t) {
6483 {
6484 .type = PM_PINNED_VARIABLE_NODE,
6485 .node_id = PM_NODE_IDENTIFY(parser),
6486 .location = {
6487 .start = operator->start,
6488 .end = variable->location.end
6489 }
6490 },
6491 .variable = variable,
6492 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6493 };
6494
6495 return node;
6496}
6497
6501static pm_post_execution_node_t *
6502pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6503 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6504
6505 *node = (pm_post_execution_node_t) {
6506 {
6507 .type = PM_POST_EXECUTION_NODE,
6508 .node_id = PM_NODE_IDENTIFY(parser),
6509 .location = {
6510 .start = keyword->start,
6511 .end = closing->end
6512 }
6513 },
6514 .statements = statements,
6515 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6516 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6517 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6518 };
6519
6520 return node;
6521}
6522
6526static pm_pre_execution_node_t *
6527pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6528 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6529
6530 *node = (pm_pre_execution_node_t) {
6531 {
6532 .type = PM_PRE_EXECUTION_NODE,
6533 .node_id = PM_NODE_IDENTIFY(parser),
6534 .location = {
6535 .start = keyword->start,
6536 .end = closing->end
6537 }
6538 },
6539 .statements = statements,
6540 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6541 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6542 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6543 };
6544
6545 return node;
6546}
6547
6551static pm_range_node_t *
6552pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6553 pm_assert_value_expression(parser, left);
6554 pm_assert_value_expression(parser, right);
6555
6556 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6557 pm_node_flags_t flags = 0;
6558
6559 // Indicate that this node is an exclusive range if the operator is `...`.
6560 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6561 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6562 }
6563
6564 // Indicate that this node is a static literal (i.e., can be compiled with
6565 // a putobject in CRuby) if the left and right are implicit nil, explicit
6566 // nil, or integers.
6567 if (
6568 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6569 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6570 ) {
6571 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6572 }
6573
6574 *node = (pm_range_node_t) {
6575 {
6576 .type = PM_RANGE_NODE,
6577 .flags = flags,
6578 .node_id = PM_NODE_IDENTIFY(parser),
6579 .location = {
6580 .start = (left == NULL ? operator->start : left->location.start),
6581 .end = (right == NULL ? operator->end : right->location.end)
6582 }
6583 },
6584 .left = left,
6585 .right = right,
6586 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6587 };
6588
6589 return node;
6590}
6591
6595static pm_redo_node_t *
6596pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6597 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6598 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6599
6600 *node = (pm_redo_node_t) {{
6601 .type = PM_REDO_NODE,
6602 .node_id = PM_NODE_IDENTIFY(parser),
6603 .location = PM_LOCATION_TOKEN_VALUE(token)
6604 }};
6605
6606 return node;
6607}
6608
6613static pm_regular_expression_node_t *
6614pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6615 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6616
6617 *node = (pm_regular_expression_node_t) {
6618 {
6619 .type = PM_REGULAR_EXPRESSION_NODE,
6620 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6621 .node_id = PM_NODE_IDENTIFY(parser),
6622 .location = {
6623 .start = MIN(opening->start, closing->start),
6624 .end = MAX(opening->end, closing->end)
6625 }
6626 },
6627 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6628 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6629 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6630 .unescaped = *unescaped
6631 };
6632
6633 return node;
6634}
6635
6639static inline pm_regular_expression_node_t *
6640pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6641 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6642}
6643
6647static pm_required_parameter_node_t *
6648pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6649 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6650
6651 *node = (pm_required_parameter_node_t) {
6652 {
6653 .type = PM_REQUIRED_PARAMETER_NODE,
6654 .node_id = PM_NODE_IDENTIFY(parser),
6655 .location = PM_LOCATION_TOKEN_VALUE(token)
6656 },
6657 .name = pm_parser_constant_id_token(parser, token)
6658 };
6659
6660 return node;
6661}
6662
6666static pm_rescue_modifier_node_t *
6667pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6668 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6669
6670 *node = (pm_rescue_modifier_node_t) {
6671 {
6672 .type = PM_RESCUE_MODIFIER_NODE,
6673 .node_id = PM_NODE_IDENTIFY(parser),
6674 .location = {
6675 .start = expression->location.start,
6676 .end = rescue_expression->location.end
6677 }
6678 },
6679 .expression = expression,
6680 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6681 .rescue_expression = rescue_expression
6682 };
6683
6684 return node;
6685}
6686
6690static pm_rescue_node_t *
6691pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6692 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6693
6694 *node = (pm_rescue_node_t) {
6695 {
6696 .type = PM_RESCUE_NODE,
6697 .node_id = PM_NODE_IDENTIFY(parser),
6698 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6699 },
6700 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6701 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6702 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6703 .reference = NULL,
6704 .statements = NULL,
6705 .subsequent = NULL,
6706 .exceptions = { 0 }
6707 };
6708
6709 return node;
6710}
6711
6712static inline void
6713pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6714 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6715}
6716
6720static void
6721pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6722 node->reference = reference;
6723 node->base.location.end = reference->location.end;
6724}
6725
6729static void
6730pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6731 node->statements = statements;
6732 if (pm_statements_node_body_length(statements) > 0) {
6733 node->base.location.end = statements->base.location.end;
6734 }
6735}
6736
6740static void
6741pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6742 node->subsequent = subsequent;
6743 node->base.location.end = subsequent->base.location.end;
6744}
6745
6749static void
6750pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6751 pm_node_list_append(&node->exceptions, exception);
6752 node->base.location.end = exception->location.end;
6753}
6754
6758static pm_rest_parameter_node_t *
6759pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6760 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6761
6762 *node = (pm_rest_parameter_node_t) {
6763 {
6764 .type = PM_REST_PARAMETER_NODE,
6765 .node_id = PM_NODE_IDENTIFY(parser),
6766 .location = {
6767 .start = operator->start,
6768 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6769 }
6770 },
6771 .name = pm_parser_optional_constant_id_token(parser, name),
6772 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6773 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6774 };
6775
6776 return node;
6777}
6778
6782static pm_retry_node_t *
6783pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6784 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6785 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6786
6787 *node = (pm_retry_node_t) {{
6788 .type = PM_RETRY_NODE,
6789 .node_id = PM_NODE_IDENTIFY(parser),
6790 .location = PM_LOCATION_TOKEN_VALUE(token)
6791 }};
6792
6793 return node;
6794}
6795
6799static pm_return_node_t *
6800pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6801 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6802
6803 *node = (pm_return_node_t) {
6804 {
6805 .type = PM_RETURN_NODE,
6806 .node_id = PM_NODE_IDENTIFY(parser),
6807 .location = {
6808 .start = keyword->start,
6809 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6810 }
6811 },
6812 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6813 .arguments = arguments
6814 };
6815
6816 return node;
6817}
6818
6822static pm_self_node_t *
6823pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6824 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6825 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6826
6827 *node = (pm_self_node_t) {{
6828 .type = PM_SELF_NODE,
6829 .node_id = PM_NODE_IDENTIFY(parser),
6830 .location = PM_LOCATION_TOKEN_VALUE(token)
6831 }};
6832
6833 return node;
6834}
6835
6839static pm_shareable_constant_node_t *
6840pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6841 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6842
6843 *node = (pm_shareable_constant_node_t) {
6844 {
6845 .type = PM_SHAREABLE_CONSTANT_NODE,
6846 .flags = (pm_node_flags_t) value,
6847 .node_id = PM_NODE_IDENTIFY(parser),
6848 .location = PM_LOCATION_NODE_VALUE(write)
6849 },
6850 .write = write
6851 };
6852
6853 return node;
6854}
6855
6859static pm_singleton_class_node_t *
6860pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6861 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6862
6863 *node = (pm_singleton_class_node_t) {
6864 {
6865 .type = PM_SINGLETON_CLASS_NODE,
6866 .node_id = PM_NODE_IDENTIFY(parser),
6867 .location = {
6868 .start = class_keyword->start,
6869 .end = end_keyword->end
6870 }
6871 },
6872 .locals = *locals,
6873 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6874 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6875 .expression = expression,
6876 .body = body,
6877 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6878 };
6879
6880 return node;
6881}
6882
6886static pm_source_encoding_node_t *
6887pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6888 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6889 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6890
6891 *node = (pm_source_encoding_node_t) {{
6892 .type = PM_SOURCE_ENCODING_NODE,
6893 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6894 .node_id = PM_NODE_IDENTIFY(parser),
6895 .location = PM_LOCATION_TOKEN_VALUE(token)
6896 }};
6897
6898 return node;
6899}
6900
6904static pm_source_file_node_t*
6905pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6906 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6907 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6908
6909 pm_node_flags_t flags = 0;
6910
6911 switch (parser->frozen_string_literal) {
6912 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6913 flags |= PM_STRING_FLAGS_MUTABLE;
6914 break;
6915 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6916 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6917 break;
6918 }
6919
6920 *node = (pm_source_file_node_t) {
6921 {
6922 .type = PM_SOURCE_FILE_NODE,
6923 .flags = flags,
6924 .node_id = PM_NODE_IDENTIFY(parser),
6925 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6926 },
6927 .filepath = parser->filepath
6928 };
6929
6930 return node;
6931}
6932
6936static pm_source_line_node_t *
6937pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6938 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6939 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6940
6941 *node = (pm_source_line_node_t) {{
6942 .type = PM_SOURCE_LINE_NODE,
6943 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6944 .node_id = PM_NODE_IDENTIFY(parser),
6945 .location = PM_LOCATION_TOKEN_VALUE(token)
6946 }};
6947
6948 return node;
6949}
6950
6954static pm_splat_node_t *
6955pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6956 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6957
6958 *node = (pm_splat_node_t) {
6959 {
6960 .type = PM_SPLAT_NODE,
6961 .node_id = PM_NODE_IDENTIFY(parser),
6962 .location = {
6963 .start = operator->start,
6964 .end = (expression == NULL ? operator->end : expression->location.end)
6965 }
6966 },
6967 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6968 .expression = expression
6969 };
6970
6971 return node;
6972}
6973
6977static pm_statements_node_t *
6978pm_statements_node_create(pm_parser_t *parser) {
6979 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6980
6981 *node = (pm_statements_node_t) {
6982 {
6983 .type = PM_STATEMENTS_NODE,
6984 .node_id = PM_NODE_IDENTIFY(parser),
6985 .location = PM_LOCATION_NULL_VALUE(parser)
6986 },
6987 .body = { 0 }
6988 };
6989
6990 return node;
6991}
6992
6996static size_t
6997pm_statements_node_body_length(pm_statements_node_t *node) {
6998 return node && node->body.size;
6999}
7000
7004static void
7005pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
7006 node->base.location = (pm_location_t) { .start = start, .end = end };
7007}
7008
7013static inline void
7014pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
7015 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
7016 node->base.location.start = statement->location.start;
7017 }
7018
7019 if (statement->location.end > node->base.location.end) {
7020 node->base.location.end = statement->location.end;
7021 }
7022}
7023
7027static void
7028pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
7029 pm_statements_node_body_update(node, statement);
7030
7031 if (node->body.size > 0) {
7032 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
7033
7034 switch (PM_NODE_TYPE(previous)) {
7035 case PM_BREAK_NODE:
7036 case PM_NEXT_NODE:
7037 case PM_REDO_NODE:
7038 case PM_RETRY_NODE:
7039 case PM_RETURN_NODE:
7040 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7041 break;
7042 default:
7043 break;
7044 }
7045 }
7046
7047 pm_node_list_append(&node->body, statement);
7048 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7049}
7050
7054static void
7055pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7056 pm_statements_node_body_update(node, statement);
7057 pm_node_list_prepend(&node->body, statement);
7058 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7059}
7060
7064static inline pm_string_node_t *
7065pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7066 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7067 pm_node_flags_t flags = 0;
7068
7069 switch (parser->frozen_string_literal) {
7070 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7071 flags = PM_STRING_FLAGS_MUTABLE;
7072 break;
7073 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7074 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7075 break;
7076 }
7077
7078 *node = (pm_string_node_t) {
7079 {
7080 .type = PM_STRING_NODE,
7081 .flags = flags,
7082 .node_id = PM_NODE_IDENTIFY(parser),
7083 .location = {
7084 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7085 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7086 }
7087 },
7088 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7089 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7090 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7091 .unescaped = *string
7092 };
7093
7094 return node;
7095}
7096
7100static pm_string_node_t *
7101pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7102 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7103}
7104
7109static pm_string_node_t *
7110pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7111 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7112 parser->current_string = PM_STRING_EMPTY;
7113 return node;
7114}
7115
7119static pm_super_node_t *
7120pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7121 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7122 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7123
7124 const uint8_t *end = pm_arguments_end(arguments);
7125 if (end == NULL) {
7126 assert(false && "unreachable");
7127 }
7128
7129 *node = (pm_super_node_t) {
7130 {
7131 .type = PM_SUPER_NODE,
7132 .node_id = PM_NODE_IDENTIFY(parser),
7133 .location = {
7134 .start = keyword->start,
7135 .end = end,
7136 }
7137 },
7138 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7139 .lparen_loc = arguments->opening_loc,
7140 .arguments = arguments->arguments,
7141 .rparen_loc = arguments->closing_loc,
7142 .block = arguments->block
7143 };
7144
7145 return node;
7146}
7147
7152static bool
7153pm_ascii_only_p(const pm_string_t *contents) {
7154 const size_t length = pm_string_length(contents);
7155 const uint8_t *source = pm_string_source(contents);
7156
7157 for (size_t index = 0; index < length; index++) {
7158 if (source[index] & 0x80) return false;
7159 }
7160
7161 return true;
7162}
7163
7167static void
7168parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7169 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7170 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7171
7172 if (width == 0) {
7173 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7174 break;
7175 }
7176
7177 cursor += width;
7178 }
7179}
7180
7185static void
7186parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7187 const pm_encoding_t *encoding = parser->encoding;
7188
7189 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7190 size_t width = encoding->char_width(cursor, end - cursor);
7191
7192 if (width == 0) {
7193 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7194 break;
7195 }
7196
7197 cursor += width;
7198 }
7199}
7200
7210static inline pm_node_flags_t
7211parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7212 if (parser->explicit_encoding != NULL) {
7213 // A Symbol may optionally have its encoding explicitly set. This will
7214 // happen if an escape sequence results in a non-ASCII code point.
7215 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7216 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7217 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7218 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7219 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7220 } else if (validate) {
7221 parse_symbol_encoding_validate_other(parser, location, contents);
7222 }
7223 } else if (pm_ascii_only_p(contents)) {
7224 // Ruby stipulates that all source files must use an ASCII-compatible
7225 // encoding. Thus, all symbols appearing in source are eligible for
7226 // "downgrading" to US-ASCII.
7227 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7228 } else if (validate) {
7229 parse_symbol_encoding_validate_other(parser, location, contents);
7230 }
7231
7232 return 0;
7233}
7234
7235static pm_node_flags_t
7236parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7237 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7238 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7239 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7240 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7241
7242 // There's special validation logic used if a string does not contain any character escape sequences.
7243 if (parser->explicit_encoding == NULL) {
7244 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7245 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7246 // the US-ASCII encoding.
7247 if (ascii_only) {
7248 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7249 }
7250
7251 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7252 if (!ascii_only) {
7253 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7254 }
7255 } else if (parser->encoding != modifier_encoding) {
7256 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7257
7258 if (modifier == 'n' && !ascii_only) {
7259 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7260 }
7261 }
7262
7263 return flags;
7264 }
7265
7266 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7267 bool mixed_encoding = false;
7268
7269 if (mixed_encoding) {
7270 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7271 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7272 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7273 bool valid_string_in_modifier_encoding = true;
7274
7275 if (!valid_string_in_modifier_encoding) {
7276 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7277 }
7278 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7279 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7280 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7281 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7282 }
7283 }
7284
7285 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7286 return flags;
7287}
7288
7295static pm_node_flags_t
7296parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7297 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7298 bool valid_unicode_range = true;
7299 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7300 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7301 return flags;
7302 }
7303
7304 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7305 // to multi-byte characters are allowed.
7306 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7307 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7308 // following error message appearing twice. We do the same for compatibility.
7309 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7310 }
7311
7320 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7321 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7322 }
7323
7324 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7325 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7326 }
7327
7328 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7329 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7330 }
7331
7332 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7333 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7334 }
7335
7336 // At this point no encoding modifiers will be present on the regular expression as they would have already
7337 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7338 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7339 if (ascii_only) {
7340 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7341 }
7342
7343 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7344 // or by specifying a modifier.
7345 //
7346 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7347 if (parser->explicit_encoding != NULL) {
7348 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7349 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7350 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7351 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7352 }
7353 }
7354
7355 return 0;
7356}
7357
7362static pm_symbol_node_t *
7363pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7364 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7365
7366 *node = (pm_symbol_node_t) {
7367 {
7368 .type = PM_SYMBOL_NODE,
7369 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7370 .node_id = PM_NODE_IDENTIFY(parser),
7371 .location = {
7372 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7373 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7374 }
7375 },
7376 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7377 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7378 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7379 .unescaped = *unescaped
7380 };
7381
7382 return node;
7383}
7384
7388static inline pm_symbol_node_t *
7389pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7390 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7391}
7392
7396static pm_symbol_node_t *
7397pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7398 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7399 parser->current_string = PM_STRING_EMPTY;
7400 return node;
7401}
7402
7406static pm_symbol_node_t *
7407pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7408 pm_symbol_node_t *node;
7409
7410 switch (token->type) {
7411 case PM_TOKEN_LABEL: {
7412 pm_token_t opening = not_provided(parser);
7413 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7414
7415 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7416 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7417
7418 assert((label.end - label.start) >= 0);
7419 pm_string_shared_init(&node->unescaped, label.start, label.end);
7420 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7421
7422 break;
7423 }
7424 case PM_TOKEN_MISSING: {
7425 pm_token_t opening = not_provided(parser);
7426 pm_token_t closing = not_provided(parser);
7427
7428 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7429 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7430 break;
7431 }
7432 default:
7433 assert(false && "unreachable");
7434 node = NULL;
7435 break;
7436 }
7437
7438 return node;
7439}
7440
7444static pm_symbol_node_t *
7445pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7446 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7447
7448 *node = (pm_symbol_node_t) {
7449 {
7450 .type = PM_SYMBOL_NODE,
7451 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7452 .node_id = PM_NODE_IDENTIFY(parser),
7453 .location = PM_LOCATION_NULL_VALUE(parser)
7454 },
7455 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7456 .unescaped = { 0 }
7457 };
7458
7459 pm_string_constant_init(&node->unescaped, content, strlen(content));
7460 return node;
7461}
7462
7466static bool
7467pm_symbol_node_label_p(pm_node_t *node) {
7468 const uint8_t *end = NULL;
7469
7470 switch (PM_NODE_TYPE(node)) {
7471 case PM_SYMBOL_NODE:
7472 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7473 break;
7474 case PM_INTERPOLATED_SYMBOL_NODE:
7475 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7476 break;
7477 default:
7478 return false;
7479 }
7480
7481 return (end != NULL) && (end[-1] == ':');
7482}
7483
7487static pm_symbol_node_t *
7488pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7489 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7490
7491 *new_node = (pm_symbol_node_t) {
7492 {
7493 .type = PM_SYMBOL_NODE,
7494 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7495 .node_id = PM_NODE_IDENTIFY(parser),
7496 .location = {
7497 .start = opening->start,
7498 .end = closing->end
7499 }
7500 },
7501 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7502 .value_loc = node->content_loc,
7503 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7504 .unescaped = node->unescaped
7505 };
7506
7507 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7508 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7509
7510 // We are explicitly _not_ using pm_node_destroy here because we don't want
7511 // to trash the unescaped string. We could instead copy the string if we
7512 // know that it is owned, but we're taking the fast path for now.
7513 xfree(node);
7514
7515 return new_node;
7516}
7517
7521static pm_string_node_t *
7522pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7523 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7524 pm_node_flags_t flags = 0;
7525
7526 switch (parser->frozen_string_literal) {
7527 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7528 flags = PM_STRING_FLAGS_MUTABLE;
7529 break;
7530 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7531 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7532 break;
7533 }
7534
7535 *new_node = (pm_string_node_t) {
7536 {
7537 .type = PM_STRING_NODE,
7538 .flags = flags,
7539 .node_id = PM_NODE_IDENTIFY(parser),
7540 .location = node->base.location
7541 },
7542 .opening_loc = node->opening_loc,
7543 .content_loc = node->value_loc,
7544 .closing_loc = node->closing_loc,
7545 .unescaped = node->unescaped
7546 };
7547
7548 // We are explicitly _not_ using pm_node_destroy here because we don't want
7549 // to trash the unescaped string. We could instead copy the string if we
7550 // know that it is owned, but we're taking the fast path for now.
7551 xfree(node);
7552
7553 return new_node;
7554}
7555
7559static pm_true_node_t *
7560pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7561 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7562 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7563
7564 *node = (pm_true_node_t) {{
7565 .type = PM_TRUE_NODE,
7566 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7567 .node_id = PM_NODE_IDENTIFY(parser),
7568 .location = PM_LOCATION_TOKEN_VALUE(token)
7569 }};
7570
7571 return node;
7572}
7573
7577static pm_true_node_t *
7578pm_true_node_synthesized_create(pm_parser_t *parser) {
7579 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7580
7581 *node = (pm_true_node_t) {{
7582 .type = PM_TRUE_NODE,
7583 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7584 .node_id = PM_NODE_IDENTIFY(parser),
7585 .location = { .start = parser->start, .end = parser->end }
7586 }};
7587
7588 return node;
7589}
7590
7594static pm_undef_node_t *
7595pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7596 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7597 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7598
7599 *node = (pm_undef_node_t) {
7600 {
7601 .type = PM_UNDEF_NODE,
7602 .node_id = PM_NODE_IDENTIFY(parser),
7603 .location = PM_LOCATION_TOKEN_VALUE(token),
7604 },
7605 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7606 .names = { 0 }
7607 };
7608
7609 return node;
7610}
7611
7615static void
7616pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7617 node->base.location.end = name->location.end;
7618 pm_node_list_append(&node->names, name);
7619}
7620
7624static pm_unless_node_t *
7625pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7626 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7627 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7628
7629 const uint8_t *end;
7630 if (statements != NULL) {
7631 end = statements->base.location.end;
7632 } else {
7633 end = predicate->location.end;
7634 }
7635
7636 *node = (pm_unless_node_t) {
7637 {
7638 .type = PM_UNLESS_NODE,
7639 .flags = PM_NODE_FLAG_NEWLINE,
7640 .node_id = PM_NODE_IDENTIFY(parser),
7641 .location = {
7642 .start = keyword->start,
7643 .end = end
7644 },
7645 },
7646 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7647 .predicate = predicate,
7648 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7649 .statements = statements,
7650 .else_clause = NULL,
7651 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7652 };
7653
7654 return node;
7655}
7656
7660static pm_unless_node_t *
7661pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7662 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7663 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7664
7665 pm_statements_node_t *statements = pm_statements_node_create(parser);
7666 pm_statements_node_body_append(parser, statements, statement, true);
7667
7668 *node = (pm_unless_node_t) {
7669 {
7670 .type = PM_UNLESS_NODE,
7671 .flags = PM_NODE_FLAG_NEWLINE,
7672 .node_id = PM_NODE_IDENTIFY(parser),
7673 .location = {
7674 .start = statement->location.start,
7675 .end = predicate->location.end
7676 },
7677 },
7678 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7679 .predicate = predicate,
7680 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7681 .statements = statements,
7682 .else_clause = NULL,
7683 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7684 };
7685
7686 return node;
7687}
7688
7689static inline void
7690pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7691 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7692 node->base.location.end = end_keyword->end;
7693}
7694
7700static void
7701pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7702 assert(parser->current_block_exits != NULL);
7703
7704 // All of the block exits that we want to remove should be within the
7705 // statements, and since we are modifying the statements, we shouldn't have
7706 // to check the end location.
7707 const uint8_t *start = statements->base.location.start;
7708
7709 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7710 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7711 if (block_exit->location.start < start) break;
7712
7713 // Implicitly remove from the list by lowering the size.
7714 parser->current_block_exits->size--;
7715 }
7716}
7717
7721static pm_until_node_t *
7722pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7723 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7724 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7725
7726 *node = (pm_until_node_t) {
7727 {
7728 .type = PM_UNTIL_NODE,
7729 .flags = flags,
7730 .node_id = PM_NODE_IDENTIFY(parser),
7731 .location = {
7732 .start = keyword->start,
7733 .end = closing->end,
7734 },
7735 },
7736 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7737 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7738 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7739 .predicate = predicate,
7740 .statements = statements
7741 };
7742
7743 return node;
7744}
7745
7749static pm_until_node_t *
7750pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7751 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7752 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7753 pm_loop_modifier_block_exits(parser, statements);
7754
7755 *node = (pm_until_node_t) {
7756 {
7757 .type = PM_UNTIL_NODE,
7758 .flags = flags,
7759 .node_id = PM_NODE_IDENTIFY(parser),
7760 .location = {
7761 .start = statements->base.location.start,
7762 .end = predicate->location.end,
7763 },
7764 },
7765 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7766 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7767 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7768 .predicate = predicate,
7769 .statements = statements
7770 };
7771
7772 return node;
7773}
7774
7778static pm_when_node_t *
7779pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7780 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7781
7782 *node = (pm_when_node_t) {
7783 {
7784 .type = PM_WHEN_NODE,
7785 .node_id = PM_NODE_IDENTIFY(parser),
7786 .location = {
7787 .start = keyword->start,
7788 .end = NULL
7789 }
7790 },
7791 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7792 .statements = NULL,
7793 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7794 .conditions = { 0 }
7795 };
7796
7797 return node;
7798}
7799
7803static void
7804pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7805 node->base.location.end = condition->location.end;
7806 pm_node_list_append(&node->conditions, condition);
7807}
7808
7812static inline void
7813pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7814 node->base.location.end = then_keyword->end;
7815 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7816}
7817
7821static void
7822pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7823 if (statements->base.location.end > node->base.location.end) {
7824 node->base.location.end = statements->base.location.end;
7825 }
7826
7827 node->statements = statements;
7828}
7829
7833static pm_while_node_t *
7834pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7835 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7836 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7837
7838 *node = (pm_while_node_t) {
7839 {
7840 .type = PM_WHILE_NODE,
7841 .flags = flags,
7842 .node_id = PM_NODE_IDENTIFY(parser),
7843 .location = {
7844 .start = keyword->start,
7845 .end = closing->end
7846 },
7847 },
7848 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7849 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7850 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7851 .predicate = predicate,
7852 .statements = statements
7853 };
7854
7855 return node;
7856}
7857
7861static pm_while_node_t *
7862pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7863 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7864 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7865 pm_loop_modifier_block_exits(parser, statements);
7866
7867 *node = (pm_while_node_t) {
7868 {
7869 .type = PM_WHILE_NODE,
7870 .flags = flags,
7871 .node_id = PM_NODE_IDENTIFY(parser),
7872 .location = {
7873 .start = statements->base.location.start,
7874 .end = predicate->location.end
7875 },
7876 },
7877 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7878 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7879 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7880 .predicate = predicate,
7881 .statements = statements
7882 };
7883
7884 return node;
7885}
7886
7890static pm_while_node_t *
7891pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7892 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7893
7894 *node = (pm_while_node_t) {
7895 {
7896 .type = PM_WHILE_NODE,
7897 .node_id = PM_NODE_IDENTIFY(parser),
7898 .location = PM_LOCATION_NULL_VALUE(parser)
7899 },
7900 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7901 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7902 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7903 .predicate = predicate,
7904 .statements = statements
7905 };
7906
7907 return node;
7908}
7909
7914static pm_x_string_node_t *
7915pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7916 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7917
7918 *node = (pm_x_string_node_t) {
7919 {
7920 .type = PM_X_STRING_NODE,
7921 .flags = PM_STRING_FLAGS_FROZEN,
7922 .node_id = PM_NODE_IDENTIFY(parser),
7923 .location = {
7924 .start = opening->start,
7925 .end = closing->end
7926 },
7927 },
7928 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7929 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7930 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7931 .unescaped = *unescaped
7932 };
7933
7934 return node;
7935}
7936
7940static inline pm_x_string_node_t *
7941pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7942 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7943}
7944
7948static pm_yield_node_t *
7949pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7950 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7951
7952 const uint8_t *end;
7953 if (rparen_loc->start != NULL) {
7954 end = rparen_loc->end;
7955 } else if (arguments != NULL) {
7956 end = arguments->base.location.end;
7957 } else if (lparen_loc->start != NULL) {
7958 end = lparen_loc->end;
7959 } else {
7960 end = keyword->end;
7961 }
7962
7963 *node = (pm_yield_node_t) {
7964 {
7965 .type = PM_YIELD_NODE,
7966 .node_id = PM_NODE_IDENTIFY(parser),
7967 .location = {
7968 .start = keyword->start,
7969 .end = end
7970 },
7971 },
7972 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7973 .lparen_loc = *lparen_loc,
7974 .arguments = arguments,
7975 .rparen_loc = *rparen_loc
7976 };
7977
7978 return node;
7979}
7980
7981#undef PM_NODE_ALLOC
7982#undef PM_NODE_IDENTIFY
7983
7988static int
7989pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7990 pm_scope_t *scope = parser->current_scope;
7991 int depth = 0;
7992
7993 while (scope != NULL) {
7994 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7995 if (scope->closed) break;
7996
7997 scope = scope->previous;
7998 depth++;
7999 }
8000
8001 return -1;
8002}
8003
8009static inline int
8010pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
8011 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
8012}
8013
8017static inline void
8018pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8019 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
8020}
8021
8025static pm_constant_id_t
8026pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8027 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
8028 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
8029 return constant_id;
8030}
8031
8035static inline pm_constant_id_t
8036pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8037 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8038}
8039
8043static pm_constant_id_t
8044pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8045 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8046 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8047 return constant_id;
8048}
8049
8053static pm_constant_id_t
8054pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8055 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8056 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8057 return constant_id;
8058}
8059
8067static bool
8068pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8069 // We want to check whether the parameter name is a numbered parameter or
8070 // not.
8071 pm_refute_numbered_parameter(parser, name->start, name->end);
8072
8073 // Otherwise we'll fetch the constant id for the parameter name and check
8074 // whether it's already in the current scope.
8075 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8076
8077 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8078 // Add an error if the parameter doesn't start with _ and has been seen before
8079 if ((name->start < name->end) && (*name->start != '_')) {
8080 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8081 }
8082 return true;
8083 }
8084 return false;
8085}
8086
8090static void
8091pm_parser_scope_pop(pm_parser_t *parser) {
8092 pm_scope_t *scope = parser->current_scope;
8093 parser->current_scope = scope->previous;
8094 pm_locals_free(&scope->locals);
8095 pm_node_list_free(&scope->implicit_parameters);
8096 xfree(scope);
8097}
8098
8099/******************************************************************************/
8100/* Stack helpers */
8101/******************************************************************************/
8102
8106static inline void
8107pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8108 *stack = (*stack << 1) | (value & 1);
8109}
8110
8114static inline void
8115pm_state_stack_pop(pm_state_stack_t *stack) {
8116 *stack >>= 1;
8117}
8118
8122static inline bool
8123pm_state_stack_p(const pm_state_stack_t *stack) {
8124 return *stack & 1;
8125}
8126
8127static inline void
8128pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8129 // Use the negation of the value to prevent stack overflow.
8130 pm_state_stack_push(&parser->accepts_block_stack, !value);
8131}
8132
8133static inline void
8134pm_accepts_block_stack_pop(pm_parser_t *parser) {
8135 pm_state_stack_pop(&parser->accepts_block_stack);
8136}
8137
8138static inline bool
8139pm_accepts_block_stack_p(pm_parser_t *parser) {
8140 return !pm_state_stack_p(&parser->accepts_block_stack);
8141}
8142
8143static inline void
8144pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8145 pm_state_stack_push(&parser->do_loop_stack, value);
8146}
8147
8148static inline void
8149pm_do_loop_stack_pop(pm_parser_t *parser) {
8150 pm_state_stack_pop(&parser->do_loop_stack);
8151}
8152
8153static inline bool
8154pm_do_loop_stack_p(pm_parser_t *parser) {
8155 return pm_state_stack_p(&parser->do_loop_stack);
8156}
8157
8158/******************************************************************************/
8159/* Lexer check helpers */
8160/******************************************************************************/
8161
8166static inline uint8_t
8167peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8168 if (cursor < parser->end) {
8169 return *cursor;
8170 } else {
8171 return '\0';
8172 }
8173}
8174
8180static inline uint8_t
8181peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8182 return peek_at(parser, parser->current.end + offset);
8183}
8184
8189static inline uint8_t
8190peek(const pm_parser_t *parser) {
8191 return peek_at(parser, parser->current.end);
8192}
8193
8198static inline bool
8199match(pm_parser_t *parser, uint8_t value) {
8200 if (peek(parser) == value) {
8201 parser->current.end++;
8202 return true;
8203 }
8204 return false;
8205}
8206
8211static inline size_t
8212match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8213 if (peek_at(parser, cursor) == '\n') {
8214 return 1;
8215 }
8216 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8217 return 2;
8218 }
8219 return 0;
8220}
8221
8227static inline size_t
8228match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8229 return match_eol_at(parser, parser->current.end + offset);
8230}
8231
8237static inline size_t
8238match_eol(pm_parser_t *parser) {
8239 return match_eol_at(parser, parser->current.end);
8240}
8241
/**
 * Return a pointer to the first '\n' byte within the `length` bytes starting
 * at `cursor`, or NULL if there is none. `length` must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // A plain byte search is used for \n; the original note on this function
    // says that is safe because no supported encoding has \n as a component of
    // a multi-byte character.
    const size_t span = (size_t) length;
    const void *found = memchr(cursor, '\n', span);

    return (const uint8_t *) found;
}
8254
8258static inline bool
8259ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8260 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8261}
8262
8267static bool
8268parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8269 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8270
8271 if (encoding != NULL) {
8272 if (parser->encoding != encoding) {
8273 parser->encoding = encoding;
8274 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8275 }
8276
8277 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8278 return true;
8279 }
8280
8281 return false;
8282}
8283
/**
 * Scan the current comment token for the word "coding" (case-insensitive)
 * followed by an '=' or ':' separator and a value, and try to switch the
 * parser's encoding to that value. If no such pattern is found the function
 * returns without doing anything; if the value is not a known encoding an
 * error is added to the parser.
 */
static void
parser_lex_magic_comment_encoding(pm_parser_t *parser) {
    // Skip the first byte of the comment token.
    const uint8_t *cursor = parser->current.start + 1;
    const uint8_t *end = parser->current.end;

    // First, search for the word "coding". The byte 6 positions ahead of the
    // cursor is inspected: if it is a letter of "coding", the cursor advances
    // by the amount that would put the lookahead just past a word holding that
    // letter in its usual position. Otherwise the cursor jumps 6 bytes, and
    // only when it lands on a separator or whitespace are the preceding 6
    // bytes compared against "coding".
    bool separator = false;
    while (true) {
        // Not enough bytes left for "coding" plus a following byte.
        if (end - cursor <= 6) return;
        switch (cursor[6]) {
            case 'C': case 'c': cursor += 6; continue;
            case 'O': case 'o': cursor += 5; continue;
            case 'D': case 'd': cursor += 4; continue;
            case 'I': case 'i': cursor += 3; continue;
            case 'N': case 'n': cursor += 2; continue;
            case 'G': case 'g': cursor += 1; continue;
            case '=': case ':':
                // The cursor lands on the separator itself.
                separator = true;
                cursor += 6;
                break;
            default:
                cursor += 6;
                if (pm_char_is_whitespace(*cursor)) break;
                continue;
        }
        if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
        separator = false;
    }

    // Next, skip whitespace. If the separator has not been seen yet, the next
    // non-whitespace byte must be '=' or ':'; after it, whitespace is skipped
    // once more. Running out of bytes at any point means there is no value.
    while (true) {
        do {
            if (++cursor >= end) return;
        } while (pm_char_is_whitespace(*cursor));

        if (separator) break;
        if (*cursor != '=' && *cursor != ':') return;

        separator = true;
        cursor++;
    }

    // The value runs over '-', '_', and bytes that the current encoding
    // reports as alphanumeric.
    const uint8_t *value_start = cursor;
    while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);

    if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
        // If we were unable to parse the encoding value, then we've got an
        // issue because we didn't understand the encoding that the user was
        // trying to use. In this case we'll keep using the default encoding but
        // add an error to the parser to indicate an unsuccessful parse.
        pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
    }
}
8339
/**
 * The result of interpreting a magic comment value as a boolean.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
8345
8350static pm_magic_comment_boolean_value_t
8351parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8352 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8353 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8354 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8355 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8356 } else {
8357 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8358 }
8359}
8360
/**
 * Return true if the given byte delimits a magic comment key: a single quote,
 * a double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8365
8371static inline const uint8_t *
8372parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8373 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8374 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8375 return cursor;
8376 }
8377 cursor++;
8378 }
8379 return NULL;
8380}
8381
8392static inline bool
8393parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8394 bool result = true;
8395
8396 const uint8_t *start = parser->current.start + 1;
8397 const uint8_t *end = parser->current.end;
8398 if (end - start <= 7) return false;
8399
8400 const uint8_t *cursor;
8401 bool indicator = false;
8402
8403 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8404 start = cursor + 3;
8405
8406 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8407 end = cursor;
8408 indicator = true;
8409 } else {
8410 // If we have a start marker but not an end marker, then we cannot
8411 // have a magic comment.
8412 return false;
8413 }
8414 }
8415
8416 cursor = start;
8417 while (cursor < end) {
8418 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8419
8420 const uint8_t *key_start = cursor;
8421 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8422
8423 const uint8_t *key_end = cursor;
8424 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8425 if (cursor == end) break;
8426
8427 if (*cursor == ':') {
8428 cursor++;
8429 } else {
8430 if (!indicator) return false;
8431 continue;
8432 }
8433
8434 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8435 if (cursor == end) break;
8436
8437 const uint8_t *value_start;
8438 const uint8_t *value_end;
8439
8440 if (*cursor == '"') {
8441 value_start = ++cursor;
8442 for (; cursor < end && *cursor != '"'; cursor++) {
8443 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8444 }
8445 value_end = cursor;
8446 if (*cursor == '"') cursor++;
8447 } else {
8448 value_start = cursor;
8449 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8450 value_end = cursor;
8451 }
8452
8453 if (indicator) {
8454 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8455 } else {
8456 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8457 if (cursor != end) return false;
8458 }
8459
8460 // Here, we need to do some processing on the key to swap out dashes for
8461 // underscores. We only need to do this if there _is_ a dash in the key.
8462 pm_string_t key;
8463 const size_t key_length = (size_t) (key_end - key_start);
8464 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8465
8466 if (dash == NULL) {
8467 pm_string_shared_init(&key, key_start, key_end);
8468 } else {
8469 uint8_t *buffer = xmalloc(key_length);
8470 if (buffer == NULL) break;
8471
8472 memcpy(buffer, key_start, key_length);
8473 buffer[dash - key_start] = '_';
8474
8475 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8476 buffer[dash - key_start] = '_';
8477 }
8478
8479 pm_string_owned_init(&key, buffer, key_length);
8480 }
8481
8482 // Finally, we can start checking the key against the list of known
8483 // magic comment keys, and potentially change state based on that.
8484 const uint8_t *key_source = pm_string_source(&key);
8485 uint32_t value_length = (uint32_t) (value_end - value_start);
8486
8487 // We only want to attempt to compare against encoding comments if it's
8488 // the first line in the file (or the second in the case of a shebang).
8489 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8490 if (
8491 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8492 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8493 ) {
8494 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8495 }
8496 }
8497
8498 if (key_length == 11) {
8499 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8500 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8501 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8502 PM_PARSER_WARN_TOKEN_FORMAT(
8503 parser,
8504 parser->current,
8505 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8506 (int) key_length,
8507 (const char *) key_source,
8508 (int) value_length,
8509 (const char *) value_start
8510 );
8511 break;
8512 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8513 parser->warn_mismatched_indentation = false;
8514 break;
8515 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8516 parser->warn_mismatched_indentation = true;
8517 break;
8518 }
8519 }
8520 } else if (key_length == 21) {
8521 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8522 // We only want to handle frozen string literal comments if it's
8523 // before any semantic tokens have been seen.
8524 if (semantic_token_seen) {
8525 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8526 } else {
8527 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8528 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8529 PM_PARSER_WARN_TOKEN_FORMAT(
8530 parser,
8531 parser->current,
8532 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8533 (int) key_length,
8534 (const char *) key_source,
8535 (int) value_length,
8536 (const char *) value_start
8537 );
8538 break;
8539 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8541 break;
8542 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8544 break;
8545 }
8546 }
8547 }
8548 } else if (key_length == 24) {
8549 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8550 const uint8_t *cursor = parser->current.start;
8551 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8552
8553 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8554 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8555 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8556 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8557 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8558 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8559 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8560 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8561 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8562 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8563 } else {
8564 PM_PARSER_WARN_TOKEN_FORMAT(
8565 parser,
8566 parser->current,
8567 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8568 (int) key_length,
8569 (const char *) key_source,
8570 (int) value_length,
8571 (const char *) value_start
8572 );
8573 }
8574 }
8575 }
8576
8577 // When we're done, we want to free the string in case we had to
8578 // allocate memory for it.
8579 pm_string_free(&key);
8580
8581 // Allocate a new magic comment node to append to the parser's list.
8583 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8584 magic_comment->key_start = key_start;
8585 magic_comment->value_start = value_start;
8586 magic_comment->key_length = (uint32_t) key_length;
8587 magic_comment->value_length = value_length;
8588 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8589 }
8590 }
8591
8592 return result;
8593}
8594
8595/******************************************************************************/
8596/* Context manipulations */
8597/******************************************************************************/
8598
8599static const uint32_t context_terminators[] = {
8600 [PM_CONTEXT_NONE] = 0,
8601 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8602 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8603 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8604 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8605 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
8606 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8607 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8608 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8609 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8610 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
8611 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
8612 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8613 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8614 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8615 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8616 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8617 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8618 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8619 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8620 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
8621 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
8622 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
8623 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
8624 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
8625 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
8626 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
8627 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
8628 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
8629 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
8630 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8631 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8632 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8633 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8634 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
8635 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
8636 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8637 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8638 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8639 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8640 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
8641 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
8642 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
8643 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
8644 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
8645 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
8646 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
8647 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
8648 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
8649 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8650 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
8651 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
8652 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
8653 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
8654};
8655
8656static inline bool
8657context_terminator(pm_context_t context, pm_token_t *token) {
8658 return token->type < 32 && (context_terminators[context] & (1U << token->type));
8659}
8660
8665static pm_context_t
8666context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8667 pm_context_node_t *context_node = parser->current_context;
8668
8669 while (context_node != NULL) {
8670 if (context_terminator(context_node->context, token)) return context_node->context;
8671 context_node = context_node->prev;
8672 }
8673
8674 return PM_CONTEXT_NONE;
8675}
8676
8677static bool
8678context_push(pm_parser_t *parser, pm_context_t context) {
8679 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8680 if (context_node == NULL) return false;
8681
8682 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8683
8684 if (parser->current_context == NULL) {
8685 parser->current_context = context_node;
8686 } else {
8687 context_node->prev = parser->current_context;
8688 parser->current_context = context_node;
8689 }
8690
8691 return true;
8692}
8693
8694static void
8695context_pop(pm_parser_t *parser) {
8696 pm_context_node_t *prev = parser->current_context->prev;
8697 xfree(parser->current_context);
8698 parser->current_context = prev;
8699}
8700
8701static bool
8702context_p(const pm_parser_t *parser, pm_context_t context) {
8703 pm_context_node_t *context_node = parser->current_context;
8704
8705 while (context_node != NULL) {
8706 if (context_node->context == context) return true;
8707 context_node = context_node->prev;
8708 }
8709
8710 return false;
8711}
8712
8713static bool
8714context_def_p(const pm_parser_t *parser) {
8715 pm_context_node_t *context_node = parser->current_context;
8716
8717 while (context_node != NULL) {
8718 switch (context_node->context) {
8719 case PM_CONTEXT_DEF:
8724 return true;
8725 case PM_CONTEXT_CLASS:
8729 case PM_CONTEXT_MODULE:
8733 case PM_CONTEXT_SCLASS:
8737 return false;
8738 default:
8739 context_node = context_node->prev;
8740 }
8741 }
8742
8743 return false;
8744}
8745
8750static const char *
8751context_human(pm_context_t context) {
8752 switch (context) {
8753 case PM_CONTEXT_NONE:
8754 assert(false && "unreachable");
8755 return "";
8756 case PM_CONTEXT_BEGIN: return "begin statement";
8757 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8758 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8759 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8760 case PM_CONTEXT_CASE_IN: return "'in' clause";
8761 case PM_CONTEXT_CLASS: return "class definition";
8762 case PM_CONTEXT_DEF: return "method definition";
8763 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8764 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8765 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8766 case PM_CONTEXT_ELSE:
8773 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8774 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8775 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8782 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8783 case PM_CONTEXT_FOR: return "for loop";
8784 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8785 case PM_CONTEXT_IF: return "if statement";
8786 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8787 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8788 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8789 case PM_CONTEXT_MAIN: return "top level context";
8790 case PM_CONTEXT_MODULE: return "module definition";
8791 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8792 case PM_CONTEXT_PARENS: return "parentheses";
8793 case PM_CONTEXT_POSTEXE: return "'END' block";
8794 case PM_CONTEXT_PREDICATE: return "predicate";
8795 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8803 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8804 case PM_CONTEXT_SCLASS: return "singleton class definition";
8805 case PM_CONTEXT_TERNARY: return "ternary expression";
8806 case PM_CONTEXT_UNLESS: return "unless statement";
8807 case PM_CONTEXT_UNTIL: return "until statement";
8808 case PM_CONTEXT_WHILE: return "while statement";
8809 }
8810
8811 assert(false && "unreachable");
8812 return "";
8813}
8814
8815/******************************************************************************/
8816/* Specific token lexers */
8817/******************************************************************************/
8818
8819static inline void
8820pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8821 if (invalid != NULL) {
8822 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8823 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8824 }
8825}
8826
8827static size_t
8828pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8829 const uint8_t *invalid = NULL;
8830 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8831 pm_strspn_number_validate(parser, string, length, invalid);
8832 return length;
8833}
8834
8835static size_t
8836pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8837 const uint8_t *invalid = NULL;
8838 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8839 pm_strspn_number_validate(parser, string, length, invalid);
8840 return length;
8841}
8842
8843static size_t
8844pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8845 const uint8_t *invalid = NULL;
8846 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8847 pm_strspn_number_validate(parser, string, length, invalid);
8848 return length;
8849}
8850
8851static size_t
8852pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8853 const uint8_t *invalid = NULL;
8854 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8855 pm_strspn_number_validate(parser, string, length, invalid);
8856 return length;
8857}
8858
8859static pm_token_type_t
8860lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8861 pm_token_type_t type = PM_TOKEN_INTEGER;
8862
8863 // Here we're going to attempt to parse the optional decimal portion of a
8864 // float. If it's not there, then it's okay and we'll just continue on.
8865 if (peek(parser) == '.') {
8866 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8867 parser->current.end += 2;
8868 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8869 type = PM_TOKEN_FLOAT;
8870 } else {
8871 // If we had a . and then something else, then it's not a float
8872 // suffix on a number it's a method call or something else.
8873 return type;
8874 }
8875 }
8876
8877 // Here we're going to attempt to parse the optional exponent portion of a
8878 // float. If it's not there, it's okay and we'll just continue on.
8879 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8880 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8881 parser->current.end += 2;
8882
8883 if (pm_char_is_decimal_digit(peek(parser))) {
8884 parser->current.end++;
8885 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8886 } else {
8887 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8888 }
8889 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8890 parser->current.end++;
8891 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8892 } else {
8893 return type;
8894 }
8895
8896 *seen_e = true;
8897 type = PM_TOKEN_FLOAT;
8898 }
8899
8900 return type;
8901}
8902
8903static pm_token_type_t
8904lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8905 pm_token_type_t type = PM_TOKEN_INTEGER;
8906 *seen_e = false;
8907
8908 if (peek_offset(parser, -1) == '0') {
8909 switch (*parser->current.end) {
8910 // 0d1111 is a decimal number
8911 case 'd':
8912 case 'D':
8913 parser->current.end++;
8914 if (pm_char_is_decimal_digit(peek(parser))) {
8915 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8916 } else {
8917 match(parser, '_');
8918 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8919 }
8920
8921 break;
8922
8923 // 0b1111 is a binary number
8924 case 'b':
8925 case 'B':
8926 parser->current.end++;
8927 if (pm_char_is_binary_digit(peek(parser))) {
8928 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8929 } else {
8930 match(parser, '_');
8931 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8932 }
8933
8934 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8935 break;
8936
8937 // 0o1111 is an octal number
8938 case 'o':
8939 case 'O':
8940 parser->current.end++;
8941 if (pm_char_is_octal_digit(peek(parser))) {
8942 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8943 } else {
8944 match(parser, '_');
8945 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8946 }
8947
8948 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8949 break;
8950
8951 // 01111 is an octal number
8952 case '_':
8953 case '0':
8954 case '1':
8955 case '2':
8956 case '3':
8957 case '4':
8958 case '5':
8959 case '6':
8960 case '7':
8961 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8962 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8963 break;
8964
8965 // 0x1111 is a hexadecimal number
8966 case 'x':
8967 case 'X':
8968 parser->current.end++;
8969 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8970 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8971 } else {
8972 match(parser, '_');
8973 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8974 }
8975
8976 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8977 break;
8978
8979 // 0.xxx is a float
8980 case '.': {
8981 type = lex_optional_float_suffix(parser, seen_e);
8982 break;
8983 }
8984
8985 // 0exxx is a float
8986 case 'e':
8987 case 'E': {
8988 type = lex_optional_float_suffix(parser, seen_e);
8989 break;
8990 }
8991 }
8992 } else {
8993 // If it didn't start with a 0, then we'll lex as far as we can into a
8994 // decimal number.
8995 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8996
8997 // Afterward, we'll lex as far as we can into an optional float suffix.
8998 type = lex_optional_float_suffix(parser, seen_e);
8999 }
9000
9001 // At this point we have a completed number, but we want to provide the user
9002 // with a good experience if they put an additional .xxx fractional
9003 // component on the end, so we'll check for that here.
9004 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
9005 const uint8_t *fraction_start = parser->current.end;
9006 const uint8_t *fraction_end = parser->current.end + 2;
9007 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
9008 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
9009 }
9010
9011 return type;
9012}
9013
9014static pm_token_type_t
9015lex_numeric(pm_parser_t *parser) {
9016 pm_token_type_t type = PM_TOKEN_INTEGER;
9017 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
9018
9019 if (parser->current.end < parser->end) {
9020 bool seen_e = false;
9021 type = lex_numeric_prefix(parser, &seen_e);
9022
9023 const uint8_t *end = parser->current.end;
9024 pm_token_type_t suffix_type = type;
9025
9026 if (type == PM_TOKEN_INTEGER) {
9027 if (match(parser, 'r')) {
9028 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9029
9030 if (match(parser, 'i')) {
9031 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
9032 }
9033 } else if (match(parser, 'i')) {
9034 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9035 }
9036 } else {
9037 if (!seen_e && match(parser, 'r')) {
9038 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9039
9040 if (match(parser, 'i')) {
9041 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
9042 }
9043 } else if (match(parser, 'i')) {
9044 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9045 }
9046 }
9047
9048 const uint8_t b = peek(parser);
9049 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9050 parser->current.end = end;
9051 } else {
9052 type = suffix_type;
9053 }
9054 }
9055
9056 return type;
9057}
9058
9059static pm_token_type_t
9060lex_global_variable(pm_parser_t *parser) {
9061 if (parser->current.end >= parser->end) {
9062 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9063 return PM_TOKEN_GLOBAL_VARIABLE;
9064 }
9065
9066 // True if multiple characters are allowed after the declaration of the
9067 // global variable. Not true when it starts with "$-".
9068 bool allow_multiple = true;
9069
9070 switch (*parser->current.end) {
9071 case '~': // $~: match-data
9072 case '*': // $*: argv
9073 case '$': // $$: pid
9074 case '?': // $?: last status
9075 case '!': // $!: error string
9076 case '@': // $@: error position
9077 case '/': // $/: input record separator
9078 case '\\': // $\: output record separator
9079 case ';': // $;: field separator
9080 case ',': // $,: output field separator
9081 case '.': // $.: last read line number
9082 case '=': // $=: ignorecase
9083 case ':': // $:: load path
9084 case '<': // $<: reading filename
9085 case '>': // $>: default output handle
9086 case '\"': // $": already loaded files
9087 parser->current.end++;
9088 return PM_TOKEN_GLOBAL_VARIABLE;
9089
9090 case '&': // $&: last match
9091 case '`': // $`: string before last match
9092 case '\'': // $': string after last match
9093 case '+': // $+: string matches last paren.
9094 parser->current.end++;
9095 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9096
9097 case '0': {
9098 parser->current.end++;
9099 size_t width;
9100
9101 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9102 do {
9103 parser->current.end += width;
9104 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9105
9106 // $0 isn't allowed to be followed by anything.
9107 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9108 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9109 }
9110
9111 return PM_TOKEN_GLOBAL_VARIABLE;
9112 }
9113
9114 case '1':
9115 case '2':
9116 case '3':
9117 case '4':
9118 case '5':
9119 case '6':
9120 case '7':
9121 case '8':
9122 case '9':
9123 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9124 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9125
9126 case '-':
9127 parser->current.end++;
9128 allow_multiple = false;
9130 default: {
9131 size_t width;
9132
9133 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9134 do {
9135 parser->current.end += width;
9136 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9137 } else if (pm_char_is_whitespace(peek(parser))) {
9138 // If we get here, then we have a $ followed by whitespace,
9139 // which is not allowed.
9140 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9141 } else {
9142 // If we get here, then we have a $ followed by something that
9143 // isn't recognized as a global variable.
9144 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9145 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9146 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9147 }
9148
9149 return PM_TOKEN_GLOBAL_VARIABLE;
9150 }
9151 }
9152}
9153
9166static inline pm_token_type_t
9167lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9168 if (memcmp(current_start, value, vlen) == 0) {
9169 pm_lex_state_t last_state = parser->lex_state;
9170
9171 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9172 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9173 } else {
9174 lex_state_set(parser, state);
9175 if (state == PM_LEX_STATE_BEG) {
9176 parser->command_start = true;
9177 }
9178
9179 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9180 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9181 return modifier_type;
9182 }
9183 }
9184
9185 return type;
9186 }
9187
9188 return PM_TOKEN_EOF;
9189}
9190
9191static pm_token_type_t
9192lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9193 // Lex as far as we can into the current identifier.
9194 size_t width;
9195 const uint8_t *end = parser->end;
9196 const uint8_t *current_start = parser->current.start;
9197 const uint8_t *current_end = parser->current.end;
9198 bool encoding_changed = parser->encoding_changed;
9199
9200 if (encoding_changed) {
9201 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
9202 current_end += width;
9203 }
9204 } else {
9205 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
9206 current_end += width;
9207 }
9208 }
9209 parser->current.end = current_end;
9210
9211 // Now cache the length of the identifier so that we can quickly compare it
9212 // against known keywords.
9213 width = (size_t) (current_end - current_start);
9214
9215 if (current_end < end) {
9216 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9217 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9218 // check if we're returning the defined? keyword or just an identifier.
9219 width++;
9220
9221 if (
9222 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9223 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9224 ) {
9225 // If we're in a position where we can accept a : at the end of an
9226 // identifier, then we'll optionally accept it.
9227 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9228 (void) match(parser, ':');
9229 return PM_TOKEN_LABEL;
9230 }
9231
9232 if (parser->lex_state != PM_LEX_STATE_DOT) {
9233 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9234 return PM_TOKEN_KEYWORD_DEFINED;
9235 }
9236 }
9237
9238 return PM_TOKEN_METHOD_NAME;
9239 }
9240
9241 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9242 // If we're in a position where we can accept a = at the end of an
9243 // identifier, then we'll optionally accept it.
9244 return PM_TOKEN_IDENTIFIER;
9245 }
9246
9247 if (
9248 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9249 peek(parser) == ':' && peek_offset(parser, 1) != ':'
9250 ) {
9251 // If we're in a position where we can accept a : at the end of an
9252 // identifier, then we'll optionally accept it.
9253 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9254 (void) match(parser, ':');
9255 return PM_TOKEN_LABEL;
9256 }
9257 }
9258
9259 if (parser->lex_state != PM_LEX_STATE_DOT) {
9260 pm_token_type_t type;
9261 switch (width) {
9262 case 2:
9263 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9264 if (pm_do_loop_stack_p(parser)) {
9265 return PM_TOKEN_KEYWORD_DO_LOOP;
9266 }
9267 return PM_TOKEN_KEYWORD_DO;
9268 }
9269
9270 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9271 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9272 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9273 break;
9274 case 3:
9275 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9277 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9279 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9280 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282 break;
9283 case 4:
9284 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9287 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9288 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9289 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9290 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9291 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9292 break;
9293 case 5:
9294 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9295 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9296 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9298 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9299 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9300 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9301 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9302 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9303 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9304 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9305 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9306 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9307 break;
9308 case 6:
9309 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9310 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9311 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9312 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9313 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9314 break;
9315 case 8:
9316 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9317 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9318 break;
9319 case 12:
9320 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9321 break;
9322 }
9323 }
9324
9325 if (encoding_changed) {
9326 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9327 }
9328 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9329}
9330
9335static bool
9336current_token_starts_line(pm_parser_t *parser) {
9337 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9338}
9339
/**
 * Decide what to do with a `#` found while lexing string-like content. `pound`
 * points at the `#` byte.
 *
 * Possible results:
 *
 * - PM_TOKEN_STRING_CONTENT: either the input ends too soon after the `#` for
 *   it to be interpolation, or content was already consumed before the `#`
 *   and has to be flushed first (parser->current.end is then set to `pound`).
 * - PM_TOKEN_EMBVAR: `#@`, `#@@` or `#$` introduces a variable. The
 *   PM_LEX_EMBVAR lex mode is pushed and the token ends right after the `#`.
 * - PM_TOKEN_EMBEXPR_BEGIN: `#{` introduces an embedded expression. The
 *   PM_LEX_EMBEXPR lex mode is pushed, enclosure_nesting is incremented, and
 *   the token ends right after the `{`.
 * - PM_TOKEN_NOT_PROVIDED: the `#` is not interpolation; parser->current.end
 *   is moved just past it so the caller can keep lexing.
 *
 * Note that the return value of lex_mode_push is not checked here.
 */
9354static pm_token_type_t
9355lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9356 // If there is no content following this #, then we're at the end of
9357 // the string and we can safely return string content.
9358 if (pound + 1 >= parser->end) {
9359 parser->current.end = pound + 1;
9360 return PM_TOKEN_STRING_CONTENT;
9361 }
9362
9363 // Now we'll check against the character that follows the #. If it constitutes
9364 // valid interpolation, we'll handle that, otherwise we'll return
9365 // PM_TOKEN_NOT_PROVIDED.
9366 switch (pound[1]) {
9367 case '@': {
9368 // In this case we may have hit an embedded instance or class variable.
9369 if (pound + 2 >= parser->end) {
9370 parser->current.end = pound + 1;
9371 return PM_TOKEN_STRING_CONTENT;
9372 }
9373
9374 // If we're looking at a @ and there's another @, then we'll skip past the
9375 // second @.
9376 const uint8_t *variable = pound + 2;
9377 if (*variable == '@' && pound + 3 < parser->end) variable++;
9378
9379 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
9380 // At this point we're sure that we've either hit an embedded instance
9381 // or class variable. In this case we'll first need to check if we've
9382 // already consumed content.
9383 if (pound > parser->current.start) {
9384 parser->current.end = pound;
9385 return PM_TOKEN_STRING_CONTENT;
9386 }
9387
9388 // Otherwise we need to return the embedded variable token
9389 // and then switch to the embedded variable lex mode.
9390 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9391 parser->current.end = pound + 1;
9392 return PM_TOKEN_EMBVAR;
9393 }
9394
9395 // If we didn't get a valid interpolation, then this is just regular
9396 // string content. This is like if we get "#@-". In this case the caller
9397 // should keep lexing.
9398 parser->current.end = pound + 1;
9399 return PM_TOKEN_NOT_PROVIDED;
9400 }
9401 case '$':
9402 // In this case we may have hit an embedded global variable. If there's
9403 // not enough room, then we'll just return string content.
9404 if (pound + 2 >= parser->end) {
9405 parser->current.end = pound + 1;
9406 return PM_TOKEN_STRING_CONTENT;
9407 }
9408
9409 // This is the character that we're going to check to see if it is the
9410 // start of an identifier that would indicate that this is a global
9411 // variable.
9412 const uint8_t *check = pound + 2;
9413
9414 if (pound[2] == '-') {
9415 if (pound + 3 >= parser->end) {
9416 parser->current.end = pound + 2;
9417 return PM_TOKEN_STRING_CONTENT;
9418 }
9419
9420 check++;
9421 }
9422
9423 // If the character that we're going to check is the start of an
9424 // identifier, or we don't have a - and the character is a decimal number
9425 // or a global name punctuation character, then we've hit an embedded
9426 // global variable.
9427 if (
9428 char_is_identifier_start(parser, check, parser->end - check) ||
9429 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9430 ) {
9431 // In this case we've hit an embedded global variable. First check to
9432 // see if we've already consumed content. If we have, then we need to
9433 // return that content as string content first.
9434 if (pound > parser->current.start) {
9435 parser->current.end = pound;
9436 return PM_TOKEN_STRING_CONTENT;
9437 }
9438
9439 // Otherwise, we need to return the embedded variable token and switch
9440 // to the embedded variable lex mode.
9441 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9442 parser->current.end = pound + 1;
9443 return PM_TOKEN_EMBVAR;
9444 }
9445
9446 // In this case we've hit a #$ that does not indicate a global variable.
9447 // In this case we'll continue lexing past it.
9448 parser->current.end = pound + 1;
9449 return PM_TOKEN_NOT_PROVIDED;
9450 case '{':
9451 // In this case it's the start of an embedded expression. If we have
9452 // already consumed content, then we need to return that content as string
9453 // content first.
9454 if (pound > parser->current.start) {
9455 parser->current.end = pound;
9456 return PM_TOKEN_STRING_CONTENT;
9457 }
9458
9459 parser->enclosure_nesting++;
9460
9461 // Otherwise we'll skip past the #{ and begin lexing the embedded
9462 // expression.
9463 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9464 parser->current.end = pound + 2;
9465 parser->command_start = true;
9466 pm_do_loop_stack_push(parser, false);
9467 return PM_TOKEN_EMBEXPR_BEGIN;
9468 default:
9469 // In this case we've hit a # that doesn't constitute interpolation. We'll
9470 // mark that by returning the not provided token type. This tells the
9471 // consumer to keep lexing forward.
9472 parser->current.end = pound + 1;
9473 return PM_TOKEN_NOT_PROVIDED;
9474 }
9475}
9476
// Bit flags describing the context in which an escape sequence is being read.
// They are combined with | and passed around as a uint8_t.

// No special handling.
9477static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;
// Inside a control escape (\c or \C-): escape_byte masks the byte with 0x9f.
9478static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;
// Inside a meta escape (\M-): escape_byte sets the high bit (0x80).
9479static const uint8_t PM_ESCAPE_FLAG_META = 0x2;
// The escape belongs to a character literal (passed by lex_question_mark).
9480static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;
// The escape belongs to a regular expression: the escape is additionally
// written into the separate regular expression buffer.
9481static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
9482
// Lookup table with one entry per 7-bit ASCII value (128 entries, 16 per
// row). An entry is 1 for the whitespace controls 0x09-0x0D (tab through
// carriage return) and for 0x20-0x7E with the exception of the backslash
// (0x5C); every other entry, including DEL (0x7F), is 0. Callers must only
// index it with values below 0x80.
9486static const bool ascii_printable_chars[] = {
9487 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9489 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9490 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9491 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9492 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
9493 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9494 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
9495};
9496
9497static inline bool
9498char_is_ascii_printable(const uint8_t b) {
9499 return (b < 0x80) && ascii_printable_chars[b];
9500}
9501
/**
 * Convert a single hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
 * into its numeric value. The caller is expected to have validated that the
 * byte is a hexadecimal digit.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // Decimal digits sort below the letters, so a single comparison is enough
    // to tell the two groups apart.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For both 'a'-'f' and 'A'-'F' the low three bits are 1 through 6, so
    // adding 9 yields 10 through 15.
    return (uint8_t) ((value & 0x7) + 9);
}
9510
9516static inline uint32_t
9517escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9518 uint32_t value = 0;
9519 for (size_t index = 0; index < length; index++) {
9520 if (index != 0) value <<= 4;
9521 value |= escape_hexadecimal_digit(string[index]);
9522 }
9523
9524 // Here we're going to verify that the value is actually a valid Unicode
9525 // codepoint and not a surrogate pair.
9526 if (value >= 0xD800 && value <= 0xDFFF) {
9527 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9528 return 0xFFFD;
9529 }
9530
9531 return value;
9532}
9533
9537static inline uint8_t
9538escape_byte(uint8_t value, const uint8_t flags) {
9539 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9540 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9541 return value;
9542}
9543
9547static inline void
9548escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9549 // \u escape sequences in string-like structures implicitly change the
9550 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9551 // literal.
9552 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9553 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9554 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9555 }
9556
9558 }
9559
9560 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
9561 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9562 pm_buffer_append_byte(buffer, 0xEF);
9563 pm_buffer_append_byte(buffer, 0xBF);
9564 pm_buffer_append_byte(buffer, 0xBD);
9565 }
9566}
9567
9572static inline void
9573escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9574 if (byte >= 0x80) {
9575 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9576 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9577 }
9578
9579 parser->explicit_encoding = parser->encoding;
9580 }
9581
9582 pm_buffer_append_byte(buffer, byte);
9583}
9584
9600static inline void
9601escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9602 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9603 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9604 }
9605
9606 escape_write_byte_encoded(parser, buffer, byte);
9607}
9608
9612static inline void
9613escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9614 size_t width;
9615 if (parser->encoding_changed) {
9616 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9617 } else {
9618 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9619 }
9620
9621 if (width == 1) {
9622 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9623 } else if (width > 1) {
9624 // Valid multibyte character. Just ignore escape.
9625 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
9626 pm_buffer_append_bytes(b, parser->current.end, width);
9627 parser->current.end += width;
9628 } else {
9629 // Assume the next character wasn't meant to be part of this escape
9630 // sequence since it is invalid. Add an error and move on.
9631 parser->current.end++;
9632 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9633 }
9634}
9635
9641static void
9642escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9643#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9644
9645 PM_PARSER_WARN_TOKEN_FORMAT(
9646 parser,
9647 parser->current,
9648 PM_WARN_INVALID_CHARACTER,
9649 FLAG(flags),
9650 FLAG(flag),
9651 type
9652 );
9653
9654#undef FLAG
9655}
9656
/**
 * Read a single escape sequence and write its unescaped value to `buffer`.
 *
 * The character at parser->current.end is the one that selects the escape
 * (several branches compute the start of the escape as current.end - 1, so the
 * backslash is presumably the byte just before it). parser->current.end is
 * advanced past whatever is consumed.
 *
 * `flags` is a combination of the PM_ESCAPE_FLAG_* values. When
 * PM_ESCAPE_FLAG_REGEXP is set, the escape is additionally written to
 * `regular_expression_buffer`, and some malformed \u escapes are copied
 * through to that buffer instead of being reported here.
 * PM_ESCAPE_FLAG_CONTROL and PM_ESCAPE_FLAG_META are added when recursing for
 * nested \c, \C- and \M- escapes.
 */
9660static void
9661escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9662 uint8_t peeked = peek(parser);
9663 switch (peeked) {
 // Simple single-character escapes: consume the character and write the
 // byte it stands for, after applying any control/meta flags.
9664 case '\\': {
9665 parser->current.end++;
9666 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9667 return;
9668 }
9669 case '\'': {
9670 parser->current.end++;
9671 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9672 return;
9673 }
9674 case 'a': {
9675 parser->current.end++;
9676 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9677 return;
9678 }
9679 case 'b': {
9680 parser->current.end++;
9681 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9682 return;
9683 }
9684 case 'e': {
9685 parser->current.end++;
9686 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9687 return;
9688 }
9689 case 'f': {
9690 parser->current.end++;
9691 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9692 return;
9693 }
9694 case 'n': {
9695 parser->current.end++;
9696 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9697 return;
9698 }
9699 case 'r': {
9700 parser->current.end++;
9701 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9702 return;
9703 }
9704 case 's': {
9705 parser->current.end++;
9706 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9707 return;
9708 }
9709 case 't': {
9710 parser->current.end++;
9711 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9712 return;
9713 }
9714 case 'v': {
9715 parser->current.end++;
9716 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9717 return;
9718 }
 // Octal escape: one to three octal digits. The value is kept in a
 // uint8_t, so anything above 0xFF is truncated.
9719 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9720 uint8_t value = (uint8_t) (*parser->current.end - '0');
9721 parser->current.end++;
9722
9723 if (pm_char_is_octal_digit(peek(parser))) {
9724 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9725 parser->current.end++;
9726
9727 if (pm_char_is_octal_digit(peek(parser))) {
9728 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9729 parser->current.end++;
9730 }
9731 }
9732
9733 value = escape_byte(value, flags);
9734 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9735 return;
9736 }
 // Hexadecimal escape: \x followed by one or two hexadecimal digits.
9737 case 'x': {
9738 const uint8_t *start = parser->current.end - 1;
9739
9740 parser->current.end++;
9741 uint8_t byte = peek(parser);
9742
9743 if (pm_char_is_hexadecimal_digit(byte)) {
9744 uint8_t value = escape_hexadecimal_digit(byte);
9745 parser->current.end++;
9746
9747 byte = peek(parser);
9748 if (pm_char_is_hexadecimal_digit(byte)) {
9749 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9750 parser->current.end++;
9751 }
9752
9753 value = escape_byte(value, flags);
9754 if (flags & PM_ESCAPE_FLAG_REGEXP) {
 // With control/meta applied the source text no longer matches
 // the byte, so a fresh \xNN is written; otherwise the source
 // text of the escape is copied as-is.
9755 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9756 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9757 } else {
9758 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9759 }
9760 }
9761
9762 escape_write_byte_encoded(parser, buffer, value);
9763 } else {
9764 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9765 }
9766
9767 return;
9768 }
 // Unicode escape: either \u{...} holding whitespace-separated codepoints
 // or \u followed by exactly four hexadecimal digits.
9769 case 'u': {
9770 const uint8_t *start = parser->current.end - 1;
9771 parser->current.end++;
9772
9773 if (parser->current.end == parser->end) {
 // NOTE: this declaration shadows the outer `start`; both hold the same
 // address at this point.
9774 const uint8_t *start = parser->current.end - 2;
9775 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9776 } else if (peek(parser) == '{') {
9777 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9778 parser->current.end++;
9779
9780 size_t whitespace;
9781 while (true) {
9782 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9783 parser->current.end += whitespace;
9784 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9785 // This is super hacky, but it gets us nicer error
9786 // messages because we can still pass it off to the
9787 // regular expression engine even if we hit an
9788 // unterminated regular expression.
9789 parser->current.end += 2;
9790 } else {
9791 break;
9792 }
9793 }
9794
9795 const uint8_t *extra_codepoints_start = NULL;
9796 int codepoints_count = 0;
9797
9798 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9799 const uint8_t *unicode_start = parser->current.end;
9800 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9801
9802 if (hexadecimal_length > 6) {
9803 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9804 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9805 } else if (hexadecimal_length == 0) {
9806 // there are not hexadecimal characters
9807
9808 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9809 // If this is a regular expression, we are going to
9810 // let the regular expression engine handle this
9811 // error instead of us.
9812 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9813 } else {
9814 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9815 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9816 }
9817
9818 return;
9819 }
9820
9821 parser->current.end += hexadecimal_length;
9822 codepoints_count++;
 // Remember where the second codepoint starts so that the error for
 // a multi-codepoint character literal can point at it.
9823 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9824 extra_codepoints_start = unicode_start;
9825 }
9826
9827 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9828 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9829
9830 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9831 }
9832
9833 // ?\u{nnnn} character literal should contain only one codepoint
9834 // and cannot be like ?\u{nnnn mmmm}.
9835 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9836 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9837 }
9838
9839 if (parser->current.end == parser->end) {
9840 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9841 } else if (peek(parser) == '}') {
9842 parser->current.end++;
9843 } else {
9844 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9845 // If this is a regular expression, we are going to let
9846 // the regular expression engine handle this error
9847 // instead of us.
9848 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9849 } else {
9850 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9851 }
9852 }
9853
9854 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9855 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9856 }
9857 } else {
9858 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9859
9860 if (length == 0) {
9861 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9862 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9863 } else {
 // NOTE: shadows the outer `start` with the same address.
9864 const uint8_t *start = parser->current.end - 2;
9865 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9866 }
9867 } else if (length == 4) {
9868 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9869
9870 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9871 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9872 }
9873
9874 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9875 parser->current.end += 4;
9876 } else {
 // Between one and three digits: consume them and report (or defer
 // to the regular expression engine).
9877 parser->current.end += length;
9878
9879 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9880 // If this is a regular expression, we are going to let
9881 // the regular expression engine handle this error
9882 // instead of us.
9883 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9884 } else {
9885 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9886 }
9887 }
9888 }
9889
9890 return;
9891 }
 // Control escape in the short form \cX.
9892 case 'c': {
9893 parser->current.end++;
9894 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9895 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9896 }
9897
9898 if (parser->current.end == parser->end) {
9899 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9900 return;
9901 }
9902
9903 uint8_t peeked = peek(parser);
9904 switch (peeked) {
9905 case '?': {
9906 parser->current.end++;
9907 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9908 return;
9909 }
9910 case '\\':
9911 parser->current.end++;
9912
9913 if (match(parser, 'u') || match(parser, 'U')) {
9914 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9915 return;
9916 }
9917
 // Nested escape: read it with the control flag added.
9918 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9919 return;
9920 case ' ':
9921 parser->current.end++;
9922 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9923 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9924 return;
9925 case '\t':
9926 parser->current.end++;
9927 escape_read_warn(parser, flags, 0, "\\t");
9928 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9929 return;
9930 default: {
9931 if (!char_is_ascii_printable(peeked)) {
9932 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9933 return;
9934 }
9935
9936 parser->current.end++;
9937 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9938 return;
9939 }
9940 }
9941 }
 // Control escape in the long form \C-X.
9942 case 'C': {
9943 parser->current.end++;
9944 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9945 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9946 }
9947
9948 if (peek(parser) != '-') {
9949 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9950 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9951 return;
9952 }
9953
9954 parser->current.end++;
9955 if (parser->current.end == parser->end) {
9956 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9957 return;
9958 }
9959
9960 uint8_t peeked = peek(parser);
9961 switch (peeked) {
9962 case '?': {
9963 parser->current.end++;
9964 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9965 return;
9966 }
9967 case '\\':
9968 parser->current.end++;
9969
9970 if (match(parser, 'u') || match(parser, 'U')) {
9971 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9972 return;
9973 }
9974
9975 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9976 return;
9977 case ' ':
9978 parser->current.end++;
9979 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9980 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9981 return;
9982 case '\t':
9983 parser->current.end++;
9984 escape_read_warn(parser, flags, 0, "\\t");
9985 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9986 return;
9987 default: {
9988 if (!char_is_ascii_printable(peeked)) {
9989 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9990 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9991 return;
9992 }
9993
9994 parser->current.end++;
9995 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9996 return;
9997 }
9998 }
9999 }
 // Meta escape \M-X.
10000 case 'M': {
10001 parser->current.end++;
10002 if (flags & PM_ESCAPE_FLAG_META) {
10003 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
10004 }
10005
10006 if (peek(parser) != '-') {
10007 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10008 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10009 return;
10010 }
10011
10012 parser->current.end++;
10013 if (parser->current.end == parser->end) {
10014 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10015 return;
10016 }
10017
10018 uint8_t peeked = peek(parser);
10019 switch (peeked) {
10020 case '\\':
10021 parser->current.end++;
10022
10023 if (match(parser, 'u') || match(parser, 'U')) {
10024 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10025 return;
10026 }
10027
10028 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10029 return;
10030 case ' ':
10031 parser->current.end++;
10032 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10033 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10034 return;
10035 case '\t':
10036 parser->current.end++;
10037 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10038 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10039 return;
10040 default:
10041 if (!char_is_ascii_printable(peeked)) {
10042 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10043 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10044 return;
10045 }
10046
10047 parser->current.end++;
10048 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10049 return;
10050 }
10051 }
 // An escaped \r\n pair is consumed together and written as a single \n.
10052 case '\r': {
10053 if (peek_offset(parser, 1) == '\n') {
10054 parser->current.end += 2;
10055 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10056 return;
10057 }
 // A \r that is not followed by \n deliberately falls through to the
 // default case below.
 /* fallthrough */
10059 }
10060 default: {
10061 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
10062 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10063 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10064 return;
10065 }
10066 if (parser->current.end < parser->end) {
10067 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
10068 } else {
10069 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10070 }
10071 return;
10072 }
10073 }
10074}
10075
10101static pm_token_type_t
10102lex_question_mark(pm_parser_t *parser) {
10103 if (lex_state_end_p(parser)) {
10104 lex_state_set(parser, PM_LEX_STATE_BEG);
10105 return PM_TOKEN_QUESTION_MARK;
10106 }
10107
10108 if (parser->current.end >= parser->end) {
10109 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10110 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10111 return PM_TOKEN_CHARACTER_LITERAL;
10112 }
10113
10114 if (pm_char_is_whitespace(*parser->current.end)) {
10115 lex_state_set(parser, PM_LEX_STATE_BEG);
10116 return PM_TOKEN_QUESTION_MARK;
10117 }
10118
10119 lex_state_set(parser, PM_LEX_STATE_BEG);
10120
10121 if (match(parser, '\\')) {
10122 lex_state_set(parser, PM_LEX_STATE_END);
10123
10124 pm_buffer_t buffer;
10125 pm_buffer_init_capacity(&buffer, 3);
10126
10127 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10128 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10129
10130 return PM_TOKEN_CHARACTER_LITERAL;
10131 } else {
10132 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10133
10134 // Ternary operators can have a ? immediately followed by an identifier
10135 // which starts with an underscore. We check for this case here.
10136 if (
10137 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10138 (
10139 (parser->current.end + encoding_width >= parser->end) ||
10140 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
10141 )
10142 ) {
10143 lex_state_set(parser, PM_LEX_STATE_END);
10144 parser->current.end += encoding_width;
10145 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10146 return PM_TOKEN_CHARACTER_LITERAL;
10147 }
10148 }
10149
10150 return PM_TOKEN_QUESTION_MARK;
10151}
10152
10157static pm_token_type_t
10158lex_at_variable(pm_parser_t *parser) {
10159 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
10160 const uint8_t *end = parser->end;
10161
10162 size_t width;
10163 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
10164 parser->current.end += width;
10165
10166 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
10167 parser->current.end += width;
10168 }
10169 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10170 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10171 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
10172 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10173 }
10174
10175 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
10176 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10177 } else {
10178 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10179 pm_parser_err_token(parser, &parser->current, diag_id);
10180 }
10181
10182 // If we're lexing an embedded variable, then we need to pop back into the
10183 // parent lex context.
10184 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10185 lex_mode_pop(parser);
10186 }
10187
10188 return type;
10189}
10190
10194static inline void
10195parser_lex_callback(pm_parser_t *parser) {
10196 if (parser->lex_callback) {
10197 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10198 }
10199}
10200
10204static inline pm_comment_t *
10205parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10206 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10207 if (comment == NULL) return NULL;
10208
10209 *comment = (pm_comment_t) {
10210 .type = type,
10211 .location = { parser->current.start, parser->current.end }
10212 };
10213
10214 return comment;
10215}
10216
10222static pm_token_type_t
10223lex_embdoc(pm_parser_t *parser) {
10224 // First, lex out the EMBDOC_BEGIN token.
10225 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10226
10227 if (newline == NULL) {
10228 parser->current.end = parser->end;
10229 } else {
10230 pm_newline_list_append(&parser->newline_list, newline);
10231 parser->current.end = newline + 1;
10232 }
10233
10234 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10235 parser_lex_callback(parser);
10236
10237 // Now, create a comment that is going to be attached to the parser.
10238 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10239 if (comment == NULL) return PM_TOKEN_EOF;
10240
10241 // Now, loop until we find the end of the embedded documentation or the end
10242 // of the file.
10243 while (parser->current.end + 4 <= parser->end) {
10244 parser->current.start = parser->current.end;
10245
10246 // If we've hit the end of the embedded documentation then we'll return
10247 // that token here.
10248 if (
10249 (memcmp(parser->current.end, "=end", 4) == 0) &&
10250 (
10251 (parser->current.end + 4 == parser->end) || // end of file
10252 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10253 (parser->current.end[4] == '\0') || // NUL or end of script
10254 (parser->current.end[4] == '\004') || // ^D
10255 (parser->current.end[4] == '\032') // ^Z
10256 )
10257 ) {
10258 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10259
10260 if (newline == NULL) {
10261 parser->current.end = parser->end;
10262 } else {
10263 pm_newline_list_append(&parser->newline_list, newline);
10264 parser->current.end = newline + 1;
10265 }
10266
10267 parser->current.type = PM_TOKEN_EMBDOC_END;
10268 parser_lex_callback(parser);
10269
10270 comment->location.end = parser->current.end;
10271 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10272
10273 return PM_TOKEN_EMBDOC_END;
10274 }
10275
10276 // Otherwise, we'll parse until the end of the line and return a line of
10277 // embedded documentation.
10278 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10279
10280 if (newline == NULL) {
10281 parser->current.end = parser->end;
10282 } else {
10283 pm_newline_list_append(&parser->newline_list, newline);
10284 parser->current.end = newline + 1;
10285 }
10286
10287 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10288 parser_lex_callback(parser);
10289 }
10290
10291 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10292
10293 comment->location.end = parser->current.end;
10294 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10295
10296 return PM_TOKEN_EOF;
10297}
10298
10304static inline void
10305parser_lex_ignored_newline(pm_parser_t *parser) {
10306 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10307 parser_lex_callback(parser);
10308}
10309
10319static inline void
10320parser_flush_heredoc_end(pm_parser_t *parser) {
10321 assert(parser->heredoc_end <= parser->end);
10322 parser->next_start = parser->heredoc_end;
10323 parser->heredoc_end = NULL;
10324}
10325
10329static bool
10330parser_end_of_line_p(const pm_parser_t *parser) {
10331 const uint8_t *cursor = parser->current.end;
10332
10333 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10334 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10335 }
10336
10337 return true;
10338}
10339
10358typedef struct {
10364
10369 const uint8_t *cursor;
10371
10391
10395static inline void
10396pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10397 pm_buffer_append_byte(&token_buffer->buffer, byte);
10398}
10399
10400static inline void
10401pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10402 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10403}
10404
10408static inline size_t
10409parser_char_width(const pm_parser_t *parser) {
10410 size_t width;
10411 if (parser->encoding_changed) {
10412 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10413 } else {
10414 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10415 }
10416
10417 // TODO: If the character is invalid in the given encoding, then we'll just
10418 // push one byte into the buffer. This should actually be an error.
10419 return (width == 0 ? 1 : width);
10420}
10421
10425static void
10426pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10427 size_t width = parser_char_width(parser);
10428 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10429 parser->current.end += width;
10430}
10431
10432static void
10433pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10434 size_t width = parser_char_width(parser);
10435 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10436 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10437 parser->current.end += width;
10438}
10439
/**
 * Returns true if none of the first `length` bytes of `value` has its high
 * bit set, that is, if every byte is 7-bit ASCII. An empty slice is
 * considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    const uint8_t *cursor = value;
    size_t remaining = length;

    while (remaining > 0) {
        if ((*cursor & 0x80) != 0) return false;
        cursor++;
        remaining--;
    }

    return true;
}
10448
10455static inline void
10456pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10457 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10458}
10459
10460static inline void
10461pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10462 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10463 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10464 pm_buffer_free(&token_buffer->regexp_buffer);
10465}
10466
10476static void
10477pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10478 if (token_buffer->cursor == NULL) {
10479 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10480 } else {
10481 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10482 pm_token_buffer_copy(parser, token_buffer);
10483 }
10484}
10485
10486static void
10487pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10488 if (token_buffer->base.cursor == NULL) {
10489 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10490 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10491 } else {
10492 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10493 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10494 pm_regexp_token_buffer_copy(parser, token_buffer);
10495 }
10496}
10497
10498#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10499
10508static void
10509pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10510 const uint8_t *start;
10511 if (token_buffer->cursor == NULL) {
10512 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10513 start = parser->current.start;
10514 } else {
10515 start = token_buffer->cursor;
10516 }
10517
10518 const uint8_t *end = parser->current.end - 1;
10519 assert(end >= start);
10520 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10521
10522 token_buffer->cursor = end;
10523}
10524
10525static void
10526pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10527 const uint8_t *start;
10528 if (token_buffer->base.cursor == NULL) {
10529 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10530 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10531 start = parser->current.start;
10532 } else {
10533 start = token_buffer->base.cursor;
10534 }
10535
10536 const uint8_t *end = parser->current.end - 1;
10537 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10538 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10539
10540 token_buffer->base.cursor = end;
10541}
10542
10543#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10544
10549static inline size_t
10550pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10551 size_t whitespace = 0;
10552
10553 switch (indent) {
10554 case PM_HEREDOC_INDENT_NONE:
10555 // Do nothing, we can't match a terminator with
10556 // indentation and there's no need to calculate common
10557 // whitespace.
10558 break;
10559 case PM_HEREDOC_INDENT_DASH:
10560 // Skip past inline whitespace.
10561 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10562 break;
10563 case PM_HEREDOC_INDENT_TILDE:
10564 // Skip past inline whitespace and calculate common
10565 // whitespace.
10566 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10567 if (**cursor == '\t') {
10568 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10569 } else {
10570 whitespace++;
10571 }
10572 (*cursor)++;
10573 }
10574
10575 break;
10576 }
10577
10578 return whitespace;
10579}
10580
10585static uint8_t
10586pm_lex_percent_delimiter(pm_parser_t *parser) {
10587 size_t eol_length = match_eol(parser);
10588
10589 if (eol_length) {
10590 if (parser->heredoc_end) {
10591 // If we have already lexed a heredoc, then the newline has already
10592 // been added to the list. In this case we want to just flush the
10593 // heredoc end.
10594 parser_flush_heredoc_end(parser);
10595 } else {
10596 // Otherwise, we'll add the newline to the list of newlines.
10597 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10598 }
10599
10600 uint8_t delimiter = *parser->current.end;
10601
10602 // If our delimiter is \r\n, we want to treat it as if it's \n.
10603 // For example, %\r\nfoo\r\n should be "foo"
10604 if (eol_length == 2) {
10605 delimiter = *(parser->current.end + 1);
10606 }
10607
10608 parser->current.end += eol_length;
10609 return delimiter;
10610 }
10611
10612 return *parser->current.end++;
10613}
10614
/**
 * Finish lexing the current token: store the given type on parser->current,
 * report the token to the lex callback, and return from the enclosing
 * function. A variable named `parser` must be in scope at the point of use.
 *
 * This expands to several statements and is not wrapped in do/while, so it
 * must only be used where a full statement list is allowed (for example inside
 * braces), never as the unbraced body of an if/else or loop. The caller
 * supplies the semicolon that terminates the trailing `return`.
 */
#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
10620
10627static void
10628parser_lex(pm_parser_t *parser) {
10629 assert(parser->current.end <= parser->end);
10630 parser->previous = parser->current;
10631
10632 // This value mirrors cmd_state from CRuby.
10633 bool previous_command_start = parser->command_start;
10634 parser->command_start = false;
10635
10636 // This is used to communicate to the newline lexing function that we've
10637 // already seen a comment.
10638 bool lexed_comment = false;
10639
10640 // Here we cache the current value of the semantic token seen flag. This is
10641 // used to reset it in case we find a token that shouldn't flip this flag.
10642 unsigned int semantic_token_seen = parser->semantic_token_seen;
10643 parser->semantic_token_seen = true;
10644
10645 switch (parser->lex_modes.current->mode) {
10646 case PM_LEX_DEFAULT:
10647 case PM_LEX_EMBEXPR:
10648 case PM_LEX_EMBVAR:
10649
10650 // We have a specific named label here because we are going to jump back to
10651 // this location in the event that we have lexed a token that should not be
10652 // returned to the parser. This includes comments, ignored newlines, and
10653 // invalid tokens of some form.
10654 lex_next_token: {
10655 // If we have the special next_start pointer set, then we're going to jump
10656 // to that location and start lexing from there.
10657 if (parser->next_start != NULL) {
10658 parser->current.end = parser->next_start;
10659 parser->next_start = NULL;
10660 }
10661
10662 // This value mirrors space_seen from CRuby. It tracks whether or not
10663 // space has been eaten before the start of the next token.
10664 bool space_seen = false;
10665
10666 // First, we're going to skip past any whitespace at the front of the next
10667 // token.
10668 bool chomping = true;
10669 while (parser->current.end < parser->end && chomping) {
10670 switch (*parser->current.end) {
10671 case ' ':
10672 case '\t':
10673 case '\f':
10674 case '\v':
10675 parser->current.end++;
10676 space_seen = true;
10677 break;
10678 case '\r':
10679 if (match_eol_offset(parser, 1)) {
10680 chomping = false;
10681 } else {
10682 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10683 parser->current.end++;
10684 space_seen = true;
10685 }
10686 break;
10687 case '\\': {
10688 size_t eol_length = match_eol_offset(parser, 1);
10689 if (eol_length) {
10690 if (parser->heredoc_end) {
10691 parser->current.end = parser->heredoc_end;
10692 parser->heredoc_end = NULL;
10693 } else {
10694 parser->current.end += eol_length + 1;
10695 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10696 space_seen = true;
10697 }
10698 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10699 parser->current.end += 2;
10700 } else {
10701 chomping = false;
10702 }
10703
10704 break;
10705 }
10706 default:
10707 chomping = false;
10708 break;
10709 }
10710 }
10711
10712 // Next, we'll set to start of this token to be the current end.
10713 parser->current.start = parser->current.end;
10714
10715 // We'll check if we're at the end of the file. If we are, then we
10716 // need to return the EOF token.
10717 if (parser->current.end >= parser->end) {
10718 // If we hit EOF, but the EOF came immediately after a newline,
10719 // set the start of the token to the newline. This way any EOF
10720 // errors will be reported as happening on that line rather than
10721 // a line after. For example "foo(\n" should report an error
10722 // on line 1 even though EOF technically occurs on line 2.
10723 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10724 parser->current.start -= 1;
10725 }
10726 LEX(PM_TOKEN_EOF);
10727 }
10728
10729 // Finally, we'll check the current character to determine the next
10730 // token.
10731 switch (*parser->current.end++) {
10732 case '\0': // NUL or end of script
10733 case '\004': // ^D
10734 case '\032': // ^Z
10735 parser->current.end--;
10736 LEX(PM_TOKEN_EOF);
10737
10738 case '#': { // comments
10739 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10740 parser->current.end = ending == NULL ? parser->end : ending;
10741
10742 // If we found a comment while lexing, then we're going to
10743 // add it to the list of comments in the file and keep
10744 // lexing.
10745 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10746 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10747
10748 if (ending) parser->current.end++;
10749 parser->current.type = PM_TOKEN_COMMENT;
10750 parser_lex_callback(parser);
10751
10752 // Here, parse the comment to see if it's a magic comment
10753 // and potentially change state on the parser.
10754 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10755 ptrdiff_t length = parser->current.end - parser->current.start;
10756
10757 // If we didn't find a magic comment within the first
10758 // pass and we're at the start of the file, then we need
10759 // to do another pass to potentially find other patterns
10760 // for encoding comments.
10761 if (length >= 10 && !parser->encoding_locked) {
10762 parser_lex_magic_comment_encoding(parser);
10763 }
10764 }
10765
10766 lexed_comment = true;
10767 }
10769 case '\r':
10770 case '\n': {
10771 parser->semantic_token_seen = semantic_token_seen & 0x1;
10772 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10773
10774 if (eol_length) {
10775 // The only way you can have carriage returns in this
10776 // particular loop is if you have a carriage return
10777 // followed by a newline. In that case we'll just skip
10778 // over the carriage return and continue lexing, in
10779 // order to make it so that the newline token
10780 // encapsulates both the carriage return and the
10781 // newline. Note that we need to check that we haven't
10782 // already lexed a comment here because that falls
10783 // through into here as well.
10784 if (!lexed_comment) {
10785 parser->current.end += eol_length - 1; // skip CR
10786 }
10787
10788 if (parser->heredoc_end == NULL) {
10789 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10790 }
10791 }
10792
10793 if (parser->heredoc_end) {
10794 parser_flush_heredoc_end(parser);
10795 }
10796
10797 // If this is an ignored newline, then we can continue lexing after
10798 // calling the callback with the ignored newline token.
10799 switch (lex_state_ignored_p(parser)) {
10800 case PM_IGNORED_NEWLINE_NONE:
10801 break;
10802 case PM_IGNORED_NEWLINE_PATTERN:
10803 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10804 if (!lexed_comment) parser_lex_ignored_newline(parser);
10805 lex_state_set(parser, PM_LEX_STATE_BEG);
10806 parser->command_start = true;
10807 parser->current.type = PM_TOKEN_NEWLINE;
10808 return;
10809 }
10811 case PM_IGNORED_NEWLINE_ALL:
10812 if (!lexed_comment) parser_lex_ignored_newline(parser);
10813 lexed_comment = false;
10814 goto lex_next_token;
10815 }
10816
10817 // Here we need to look ahead and see if there is a call operator
10818 // (either . or &.) that starts the next line. If there is, then this
10819 // is going to become an ignored newline and we're going to instead
10820 // return the call operator.
10821 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10822 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10823
10824 if (next_content < parser->end) {
10825 // If we hit a comment after a newline, then we're going to check
10826 // if it's ignored or if it's followed by a method call ('.').
10827 // If it is, then we're going to call the
10828 // callback with an ignored newline and then continue lexing.
10829 // Otherwise we'll return a regular newline.
10830 if (next_content[0] == '#') {
10831 // Here we look for a "." or "&." following a "\n".
10832 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10833
10834 while (following && (following + 1 < parser->end)) {
10835 following++;
10836 following += pm_strspn_inline_whitespace(following, parser->end - following);
10837
10838 // If this is not followed by a comment, then we can break out
10839 // of this loop.
10840 if (peek_at(parser, following) != '#') break;
10841
10842 // If there is a comment, then we need to find the end of the
10843 // comment and continue searching from there.
10844 following = next_newline(following, parser->end - following);
10845 }
10846
10847 // If the lex state was ignored, we will lex the
10848 // ignored newline.
10849 if (lex_state_ignored_p(parser)) {
10850 if (!lexed_comment) parser_lex_ignored_newline(parser);
10851 lexed_comment = false;
10852 goto lex_next_token;
10853 }
10854
10855 // If we hit a '.' or a '&.' we will lex the ignored
10856 // newline.
10857 if (following && (
10858 (peek_at(parser, following) == '.') ||
10859 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10860 )) {
10861 if (!lexed_comment) parser_lex_ignored_newline(parser);
10862 lexed_comment = false;
10863 goto lex_next_token;
10864 }
10865
10866
10867 // If we are parsing as CRuby 3.5 or later and we
10868 // hit a '&&' or a '||' then we will lex the ignored
10869 // newline.
10870 if (
10872 following && (
10873 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
10874 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
10875 (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
10876 (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
10877 )
10878 ) {
10879 if (!lexed_comment) parser_lex_ignored_newline(parser);
10880 lexed_comment = false;
10881 goto lex_next_token;
10882 }
10883 }
10884
10885 // If we hit a . after a newline, then we're in a call chain and
10886 // we need to return the call operator.
10887 if (next_content[0] == '.') {
10888 // To match ripper, we need to emit an ignored newline even though
10889 // it's a real newline in the case that we have a beginless range
10890 // on a subsequent line.
10891 if (peek_at(parser, next_content + 1) == '.') {
10892 if (!lexed_comment) parser_lex_ignored_newline(parser);
10893 lex_state_set(parser, PM_LEX_STATE_BEG);
10894 parser->command_start = true;
10895 parser->current.type = PM_TOKEN_NEWLINE;
10896 return;
10897 }
10898
10899 if (!lexed_comment) parser_lex_ignored_newline(parser);
10900 lex_state_set(parser, PM_LEX_STATE_DOT);
10901 parser->current.start = next_content;
10902 parser->current.end = next_content + 1;
10903 parser->next_start = NULL;
10904 LEX(PM_TOKEN_DOT);
10905 }
10906
10907 // If we hit a &. after a newline, then we're in a call chain and
10908 // we need to return the call operator.
10909 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10910 if (!lexed_comment) parser_lex_ignored_newline(parser);
10911 lex_state_set(parser, PM_LEX_STATE_DOT);
10912 parser->current.start = next_content;
10913 parser->current.end = next_content + 2;
10914 parser->next_start = NULL;
10915 LEX(PM_TOKEN_AMPERSAND_DOT);
10916 }
10917
10918 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
10919 // If we hit an && then we are in a logical chain
10920 // and we need to return the logical operator.
10921 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10922 if (!lexed_comment) parser_lex_ignored_newline(parser);
10923 lex_state_set(parser, PM_LEX_STATE_BEG);
10924 parser->current.start = next_content;
10925 parser->current.end = next_content + 2;
10926 parser->next_start = NULL;
10927 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10928 }
10929
10930 // If we hit a || then we are in a logical chain and
10931 // we need to return the logical operator.
10932 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10933 if (!lexed_comment) parser_lex_ignored_newline(parser);
10934 lex_state_set(parser, PM_LEX_STATE_BEG);
10935 parser->current.start = next_content;
10936 parser->current.end = next_content + 2;
10937 parser->next_start = NULL;
10938 LEX(PM_TOKEN_PIPE_PIPE);
10939 }
10940
10941 // If we hit an 'and' then we are in a logical chain
10942 // and we need to return the logical operator.
10943 if (
10944 peek_at(parser, next_content) == 'a' &&
10945 peek_at(parser, next_content + 1) == 'n' &&
10946 peek_at(parser, next_content + 2) == 'd' &&
10947 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10948 ) {
10949 if (!lexed_comment) parser_lex_ignored_newline(parser);
10950 lex_state_set(parser, PM_LEX_STATE_BEG);
10951 parser->current.start = next_content;
10952 parser->current.end = next_content + 3;
10953 parser->next_start = NULL;
10954 parser->command_start = true;
10955 LEX(PM_TOKEN_KEYWORD_AND);
10956 }
10957
10958 // If we hit a 'or' then we are in a logical chain
10959 // and we need to return the logical operator.
10960 if (
10961 peek_at(parser, next_content) == 'o' &&
10962 peek_at(parser, next_content + 1) == 'r' &&
10963 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10964 ) {
10965 if (!lexed_comment) parser_lex_ignored_newline(parser);
10966 lex_state_set(parser, PM_LEX_STATE_BEG);
10967 parser->current.start = next_content;
10968 parser->current.end = next_content + 2;
10969 parser->next_start = NULL;
10970 parser->command_start = true;
10971 LEX(PM_TOKEN_KEYWORD_OR);
10972 }
10973 }
10974 }
10975
10976 // At this point we know this is a regular newline, and we can set the
10977 // necessary state and return the token.
10978 lex_state_set(parser, PM_LEX_STATE_BEG);
10979 parser->command_start = true;
10980 parser->current.type = PM_TOKEN_NEWLINE;
10981 if (!lexed_comment) parser_lex_callback(parser);
10982 return;
10983 }
10984
10985 // ,
10986 case ',':
10987 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10988 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10989 }
10990
10991 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10992 LEX(PM_TOKEN_COMMA);
10993
10994 // (
10995 case '(': {
10996 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10997
10998 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10999 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
11000 }
11001
11002 parser->enclosure_nesting++;
11003 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11004 pm_do_loop_stack_push(parser, false);
11005 LEX(type);
11006 }
11007
11008 // )
11009 case ')':
11010 parser->enclosure_nesting--;
11011 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11012 pm_do_loop_stack_pop(parser);
11013 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
11014
11015 // ;
11016 case ';':
11017 lex_state_set(parser, PM_LEX_STATE_BEG);
11018 parser->command_start = true;
11019 LEX(PM_TOKEN_SEMICOLON);
11020
11021 // [ [] []=
11022 case '[':
11023 parser->enclosure_nesting++;
11024 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
11025
11026 if (lex_state_operator_p(parser)) {
11027 if (match(parser, ']')) {
11028 parser->enclosure_nesting--;
11029 lex_state_set(parser, PM_LEX_STATE_ARG);
11030 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
11031 }
11032
11033 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
11034 LEX(type);
11035 }
11036
11037 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
11038 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
11039 }
11040
11041 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11042 pm_do_loop_stack_push(parser, false);
11043 LEX(type);
11044
11045 // ]
11046 case ']':
11047 parser->enclosure_nesting--;
11048 lex_state_set(parser, PM_LEX_STATE_END);
11049 pm_do_loop_stack_pop(parser);
11050 LEX(PM_TOKEN_BRACKET_RIGHT);
11051
11052 // {
11053 case '{': {
11054 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
11055
11056 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
11057 // This { begins a lambda
11058 parser->command_start = true;
11059 lex_state_set(parser, PM_LEX_STATE_BEG);
11060 type = PM_TOKEN_LAMBDA_BEGIN;
11061 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
11062 // This { begins a hash literal
11063 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11064 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
11065 // This { begins a block
11066 parser->command_start = true;
11067 lex_state_set(parser, PM_LEX_STATE_BEG);
11068 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
11069 // This { begins a block on a command
11070 parser->command_start = true;
11071 lex_state_set(parser, PM_LEX_STATE_BEG);
11072 } else {
11073 // This { begins a hash literal
11074 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11075 }
11076
11077 parser->enclosure_nesting++;
11078 parser->brace_nesting++;
11079 pm_do_loop_stack_push(parser, false);
11080
11081 LEX(type);
11082 }
11083
11084 // }
11085 case '}':
11086 parser->enclosure_nesting--;
11087 pm_do_loop_stack_pop(parser);
11088
11089 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
11090 lex_mode_pop(parser);
11091 LEX(PM_TOKEN_EMBEXPR_END);
11092 }
11093
11094 parser->brace_nesting--;
11095 lex_state_set(parser, PM_LEX_STATE_END);
11096 LEX(PM_TOKEN_BRACE_RIGHT);
11097
11098 // * ** **= *=
11099 case '*': {
11100 if (match(parser, '*')) {
11101 if (match(parser, '=')) {
11102 lex_state_set(parser, PM_LEX_STATE_BEG);
11103 LEX(PM_TOKEN_STAR_STAR_EQUAL);
11104 }
11105
11106 pm_token_type_t type = PM_TOKEN_STAR_STAR;
11107
11108 if (lex_state_spcarg_p(parser, space_seen)) {
11109 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11110 type = PM_TOKEN_USTAR_STAR;
11111 } else if (lex_state_beg_p(parser)) {
11112 type = PM_TOKEN_USTAR_STAR;
11113 } else if (ambiguous_operator_p(parser, space_seen)) {
11114 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11115 }
11116
11117 if (lex_state_operator_p(parser)) {
11118 lex_state_set(parser, PM_LEX_STATE_ARG);
11119 } else {
11120 lex_state_set(parser, PM_LEX_STATE_BEG);
11121 }
11122
11123 LEX(type);
11124 }
11125
11126 if (match(parser, '=')) {
11127 lex_state_set(parser, PM_LEX_STATE_BEG);
11128 LEX(PM_TOKEN_STAR_EQUAL);
11129 }
11130
11131 pm_token_type_t type = PM_TOKEN_STAR;
11132
11133 if (lex_state_spcarg_p(parser, space_seen)) {
11134 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11135 type = PM_TOKEN_USTAR;
11136 } else if (lex_state_beg_p(parser)) {
11137 type = PM_TOKEN_USTAR;
11138 } else if (ambiguous_operator_p(parser, space_seen)) {
11139 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11140 }
11141
11142 if (lex_state_operator_p(parser)) {
11143 lex_state_set(parser, PM_LEX_STATE_ARG);
11144 } else {
11145 lex_state_set(parser, PM_LEX_STATE_BEG);
11146 }
11147
11148 LEX(type);
11149 }
11150
11151 // ! != !~ !@
11152 case '!':
11153 if (lex_state_operator_p(parser)) {
11154 lex_state_set(parser, PM_LEX_STATE_ARG);
11155 if (match(parser, '@')) {
11156 LEX(PM_TOKEN_BANG);
11157 }
11158 } else {
11159 lex_state_set(parser, PM_LEX_STATE_BEG);
11160 }
11161
11162 if (match(parser, '=')) {
11163 LEX(PM_TOKEN_BANG_EQUAL);
11164 }
11165
11166 if (match(parser, '~')) {
11167 LEX(PM_TOKEN_BANG_TILDE);
11168 }
11169
11170 LEX(PM_TOKEN_BANG);
11171
11172 // = => =~ == === =begin
11173 case '=':
11174 if (
11175 current_token_starts_line(parser) &&
11176 (parser->current.end + 5 <= parser->end) &&
11177 memcmp(parser->current.end, "begin", 5) == 0 &&
11178 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11179 ) {
11180 pm_token_type_t type = lex_embdoc(parser);
11181 if (type == PM_TOKEN_EOF) {
11182 LEX(type);
11183 }
11184
11185 goto lex_next_token;
11186 }
11187
11188 if (lex_state_operator_p(parser)) {
11189 lex_state_set(parser, PM_LEX_STATE_ARG);
11190 } else {
11191 lex_state_set(parser, PM_LEX_STATE_BEG);
11192 }
11193
11194 if (match(parser, '>')) {
11195 LEX(PM_TOKEN_EQUAL_GREATER);
11196 }
11197
11198 if (match(parser, '~')) {
11199 LEX(PM_TOKEN_EQUAL_TILDE);
11200 }
11201
11202 if (match(parser, '=')) {
11203 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11204 }
11205
11206 LEX(PM_TOKEN_EQUAL);
11207
11208 // < << <<= <= <=>
11209 case '<':
11210 if (match(parser, '<')) {
11211 if (
11212 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11213 !lex_state_end_p(parser) &&
11214 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11215 ) {
11216 const uint8_t *end = parser->current.end;
11217
11218 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11219 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11220
11221 if (match(parser, '-')) {
11222 indent = PM_HEREDOC_INDENT_DASH;
11223 }
11224 else if (match(parser, '~')) {
11225 indent = PM_HEREDOC_INDENT_TILDE;
11226 }
11227
11228 if (match(parser, '`')) {
11229 quote = PM_HEREDOC_QUOTE_BACKTICK;
11230 }
11231 else if (match(parser, '"')) {
11232 quote = PM_HEREDOC_QUOTE_DOUBLE;
11233 }
11234 else if (match(parser, '\'')) {
11235 quote = PM_HEREDOC_QUOTE_SINGLE;
11236 }
11237
11238 const uint8_t *ident_start = parser->current.end;
11239 size_t width = 0;
11240
11241 if (parser->current.end >= parser->end) {
11242 parser->current.end = end;
11243 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
11244 parser->current.end = end;
11245 } else {
11246 if (quote == PM_HEREDOC_QUOTE_NONE) {
11247 parser->current.end += width;
11248
11249 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
11250 parser->current.end += width;
11251 }
11252 } else {
11253 // If we have quotes, then we're going to go until we find the
11254 // end quote.
11255 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11256 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11257 parser->current.end++;
11258 }
11259 }
11260
11261 size_t ident_length = (size_t) (parser->current.end - ident_start);
11262 bool ident_error = false;
11263
11264 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11265 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11266 ident_error = true;
11267 }
11268
11269 parser->explicit_encoding = NULL;
11270 lex_mode_push(parser, (pm_lex_mode_t) {
11271 .mode = PM_LEX_HEREDOC,
11272 .as.heredoc = {
11273 .base = {
11274 .ident_start = ident_start,
11275 .ident_length = ident_length,
11276 .quote = quote,
11277 .indent = indent
11278 },
11279 .next_start = parser->current.end,
11280 .common_whitespace = NULL,
11281 .line_continuation = false
11282 }
11283 });
11284
11285 if (parser->heredoc_end == NULL) {
11286 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11287
11288 if (body_start == NULL) {
11289 // If there is no newline after the heredoc identifier, then
11290 // this is not a valid heredoc declaration. In this case we
11291 // will add an error, but we will still return a heredoc
11292 // start.
11293 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11294 body_start = parser->end;
11295 } else {
11296 // Otherwise, we want to indicate that the body of the
11297 // heredoc starts on the character after the next newline.
11298 pm_newline_list_append(&parser->newline_list, body_start);
11299 body_start++;
11300 }
11301
11302 parser->next_start = body_start;
11303 } else {
11304 parser->next_start = parser->heredoc_end;
11305 }
11306
11307 LEX(PM_TOKEN_HEREDOC_START);
11308 }
11309 }
11310
11311 if (match(parser, '=')) {
11312 lex_state_set(parser, PM_LEX_STATE_BEG);
11313 LEX(PM_TOKEN_LESS_LESS_EQUAL);
11314 }
11315
11316 if (ambiguous_operator_p(parser, space_seen)) {
11317 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11318 }
11319
11320 if (lex_state_operator_p(parser)) {
11321 lex_state_set(parser, PM_LEX_STATE_ARG);
11322 } else {
11323 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11324 lex_state_set(parser, PM_LEX_STATE_BEG);
11325 }
11326
11327 LEX(PM_TOKEN_LESS_LESS);
11328 }
11329
11330 if (lex_state_operator_p(parser)) {
11331 lex_state_set(parser, PM_LEX_STATE_ARG);
11332 } else {
11333 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11334 lex_state_set(parser, PM_LEX_STATE_BEG);
11335 }
11336
11337 if (match(parser, '=')) {
11338 if (match(parser, '>')) {
11339 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
11340 }
11341
11342 LEX(PM_TOKEN_LESS_EQUAL);
11343 }
11344
11345 LEX(PM_TOKEN_LESS);
11346
11347 // > >> >>= >=
11348 case '>':
11349 if (match(parser, '>')) {
11350 if (lex_state_operator_p(parser)) {
11351 lex_state_set(parser, PM_LEX_STATE_ARG);
11352 } else {
11353 lex_state_set(parser, PM_LEX_STATE_BEG);
11354 }
11355 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11356 }
11357
11358 if (lex_state_operator_p(parser)) {
11359 lex_state_set(parser, PM_LEX_STATE_ARG);
11360 } else {
11361 lex_state_set(parser, PM_LEX_STATE_BEG);
11362 }
11363
11364 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11365
11366 // double-quoted string literal
11367 case '"': {
11368 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11369 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11370 LEX(PM_TOKEN_STRING_BEGIN);
11371 }
11372
11373 // xstring literal
11374 case '`': {
11375 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11376 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11377 LEX(PM_TOKEN_BACKTICK);
11378 }
11379
11380 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11381 if (previous_command_start) {
11382 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11383 } else {
11384 lex_state_set(parser, PM_LEX_STATE_ARG);
11385 }
11386
11387 LEX(PM_TOKEN_BACKTICK);
11388 }
11389
11390 lex_mode_push_string(parser, true, false, '\0', '`');
11391 LEX(PM_TOKEN_BACKTICK);
11392 }
11393
11394 // single-quoted string literal
11395 case '\'': {
11396 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11397 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11398 LEX(PM_TOKEN_STRING_BEGIN);
11399 }
11400
11401 // ? character literal
11402 case '?':
11403 LEX(lex_question_mark(parser));
11404
11405 // & && &&= &= &.
11406 case '&': {
11407 if (match(parser, '&')) {
11408 lex_state_set(parser, PM_LEX_STATE_BEG);
11409
11410 if (match(parser, '=')) {
11411 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
11412 }
11413
11414 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
11415 }
11416
11417 if (match(parser, '=')) {
11418 lex_state_set(parser, PM_LEX_STATE_BEG);
11419 LEX(PM_TOKEN_AMPERSAND_EQUAL);
11420 }
11421
11422 if (match(parser, '.')) {
11423 lex_state_set(parser, PM_LEX_STATE_DOT);
11424 LEX(PM_TOKEN_AMPERSAND_DOT);
11425 }
11426
11427 pm_token_type_t type = PM_TOKEN_AMPERSAND;
11428 if (lex_state_spcarg_p(parser, space_seen)) {
11429 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11430 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11431 } else {
11432 const uint8_t delim = peek_offset(parser, 1);
11433
11434 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
11435 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11436 }
11437 }
11438
11439 type = PM_TOKEN_UAMPERSAND;
11440 } else if (lex_state_beg_p(parser)) {
11441 type = PM_TOKEN_UAMPERSAND;
11442 } else if (ambiguous_operator_p(parser, space_seen)) {
11443 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11444 }
11445
11446 if (lex_state_operator_p(parser)) {
11447 lex_state_set(parser, PM_LEX_STATE_ARG);
11448 } else {
11449 lex_state_set(parser, PM_LEX_STATE_BEG);
11450 }
11451
11452 LEX(type);
11453 }
11454
11455 // | || ||= |=
11456 case '|':
11457 if (match(parser, '|')) {
11458 if (match(parser, '=')) {
11459 lex_state_set(parser, PM_LEX_STATE_BEG);
11460 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
11461 }
11462
11463 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11464 parser->current.end--;
11465 LEX(PM_TOKEN_PIPE);
11466 }
11467
11468 lex_state_set(parser, PM_LEX_STATE_BEG);
11469 LEX(PM_TOKEN_PIPE_PIPE);
11470 }
11471
11472 if (match(parser, '=')) {
11473 lex_state_set(parser, PM_LEX_STATE_BEG);
11474 LEX(PM_TOKEN_PIPE_EQUAL);
11475 }
11476
11477 if (lex_state_operator_p(parser)) {
11478 lex_state_set(parser, PM_LEX_STATE_ARG);
11479 } else {
11480 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11481 }
11482
11483 LEX(PM_TOKEN_PIPE);
11484
11485 // + += +@
11486 case '+': {
11487 if (lex_state_operator_p(parser)) {
11488 lex_state_set(parser, PM_LEX_STATE_ARG);
11489
11490 if (match(parser, '@')) {
11491 LEX(PM_TOKEN_UPLUS);
11492 }
11493
11494 LEX(PM_TOKEN_PLUS);
11495 }
11496
11497 if (match(parser, '=')) {
11498 lex_state_set(parser, PM_LEX_STATE_BEG);
11499 LEX(PM_TOKEN_PLUS_EQUAL);
11500 }
11501
11502 if (
11503 lex_state_beg_p(parser) ||
11504 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11505 ) {
11506 lex_state_set(parser, PM_LEX_STATE_BEG);
11507
11508 if (pm_char_is_decimal_digit(peek(parser))) {
11509 parser->current.end++;
11510 pm_token_type_t type = lex_numeric(parser);
11511 lex_state_set(parser, PM_LEX_STATE_END);
11512 LEX(type);
11513 }
11514
11515 LEX(PM_TOKEN_UPLUS);
11516 }
11517
11518 if (ambiguous_operator_p(parser, space_seen)) {
11519 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11520 }
11521
11522 lex_state_set(parser, PM_LEX_STATE_BEG);
11523 LEX(PM_TOKEN_PLUS);
11524 }
11525
11526 // - -= -@ ->
11527 case '-': {
11528 if (lex_state_operator_p(parser)) {
11529 lex_state_set(parser, PM_LEX_STATE_ARG);
11530
11531 if (match(parser, '@')) {
11532 LEX(PM_TOKEN_UMINUS);
11533 }
11534
11535 LEX(PM_TOKEN_MINUS);
11536 }
11537
11538 if (match(parser, '=')) {
11539 lex_state_set(parser, PM_LEX_STATE_BEG);
11540 LEX(PM_TOKEN_MINUS_EQUAL);
11541 }
11542
11543 if (match(parser, '>')) {
11544 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11545 LEX(PM_TOKEN_MINUS_GREATER);
11546 }
11547
11548 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11549 bool is_beg = lex_state_beg_p(parser);
11550 if (!is_beg && spcarg) {
11551 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11552 }
11553
11554 if (is_beg || spcarg) {
11555 lex_state_set(parser, PM_LEX_STATE_BEG);
11556 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11557 }
11558
11559 if (ambiguous_operator_p(parser, space_seen)) {
11560 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11561 }
11562
11563 lex_state_set(parser, PM_LEX_STATE_BEG);
11564 LEX(PM_TOKEN_MINUS);
11565 }
11566
11567 // . .. ...
11568 case '.': {
11569 bool beg_p = lex_state_beg_p(parser);
11570
11571 if (match(parser, '.')) {
11572 if (match(parser, '.')) {
11573 // If we're _not_ inside a range within default parameters
11574 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11575 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11576 lex_state_set(parser, PM_LEX_STATE_BEG);
11577 } else {
11578 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11579 }
11580 LEX(PM_TOKEN_UDOT_DOT_DOT);
11581 }
11582
11583 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11584 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11585 }
11586
11587 lex_state_set(parser, PM_LEX_STATE_BEG);
11588 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
11589 }
11590
11591 lex_state_set(parser, PM_LEX_STATE_BEG);
11592 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11593 }
11594
11595 lex_state_set(parser, PM_LEX_STATE_DOT);
11596 LEX(PM_TOKEN_DOT);
11597 }
11598
11599 // integer
11600 case '0':
11601 case '1':
11602 case '2':
11603 case '3':
11604 case '4':
11605 case '5':
11606 case '6':
11607 case '7':
11608 case '8':
11609 case '9': {
11610 pm_token_type_t type = lex_numeric(parser);
11611 lex_state_set(parser, PM_LEX_STATE_END);
11612 LEX(type);
11613 }
11614
11615 // :: symbol
11616 case ':':
11617 if (match(parser, ':')) {
11618 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11619 lex_state_set(parser, PM_LEX_STATE_BEG);
11620 LEX(PM_TOKEN_UCOLON_COLON);
11621 }
11622
11623 lex_state_set(parser, PM_LEX_STATE_DOT);
11624 LEX(PM_TOKEN_COLON_COLON);
11625 }
11626
11627 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11628 lex_state_set(parser, PM_LEX_STATE_BEG);
11629 LEX(PM_TOKEN_COLON);
11630 }
11631
11632 if (peek(parser) == '"' || peek(parser) == '\'') {
11633 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11634 parser->current.end++;
11635 }
11636
11637 lex_state_set(parser, PM_LEX_STATE_FNAME);
11638 LEX(PM_TOKEN_SYMBOL_BEGIN);
11639
11640 // / /=
11641 case '/':
11642 if (lex_state_beg_p(parser)) {
11643 lex_mode_push_regexp(parser, '\0', '/');
11644 LEX(PM_TOKEN_REGEXP_BEGIN);
11645 }
11646
11647 if (match(parser, '=')) {
11648 lex_state_set(parser, PM_LEX_STATE_BEG);
11649 LEX(PM_TOKEN_SLASH_EQUAL);
11650 }
11651
11652 if (lex_state_spcarg_p(parser, space_seen)) {
11653 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11654 lex_mode_push_regexp(parser, '\0', '/');
11655 LEX(PM_TOKEN_REGEXP_BEGIN);
11656 }
11657
11658 if (ambiguous_operator_p(parser, space_seen)) {
11659 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11660 }
11661
11662 if (lex_state_operator_p(parser)) {
11663 lex_state_set(parser, PM_LEX_STATE_ARG);
11664 } else {
11665 lex_state_set(parser, PM_LEX_STATE_BEG);
11666 }
11667
11668 LEX(PM_TOKEN_SLASH);
11669
11670 // ^ ^=
11671 case '^':
11672 if (lex_state_operator_p(parser)) {
11673 lex_state_set(parser, PM_LEX_STATE_ARG);
11674 } else {
11675 lex_state_set(parser, PM_LEX_STATE_BEG);
11676 }
11677 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11678
11679 // ~ ~@
11680 case '~':
11681 if (lex_state_operator_p(parser)) {
11682 (void) match(parser, '@');
11683 lex_state_set(parser, PM_LEX_STATE_ARG);
11684 } else {
11685 lex_state_set(parser, PM_LEX_STATE_BEG);
11686 }
11687
11688 LEX(PM_TOKEN_TILDE);
11689
11690 // % %= %i %I %q %Q %r %s %w %W %x
11691 case '%': {
11692 // If there is no subsequent character then we have an
11693 // invalid token. We're going to say it's the percent
11694 // operator because we don't want to move into the string
11695 // lex mode unnecessarily.
11696 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11697 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11698 LEX(PM_TOKEN_PERCENT);
11699 }
11700
11701 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11702 lex_state_set(parser, PM_LEX_STATE_BEG);
11703 LEX(PM_TOKEN_PERCENT_EQUAL);
11704 } else if (
11705 lex_state_beg_p(parser) ||
11706 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11707 lex_state_spcarg_p(parser, space_seen)
11708 ) {
11709 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11710 if (*parser->current.end >= 0x80) {
11711 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11712 }
11713
11714 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11715 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11716 LEX(PM_TOKEN_STRING_BEGIN);
11717 }
11718
11719 // Delimiters for %-literals cannot be alphanumeric. We
11720 // validate that here.
11721 uint8_t delimiter = peek_offset(parser, 1);
11722 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11723 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11724 goto lex_next_token;
11725 }
11726
11727 switch (peek(parser)) {
11728 case 'i': {
11729 parser->current.end++;
11730
11731 if (parser->current.end < parser->end) {
11732 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11733 } else {
11734 lex_mode_push_list_eof(parser);
11735 }
11736
11737 LEX(PM_TOKEN_PERCENT_LOWER_I);
11738 }
11739 case 'I': {
11740 parser->current.end++;
11741
11742 if (parser->current.end < parser->end) {
11743 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11744 } else {
11745 lex_mode_push_list_eof(parser);
11746 }
11747
11748 LEX(PM_TOKEN_PERCENT_UPPER_I);
11749 }
11750 case 'r': {
11751 parser->current.end++;
11752
11753 if (parser->current.end < parser->end) {
11754 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11755 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11756 } else {
11757 lex_mode_push_regexp(parser, '\0', '\0');
11758 }
11759
11760 LEX(PM_TOKEN_REGEXP_BEGIN);
11761 }
11762 case 'q': {
11763 parser->current.end++;
11764
11765 if (parser->current.end < parser->end) {
11766 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11767 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11768 } else {
11769 lex_mode_push_string_eof(parser);
11770 }
11771
11772 LEX(PM_TOKEN_STRING_BEGIN);
11773 }
11774 case 'Q': {
11775 parser->current.end++;
11776
11777 if (parser->current.end < parser->end) {
11778 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11779 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11780 } else {
11781 lex_mode_push_string_eof(parser);
11782 }
11783
11784 LEX(PM_TOKEN_STRING_BEGIN);
11785 }
11786 case 's': {
11787 parser->current.end++;
11788
11789 if (parser->current.end < parser->end) {
11790 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11791 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11792 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11793 } else {
11794 lex_mode_push_string_eof(parser);
11795 }
11796
11797 LEX(PM_TOKEN_SYMBOL_BEGIN);
11798 }
11799 case 'w': {
11800 parser->current.end++;
11801
11802 if (parser->current.end < parser->end) {
11803 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11804 } else {
11805 lex_mode_push_list_eof(parser);
11806 }
11807
11808 LEX(PM_TOKEN_PERCENT_LOWER_W);
11809 }
11810 case 'W': {
11811 parser->current.end++;
11812
11813 if (parser->current.end < parser->end) {
11814 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11815 } else {
11816 lex_mode_push_list_eof(parser);
11817 }
11818
11819 LEX(PM_TOKEN_PERCENT_UPPER_W);
11820 }
11821 case 'x': {
11822 parser->current.end++;
11823
11824 if (parser->current.end < parser->end) {
11825 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11826 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11827 } else {
11828 lex_mode_push_string_eof(parser);
11829 }
11830
11831 LEX(PM_TOKEN_PERCENT_LOWER_X);
11832 }
11833 default:
11834 // If we get to this point, then we have a % that is completely
11835 // unparsable. In this case we'll just drop it from the parser
11836 // and skip past it and hope that the next token is something
11837 // that we can parse.
11838 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11839 goto lex_next_token;
11840 }
11841 }
11842
11843 if (ambiguous_operator_p(parser, space_seen)) {
11844 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11845 }
11846
11847 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11848 LEX(PM_TOKEN_PERCENT);
11849 }
11850
11851 // global variable
11852 case '$': {
11853 pm_token_type_t type = lex_global_variable(parser);
11854
11855 // If we're lexing an embedded variable, then we need to pop back into
11856 // the parent lex context.
11857 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11858 lex_mode_pop(parser);
11859 }
11860
11861 lex_state_set(parser, PM_LEX_STATE_END);
11862 LEX(type);
11863 }
11864
11865 // instance variable, class variable
11866 case '@':
11867 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11868 LEX(lex_at_variable(parser));
11869
11870 default: {
11871 if (*parser->current.start != '_') {
11872 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11873
11874 // If this isn't the beginning of an identifier, then
11875 // it's an invalid token as we've exhausted all of the
11876 // other options. We'll skip past it and return the next
11877 // token after adding an appropriate error message.
11878 if (!width) {
11879 if (*parser->current.start >= 0x80) {
11880 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11881 } else if (*parser->current.start == '\\') {
11882 switch (peek_at(parser, parser->current.start + 1)) {
11883 case ' ':
11884 parser->current.end++;
11885 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11886 break;
11887 case '\f':
11888 parser->current.end++;
11889 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11890 break;
11891 case '\t':
11892 parser->current.end++;
11893 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11894 break;
11895 case '\v':
11896 parser->current.end++;
11897 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11898 break;
11899 case '\r':
11900 if (peek_at(parser, parser->current.start + 2) != '\n') {
11901 parser->current.end++;
11902 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11903 break;
11904 }
11906 default:
11907 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11908 break;
11909 }
11910 } else if (char_is_ascii_printable(*parser->current.start)) {
11911 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11912 } else {
11913 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11914 }
11915
11916 goto lex_next_token;
11917 }
11918
11919 parser->current.end = parser->current.start + width;
11920 }
11921
11922 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11923
11924 // If we've hit a __END__ and it was at the start of the
11925 // line or the start of the file and it is followed by
11926 // either a \n or a \r\n, then this is the last token of the
11927 // file.
11928 if (
11929 ((parser->current.end - parser->current.start) == 7) &&
11930 current_token_starts_line(parser) &&
11931 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11932 (parser->current.end == parser->end || match_eol(parser))
11933 ) {
11934 // Since we know we're about to add an __END__ comment,
11935 // we know we need to add all of the newlines to get the
11936 // correct column information for it.
11937 const uint8_t *cursor = parser->current.end;
11938 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11939 pm_newline_list_append(&parser->newline_list, cursor++);
11940 }
11941
11942 parser->current.end = parser->end;
11943 parser->current.type = PM_TOKEN___END__;
11944 parser_lex_callback(parser);
11945
11946 parser->data_loc.start = parser->current.start;
11947 parser->data_loc.end = parser->current.end;
11948
11949 LEX(PM_TOKEN_EOF);
11950 }
11951
11952 pm_lex_state_t last_state = parser->lex_state;
11953
11954 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11955 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11956 if (previous_command_start) {
11957 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11958 } else {
11959 lex_state_set(parser, PM_LEX_STATE_ARG);
11960 }
11961 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11962 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11963 } else {
11964 lex_state_set(parser, PM_LEX_STATE_END);
11965 }
11966 }
11967
11968 if (
11969 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11970 (type == PM_TOKEN_IDENTIFIER) &&
11971 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11972 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11973 ) {
11974 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11975 }
11976
11977 LEX(type);
11978 }
11979 }
11980 }
11981 case PM_LEX_LIST: {
11982 if (parser->next_start != NULL) {
11983 parser->current.end = parser->next_start;
11984 parser->next_start = NULL;
11985 }
11986
11987 // First we'll set the beginning of the token.
11988 parser->current.start = parser->current.end;
11989
11990 // If there's any whitespace at the start of the list, then we're
11991 // going to trim it off the beginning and create a new token.
11992 size_t whitespace;
11993
11994 if (parser->heredoc_end) {
11995 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11996 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11997 whitespace += 1;
11998 }
11999 } else {
12000 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
12001 }
12002
12003 if (whitespace > 0) {
12004 parser->current.end += whitespace;
12005 if (peek_offset(parser, -1) == '\n') {
12006 // mutates next_start
12007 parser_flush_heredoc_end(parser);
12008 }
12009 LEX(PM_TOKEN_WORDS_SEP);
12010 }
12011
12012 // We'll check if we're at the end of the file. If we are, then we
12013 // need to return the EOF token.
12014 if (parser->current.end >= parser->end) {
12015 LEX(PM_TOKEN_EOF);
12016 }
12017
12018 // Here we'll get a list of the places where strpbrk should break,
12019 // and then find the first one.
12020 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12021 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
12022 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12023
12024 // If we haven't found an escape yet, then this buffer will be
12025 // unallocated since we can refer directly to the source string.
12026 pm_token_buffer_t token_buffer = { 0 };
12027
12028 while (breakpoint != NULL) {
12029 // If we hit whitespace, then we must have received content by
12030 // now, so we can return an element of the list.
12031 if (pm_char_is_whitespace(*breakpoint)) {
12032 parser->current.end = breakpoint;
12033 pm_token_buffer_flush(parser, &token_buffer);
12034 LEX(PM_TOKEN_STRING_CONTENT);
12035 }
12036
12037 // If we hit the terminator, we need to check which token to
12038 // return.
12039 if (*breakpoint == lex_mode->as.list.terminator) {
12040 // If this terminator doesn't actually close the list, then
12041 // we need to continue on past it.
12042 if (lex_mode->as.list.nesting > 0) {
12043 parser->current.end = breakpoint + 1;
12044 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12045 lex_mode->as.list.nesting--;
12046 continue;
12047 }
12048
12049 // If we've hit the terminator and we've already skipped
12050 // past content, then we can return a list node.
12051 if (breakpoint > parser->current.start) {
12052 parser->current.end = breakpoint;
12053 pm_token_buffer_flush(parser, &token_buffer);
12054 LEX(PM_TOKEN_STRING_CONTENT);
12055 }
12056
12057 // Otherwise, switch back to the default state and return
12058 // the end of the list.
12059 parser->current.end = breakpoint + 1;
12060 lex_mode_pop(parser);
12061 lex_state_set(parser, PM_LEX_STATE_END);
12062 LEX(PM_TOKEN_STRING_END);
12063 }
12064
12065 // If we hit a null byte, skip directly past it.
12066 if (*breakpoint == '\0') {
12067 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
12068 continue;
12069 }
12070
12071 // If we hit escapes, then we need to treat the next token
12072 // literally. In this case we'll skip past the next character
12073 // and find the next breakpoint.
12074 if (*breakpoint == '\\') {
12075 parser->current.end = breakpoint + 1;
12076
12077 // If we've hit the end of the file, then break out of the
12078 // loop by setting the breakpoint to NULL.
12079 if (parser->current.end == parser->end) {
12080 breakpoint = NULL;
12081 continue;
12082 }
12083
12084 pm_token_buffer_escape(parser, &token_buffer);
12085 uint8_t peeked = peek(parser);
12086
12087 switch (peeked) {
12088 case ' ':
12089 case '\f':
12090 case '\t':
12091 case '\v':
12092 case '\\':
12093 pm_token_buffer_push_byte(&token_buffer, peeked);
12094 parser->current.end++;
12095 break;
12096 case '\r':
12097 parser->current.end++;
12098 if (peek(parser) != '\n') {
12099 pm_token_buffer_push_byte(&token_buffer, '\r');
12100 break;
12101 }
12103 case '\n':
12104 pm_token_buffer_push_byte(&token_buffer, '\n');
12105
12106 if (parser->heredoc_end) {
12107 // ... if we are on the same line as a heredoc,
12108 // flush the heredoc and continue parsing after
12109 // heredoc_end.
12110 parser_flush_heredoc_end(parser);
12111 pm_token_buffer_copy(parser, &token_buffer);
12112 LEX(PM_TOKEN_STRING_CONTENT);
12113 } else {
12114 // ... else track the newline.
12115 pm_newline_list_append(&parser->newline_list, parser->current.end);
12116 }
12117
12118 parser->current.end++;
12119 break;
12120 default:
12121 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12122 pm_token_buffer_push_byte(&token_buffer, peeked);
12123 parser->current.end++;
12124 } else if (lex_mode->as.list.interpolation) {
12125 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12126 } else {
12127 pm_token_buffer_push_byte(&token_buffer, '\\');
12128 pm_token_buffer_push_escaped(&token_buffer, parser);
12129 }
12130
12131 break;
12132 }
12133
12134 token_buffer.cursor = parser->current.end;
12135 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12136 continue;
12137 }
12138
12139 // If we hit a #, then we will attempt to lex interpolation.
12140 if (*breakpoint == '#') {
12141 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12142
12143 if (type == PM_TOKEN_NOT_PROVIDED) {
12144 // If we haven't returned at this point then we had something
12145 // that looked like an interpolated class or instance variable
12146 // like "#@" but wasn't actually. In this case we'll just skip
12147 // to the next breakpoint.
12148 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12149 continue;
12150 }
12151
12152 if (type == PM_TOKEN_STRING_CONTENT) {
12153 pm_token_buffer_flush(parser, &token_buffer);
12154 }
12155
12156 LEX(type);
12157 }
12158
12159 // If we've hit the incrementor, then we need to skip past it
12160 // and find the next breakpoint.
12161 assert(*breakpoint == lex_mode->as.list.incrementor);
12162 parser->current.end = breakpoint + 1;
12163 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12164 lex_mode->as.list.nesting++;
12165 continue;
12166 }
12167
12168 if (parser->current.end > parser->current.start) {
12169 pm_token_buffer_flush(parser, &token_buffer);
12170 LEX(PM_TOKEN_STRING_CONTENT);
12171 }
12172
12173 // If we were unable to find a breakpoint, then this token hits the
12174 // end of the file.
12175 parser->current.end = parser->end;
12176 pm_token_buffer_flush(parser, &token_buffer);
12177 LEX(PM_TOKEN_STRING_CONTENT);
12178 }
12179 case PM_LEX_REGEXP: {
12180 // First, we'll set the start of this token to be the current end.
12181 if (parser->next_start == NULL) {
12182 parser->current.start = parser->current.end;
12183 } else {
12184 parser->current.start = parser->next_start;
12185 parser->current.end = parser->next_start;
12186 parser->next_start = NULL;
12187 }
12188
12189 // We'll check if we're at the end of the file. If we are, then we
12190 // need to return the EOF token.
12191 if (parser->current.end >= parser->end) {
12192 LEX(PM_TOKEN_EOF);
12193 }
12194
12195 // Get a reference to the current mode.
12196 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12197
12198 // These are the places where we need to split up the content of the
12199 // regular expression. We'll use strpbrk to find the first of these
12200 // characters.
12201 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12202 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12203 pm_regexp_token_buffer_t token_buffer = { 0 };
12204
12205 while (breakpoint != NULL) {
12206 uint8_t term = lex_mode->as.regexp.terminator;
12207 bool is_terminator = (*breakpoint == term);
12208
12209 // If the terminator is newline, we need to consider \r\n _also_ a newline
12210 // For example: `%r\nfoo\r\n`
12211 // The regular expression should be /foo/, not /foo\r/
12212 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12213 if (term == '\n') {
12214 is_terminator = true;
12215 }
12216
12217 // If the terminator is a CR, but we see a CRLF, we need to
12218 // treat the CRLF as a newline, meaning this is _not_ the
12219 // terminator
12220 if (term == '\r') {
12221 is_terminator = false;
12222 }
12223 }
12224
12225 // If we hit the terminator, we need to determine what kind of
12226 // token to return.
12227 if (is_terminator) {
12228 if (lex_mode->as.regexp.nesting > 0) {
12229 parser->current.end = breakpoint + 1;
12230 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12231 lex_mode->as.regexp.nesting--;
12232 continue;
12233 }
12234
12235 // Here we've hit the terminator. If we have already consumed
12236 // content then we need to return that content as string content
12237 // first.
12238 if (breakpoint > parser->current.start) {
12239 parser->current.end = breakpoint;
12240 pm_regexp_token_buffer_flush(parser, &token_buffer);
12241 LEX(PM_TOKEN_STRING_CONTENT);
12242 }
12243
12244 // Check here if we need to track the newline.
12245 size_t eol_length = match_eol_at(parser, breakpoint);
12246 if (eol_length) {
12247 parser->current.end = breakpoint + eol_length;
12248 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12249 } else {
12250 parser->current.end = breakpoint + 1;
12251 }
12252
12253 // Since we've hit the terminator of the regular expression,
12254 // we now need to parse the options.
12255 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12256
12257 lex_mode_pop(parser);
12258 lex_state_set(parser, PM_LEX_STATE_END);
12259 LEX(PM_TOKEN_REGEXP_END);
12260 }
12261
12262 // If we've hit the incrementor, then we need to skip past it
12263 // and find the next breakpoint.
12264 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12265 parser->current.end = breakpoint + 1;
12266 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12267 lex_mode->as.regexp.nesting++;
12268 continue;
12269 }
12270
12271 switch (*breakpoint) {
12272 case '\0':
12273 // If we hit a null byte, skip directly past it.
12274 parser->current.end = breakpoint + 1;
12275 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12276 break;
12277 case '\r':
12278 if (peek_at(parser, breakpoint + 1) != '\n') {
12279 parser->current.end = breakpoint + 1;
12280 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12281 break;
12282 }
12283
12284 breakpoint++;
12285 parser->current.end = breakpoint;
12286 pm_regexp_token_buffer_escape(parser, &token_buffer);
12287 token_buffer.base.cursor = breakpoint;
12288
12290 case '\n':
12291 // If we've hit a newline, then we need to track that in
12292 // the list of newlines.
12293 if (parser->heredoc_end == NULL) {
12294 pm_newline_list_append(&parser->newline_list, breakpoint);
12295 parser->current.end = breakpoint + 1;
12296 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12297 break;
12298 }
12299
12300 parser->current.end = breakpoint + 1;
12301 parser_flush_heredoc_end(parser);
12302 pm_regexp_token_buffer_flush(parser, &token_buffer);
12303 LEX(PM_TOKEN_STRING_CONTENT);
12304 case '\\': {
12305 // If we hit escapes, then we need to treat the next
12306 // token literally. In this case we'll skip past the
12307 // next character and find the next breakpoint.
12308 parser->current.end = breakpoint + 1;
12309
12310 // If we've hit the end of the file, then break out of
12311 // the loop by setting the breakpoint to NULL.
12312 if (parser->current.end == parser->end) {
12313 breakpoint = NULL;
12314 break;
12315 }
12316
12317 pm_regexp_token_buffer_escape(parser, &token_buffer);
12318 uint8_t peeked = peek(parser);
12319
12320 switch (peeked) {
12321 case '\r':
12322 parser->current.end++;
12323 if (peek(parser) != '\n') {
12324 if (lex_mode->as.regexp.terminator != '\r') {
12325 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12326 }
12327 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12328 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12329 break;
12330 }
12332 case '\n':
12333 if (parser->heredoc_end) {
12334 // ... if we are on the same line as a heredoc,
12335 // flush the heredoc and continue parsing after
12336 // heredoc_end.
12337 parser_flush_heredoc_end(parser);
12338 pm_regexp_token_buffer_copy(parser, &token_buffer);
12339 LEX(PM_TOKEN_STRING_CONTENT);
12340 } else {
12341 // ... else track the newline.
12342 pm_newline_list_append(&parser->newline_list, parser->current.end);
12343 }
12344
12345 parser->current.end++;
12346 break;
12347 case 'c':
12348 case 'C':
12349 case 'M':
12350 case 'u':
12351 case 'x':
12352 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12353 break;
12354 default:
12355 if (lex_mode->as.regexp.terminator == peeked) {
12356 // Some characters when they are used as the
12357 // terminator also receive an escape. They are
12358 // enumerated here.
12359 switch (peeked) {
12360 case '$': case ')': case '*': case '+':
12361 case '.': case '>': case '?': case ']':
12362 case '^': case '|': case '}':
12363 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12364 break;
12365 default:
12366 break;
12367 }
12368
12369 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12370 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12371 parser->current.end++;
12372 break;
12373 }
12374
12375 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12376 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12377 break;
12378 }
12379
12380 token_buffer.base.cursor = parser->current.end;
12381 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12382 break;
12383 }
12384 case '#': {
12385 // If we hit a #, then we will attempt to lex
12386 // interpolation.
12387 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12388
12389 if (type == PM_TOKEN_NOT_PROVIDED) {
12390 // If we haven't returned at this point then we had
12391 // something that looked like an interpolated class or
12392 // instance variable like "#@" but wasn't actually. In
12393 // this case we'll just skip to the next breakpoint.
12394 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12395 break;
12396 }
12397
12398 if (type == PM_TOKEN_STRING_CONTENT) {
12399 pm_regexp_token_buffer_flush(parser, &token_buffer);
12400 }
12401
12402 LEX(type);
12403 }
12404 default:
12405 assert(false && "unreachable");
12406 break;
12407 }
12408 }
12409
12410 if (parser->current.end > parser->current.start) {
12411 pm_regexp_token_buffer_flush(parser, &token_buffer);
12412 LEX(PM_TOKEN_STRING_CONTENT);
12413 }
12414
12415 // If we were unable to find a breakpoint, then this token hits the
12416 // end of the file.
12417 parser->current.end = parser->end;
12418 pm_regexp_token_buffer_flush(parser, &token_buffer);
12419 LEX(PM_TOKEN_STRING_CONTENT);
12420 }
12421 case PM_LEX_STRING: {
12422 // First, we'll set the start of this token to be the current end.
12423 if (parser->next_start == NULL) {
12424 parser->current.start = parser->current.end;
12425 } else {
12426 parser->current.start = parser->next_start;
12427 parser->current.end = parser->next_start;
12428 parser->next_start = NULL;
12429 }
12430
12431 // We'll check if we're at the end of the file. If we are, then we need to
12432 // return the EOF token.
12433 if (parser->current.end >= parser->end) {
12434 LEX(PM_TOKEN_EOF);
12435 }
12436
12437 // These are the places where we need to split up the content of the
12438 // string. We'll use strpbrk to find the first of these characters.
12439 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12440 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12441 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12442
12443 // If we haven't found an escape yet, then this buffer will be
12444 // unallocated since we can refer directly to the source string.
12445 pm_token_buffer_t token_buffer = { 0 };
12446
12447 while (breakpoint != NULL) {
12448 // If we hit the incrementor, then we'll increment the nesting and
12449 // continue lexing.
12450 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12451 lex_mode->as.string.nesting++;
12452 parser->current.end = breakpoint + 1;
12453 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12454 continue;
12455 }
12456
12457 uint8_t term = lex_mode->as.string.terminator;
12458 bool is_terminator = (*breakpoint == term);
12459
12460 // If the terminator is newline, we need to consider \r\n _also_ a newline
12461 // For example: `%\nfoo\r\n`
12462 // The string should be "foo", not "foo\r"
12463 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12464 if (term == '\n') {
12465 is_terminator = true;
12466 }
12467
12468 // If the terminator is a CR, but we see a CRLF, we need to
12469 // treat the CRLF as a newline, meaning this is _not_ the
12470 // terminator
12471 if (term == '\r') {
12472 is_terminator = false;
12473 }
12474 }
12475
12476 // Note that we have to check the terminator here first because we could
12477 // potentially be parsing a % string that has a # character as the
12478 // terminator.
12479 if (is_terminator) {
12480 // If this terminator doesn't actually close the string, then we need
12481 // to continue on past it.
12482 if (lex_mode->as.string.nesting > 0) {
12483 parser->current.end = breakpoint + 1;
12484 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12485 lex_mode->as.string.nesting--;
12486 continue;
12487 }
12488
12489 // Here we've hit the terminator. If we have already consumed content
12490 // then we need to return that content as string content first.
12491 if (breakpoint > parser->current.start) {
12492 parser->current.end = breakpoint;
12493 pm_token_buffer_flush(parser, &token_buffer);
12494 LEX(PM_TOKEN_STRING_CONTENT);
12495 }
12496
12497 // Otherwise we need to switch back to the parent lex mode and
12498 // return the end of the string.
12499 size_t eol_length = match_eol_at(parser, breakpoint);
12500 if (eol_length) {
12501 parser->current.end = breakpoint + eol_length;
12502 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12503 } else {
12504 parser->current.end = breakpoint + 1;
12505 }
12506
12507 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12508 parser->current.end++;
12509 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12510 lex_mode_pop(parser);
12511 LEX(PM_TOKEN_LABEL_END);
12512 }
12513
12514 lex_state_set(parser, PM_LEX_STATE_END);
12515 lex_mode_pop(parser);
12516 LEX(PM_TOKEN_STRING_END);
12517 }
12518
12519 switch (*breakpoint) {
12520 case '\0':
12521 // Skip directly past the null character.
12522 parser->current.end = breakpoint + 1;
12523 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12524 break;
12525 case '\r':
12526 if (peek_at(parser, breakpoint + 1) != '\n') {
12527 parser->current.end = breakpoint + 1;
12528 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12529 break;
12530 }
12531
12532 // If we hit a \r\n sequence, then we need to treat it
12533 // as a newline.
12534 breakpoint++;
12535 parser->current.end = breakpoint;
12536 pm_token_buffer_escape(parser, &token_buffer);
12537 token_buffer.cursor = breakpoint;
12538
12540 case '\n':
12541 // When we hit a newline, we need to flush any potential
12542 // heredocs. Note that this has to happen after we check
12543 // for the terminator in case the terminator is a
12544 // newline character.
12545 if (parser->heredoc_end == NULL) {
12546 pm_newline_list_append(&parser->newline_list, breakpoint);
12547 parser->current.end = breakpoint + 1;
12548 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12549 break;
12550 }
12551
12552 parser->current.end = breakpoint + 1;
12553 parser_flush_heredoc_end(parser);
12554 pm_token_buffer_flush(parser, &token_buffer);
12555 LEX(PM_TOKEN_STRING_CONTENT);
12556 case '\\': {
12557 // Here we hit escapes.
12558 parser->current.end = breakpoint + 1;
12559
12560 // If we've hit the end of the file, then break out of
12561 // the loop by setting the breakpoint to NULL.
12562 if (parser->current.end == parser->end) {
12563 breakpoint = NULL;
12564 continue;
12565 }
12566
12567 pm_token_buffer_escape(parser, &token_buffer);
12568 uint8_t peeked = peek(parser);
12569
12570 switch (peeked) {
12571 case '\\':
12572 pm_token_buffer_push_byte(&token_buffer, '\\');
12573 parser->current.end++;
12574 break;
12575 case '\r':
12576 parser->current.end++;
12577 if (peek(parser) != '\n') {
12578 if (!lex_mode->as.string.interpolation) {
12579 pm_token_buffer_push_byte(&token_buffer, '\\');
12580 }
12581 pm_token_buffer_push_byte(&token_buffer, '\r');
12582 break;
12583 }
12585 case '\n':
12586 if (!lex_mode->as.string.interpolation) {
12587 pm_token_buffer_push_byte(&token_buffer, '\\');
12588 pm_token_buffer_push_byte(&token_buffer, '\n');
12589 }
12590
12591 if (parser->heredoc_end) {
12592 // ... if we are on the same line as a heredoc,
12593 // flush the heredoc and continue parsing after
12594 // heredoc_end.
12595 parser_flush_heredoc_end(parser);
12596 pm_token_buffer_copy(parser, &token_buffer);
12597 LEX(PM_TOKEN_STRING_CONTENT);
12598 } else {
12599 // ... else track the newline.
12600 pm_newline_list_append(&parser->newline_list, parser->current.end);
12601 }
12602
12603 parser->current.end++;
12604 break;
12605 default:
12606 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12607 pm_token_buffer_push_byte(&token_buffer, peeked);
12608 parser->current.end++;
12609 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12610 pm_token_buffer_push_byte(&token_buffer, peeked);
12611 parser->current.end++;
12612 } else if (lex_mode->as.string.interpolation) {
12613 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12614 } else {
12615 pm_token_buffer_push_byte(&token_buffer, '\\');
12616 pm_token_buffer_push_escaped(&token_buffer, parser);
12617 }
12618
12619 break;
12620 }
12621
12622 token_buffer.cursor = parser->current.end;
12623 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12624 break;
12625 }
12626 case '#': {
12627 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12628
12629 if (type == PM_TOKEN_NOT_PROVIDED) {
12630 // If we haven't returned at this point then we had something that
12631 // looked like an interpolated class or instance variable like "#@"
12632 // but wasn't actually. In this case we'll just skip to the next
12633 // breakpoint.
12634 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12635 break;
12636 }
12637
12638 if (type == PM_TOKEN_STRING_CONTENT) {
12639 pm_token_buffer_flush(parser, &token_buffer);
12640 }
12641
12642 LEX(type);
12643 }
12644 default:
12645 assert(false && "unreachable");
12646 }
12647 }
12648
12649 if (parser->current.end > parser->current.start) {
12650 pm_token_buffer_flush(parser, &token_buffer);
12651 LEX(PM_TOKEN_STRING_CONTENT);
12652 }
12653
12654 // If we've hit the end of the string, then this is an unterminated
12655 // string. In that case we'll return a string content token.
12656 parser->current.end = parser->end;
12657 pm_token_buffer_flush(parser, &token_buffer);
12658 LEX(PM_TOKEN_STRING_CONTENT);
12659 }
12660 case PM_LEX_HEREDOC: {
12661 // First, we'll set the start of this token.
12662 if (parser->next_start == NULL) {
12663 parser->current.start = parser->current.end;
12664 } else {
12665 parser->current.start = parser->next_start;
12666 parser->current.end = parser->next_start;
12667 parser->heredoc_end = NULL;
12668 parser->next_start = NULL;
12669 }
12670
12671 // Now let's grab the information about the identifier off of the
12672 // current lex mode.
12673 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12674 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12675
12676 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12677 lex_mode->as.heredoc.line_continuation = false;
12678
12679 // We'll check if we're at the end of the file. If we are, then we
12680 // will add an error (because we weren't able to find the
12681 // terminator) but still continue parsing so that content after the
12682 // declaration of the heredoc can be parsed.
12683 if (parser->current.end >= parser->end) {
12684 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12685 parser->next_start = lex_mode->as.heredoc.next_start;
12686 parser->heredoc_end = parser->current.end;
12687 lex_state_set(parser, PM_LEX_STATE_END);
12688 lex_mode_pop(parser);
12689 LEX(PM_TOKEN_HEREDOC_END);
12690 }
12691
12692 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12693 size_t ident_length = heredoc_lex_mode->ident_length;
12694
12695 // If we are immediately following a newline and we have hit the
12696 // terminator, then we need to return the ending of the heredoc.
12697 if (current_token_starts_line(parser)) {
12698 const uint8_t *start = parser->current.start;
12699
12700 if (!line_continuation && (start + ident_length <= parser->end)) {
12701 const uint8_t *newline = next_newline(start, parser->end - start);
12702 const uint8_t *ident_end = newline;
12703 const uint8_t *terminator_end = newline;
12704
12705 if (newline == NULL) {
12706 terminator_end = parser->end;
12707 ident_end = parser->end;
12708 } else {
12709 terminator_end++;
12710 if (newline[-1] == '\r') {
12711 ident_end--; // Remove \r
12712 }
12713 }
12714
12715 const uint8_t *terminator_start = ident_end - ident_length;
12716 const uint8_t *cursor = start;
12717
12718 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12719 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12720 cursor++;
12721 }
12722 }
12723
12724 if (
12725 (cursor == terminator_start) &&
12726 (memcmp(terminator_start, ident_start, ident_length) == 0)
12727 ) {
12728 if (newline != NULL) {
12729 pm_newline_list_append(&parser->newline_list, newline);
12730 }
12731
12732 parser->current.end = terminator_end;
12733 if (*lex_mode->as.heredoc.next_start == '\\') {
12734 parser->next_start = NULL;
12735 } else {
12736 parser->next_start = lex_mode->as.heredoc.next_start;
12737 parser->heredoc_end = parser->current.end;
12738 }
12739
12740 lex_state_set(parser, PM_LEX_STATE_END);
12741 lex_mode_pop(parser);
12742 LEX(PM_TOKEN_HEREDOC_END);
12743 }
12744 }
12745
12746 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12747 if (
12748 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12749 lex_mode->as.heredoc.common_whitespace != NULL &&
12750 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12751 peek_at(parser, start) != '\n'
12752 ) {
12753 *lex_mode->as.heredoc.common_whitespace = whitespace;
12754 }
12755 }
12756
12757 // Otherwise we'll be parsing string content. These are the places
12758 // where we need to split up the content of the heredoc. We'll use
12759 // strpbrk to find the first of these characters.
12760 uint8_t breakpoints[] = "\r\n\\#";
12761
12762 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12763 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12764 breakpoints[3] = '\0';
12765 }
12766
12767 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12768 pm_token_buffer_t token_buffer = { 0 };
12769 bool was_line_continuation = false;
12770
12771 while (breakpoint != NULL) {
12772 switch (*breakpoint) {
12773 case '\0':
12774 // Skip directly past the null character.
12775 parser->current.end = breakpoint + 1;
12776 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12777 break;
12778 case '\r':
12779 parser->current.end = breakpoint + 1;
12780
12781 if (peek_at(parser, breakpoint + 1) != '\n') {
12782 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12783 break;
12784 }
12785
12786 // If we hit a \r\n sequence, then we want to replace it
12787 // with a single \n character in the final string.
12788 breakpoint++;
12789 pm_token_buffer_escape(parser, &token_buffer);
12790 token_buffer.cursor = breakpoint;
12791
12793 case '\n': {
12794 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12795 parser_flush_heredoc_end(parser);
12796 parser->current.end = breakpoint + 1;
12797 pm_token_buffer_flush(parser, &token_buffer);
12798 LEX(PM_TOKEN_STRING_CONTENT);
12799 }
12800
12801 pm_newline_list_append(&parser->newline_list, breakpoint);
12802
12803 // If we have a - or ~ heredoc, then we can match after
12804 // some leading whitespace.
12805 const uint8_t *start = breakpoint + 1;
12806
12807 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12808 // We want to match the terminator starting from the end of the line in case
12809 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12810 const uint8_t *newline = next_newline(start, parser->end - start);
12811
12812 if (newline == NULL) {
12813 newline = parser->end;
12814 } else if (newline[-1] == '\r') {
12815 newline--; // Remove \r
12816 }
12817
12818 // Start of a possible terminator.
12819 const uint8_t *terminator_start = newline - ident_length;
12820
12821 // Cursor to check for the leading whitespace. We skip the
12822 // leading whitespace if we have a - or ~ heredoc.
12823 const uint8_t *cursor = start;
12824
12825 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12826 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12827 cursor++;
12828 }
12829 }
12830
12831 if (
12832 cursor == terminator_start &&
12833 (memcmp(terminator_start, ident_start, ident_length) == 0)
12834 ) {
12835 parser->current.end = breakpoint + 1;
12836 pm_token_buffer_flush(parser, &token_buffer);
12837 LEX(PM_TOKEN_STRING_CONTENT);
12838 }
12839 }
12840
12841 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12842
12843 // If we have hit a newline that is followed by a valid
12844 // terminator, then we need to return the content of the
12845 // heredoc here as string content. Then, the next time a
12846 // token is lexed, it will match again and return the
12847 // end of the heredoc.
12848 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12849 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12850 *lex_mode->as.heredoc.common_whitespace = whitespace;
12851 }
12852
12853 parser->current.end = breakpoint + 1;
12854 pm_token_buffer_flush(parser, &token_buffer);
12855 LEX(PM_TOKEN_STRING_CONTENT);
12856 }
12857
12858 // Otherwise we hit a newline and it wasn't followed by
12859 // a terminator, so we can continue parsing.
12860 parser->current.end = breakpoint + 1;
12861 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12862 break;
12863 }
12864 case '\\': {
12865 // If we hit an escape, then we need to skip past
12866 // however many characters the escape takes up. However
12867 // it's important that if \n or \r\n are escaped, we
12868 // stop looping before the newline and not after the
12869 // newline so that we can still potentially find the
12870 // terminator of the heredoc.
12871 parser->current.end = breakpoint + 1;
12872
12873 // If we've hit the end of the file, then break out of
12874 // the loop by setting the breakpoint to NULL.
12875 if (parser->current.end == parser->end) {
12876 breakpoint = NULL;
12877 continue;
12878 }
12879
12880 pm_token_buffer_escape(parser, &token_buffer);
12881 uint8_t peeked = peek(parser);
12882
12883 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12884 switch (peeked) {
12885 case '\r':
12886 parser->current.end++;
12887 if (peek(parser) != '\n') {
12888 pm_token_buffer_push_byte(&token_buffer, '\\');
12889 pm_token_buffer_push_byte(&token_buffer, '\r');
12890 break;
12891 }
12893 case '\n':
12894 pm_token_buffer_push_byte(&token_buffer, '\\');
12895 pm_token_buffer_push_byte(&token_buffer, '\n');
12896 token_buffer.cursor = parser->current.end + 1;
12897 breakpoint = parser->current.end;
12898 continue;
12899 default:
12900 pm_token_buffer_push_byte(&token_buffer, '\\');
12901 pm_token_buffer_push_escaped(&token_buffer, parser);
12902 break;
12903 }
12904 } else {
12905 switch (peeked) {
12906 case '\r':
12907 parser->current.end++;
12908 if (peek(parser) != '\n') {
12909 pm_token_buffer_push_byte(&token_buffer, '\r');
12910 break;
12911 }
12913 case '\n':
12914 // If we are in a tilde here, we should
12915 // break out of the loop and return the
12916 // string content.
12917 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12918 const uint8_t *end = parser->current.end;
12919 pm_newline_list_append(&parser->newline_list, end);
12920
12921 // Here we want the buffer to only
12922 // include up to the backslash.
12923 parser->current.end = breakpoint;
12924 pm_token_buffer_flush(parser, &token_buffer);
12925
12926 // Now we can advance the end of the
12927 // token past the newline.
12928 parser->current.end = end + 1;
12929 lex_mode->as.heredoc.line_continuation = true;
12930 LEX(PM_TOKEN_STRING_CONTENT);
12931 }
12932
12933 was_line_continuation = true;
12934 token_buffer.cursor = parser->current.end + 1;
12935 breakpoint = parser->current.end;
12936 continue;
12937 default:
12938 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12939 break;
12940 }
12941 }
12942
12943 token_buffer.cursor = parser->current.end;
12944 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12945 break;
12946 }
12947 case '#': {
12948 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12949
12950 if (type == PM_TOKEN_NOT_PROVIDED) {
12951 // If we haven't returned at this point then we had
12952 // something that looked like an interpolated class
12953 // or instance variable like "#@" but wasn't
12954 // actually. In this case we'll just skip to the
12955 // next breakpoint.
12956 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12957 break;
12958 }
12959
12960 if (type == PM_TOKEN_STRING_CONTENT) {
12961 pm_token_buffer_flush(parser, &token_buffer);
12962 }
12963
12964 LEX(type);
12965 }
12966 default:
12967 assert(false && "unreachable");
12968 }
12969
12970 was_line_continuation = false;
12971 }
12972
12973 if (parser->current.end > parser->current.start) {
12974 parser->current.end = parser->end;
12975 pm_token_buffer_flush(parser, &token_buffer);
12976 LEX(PM_TOKEN_STRING_CONTENT);
12977 }
12978
12979 // If we've hit the end of the string, then this is an unterminated
12980 // heredoc. In that case we'll return a string content token.
12981 parser->current.end = parser->end;
12982 pm_token_buffer_flush(parser, &token_buffer);
12983 LEX(PM_TOKEN_STRING_CONTENT);
12984 }
12985 }
12986
12987 assert(false && "unreachable");
12988}
12989
12990#undef LEX
12991
12992/******************************************************************************/
12993/* Parse functions */
12994/******************************************************************************/
12995
/**
 * The binding-power levels that can be assigned to a token. Levels are listed
 * in ascending numeric order, and the trailing comment on each one names the
 * keywords or operators it is meant for.
 *
 * Every level is an even number. That leaves an odd value free between two
 * neighbouring levels, which is what the `precedence + 1` right-hand side of
 * the LEFT_ASSOCIATIVE and NON_ASSOCIATIVE initializer macros evaluates to.
 */
typedef enum {
    // Used to indicate this token cannot be used as an infix operator.
    PM_BINDING_POWER_UNSET            =  0,

    PM_BINDING_POWER_STATEMENT        =  2,
    PM_BINDING_POWER_MODIFIER_RESCUE  =  4,  // rescue
    PM_BINDING_POWER_MODIFIER         =  6,  // if unless until while
    PM_BINDING_POWER_COMPOSITION      =  8,  // and or
    PM_BINDING_POWER_NOT              = 10,  // not
    PM_BINDING_POWER_MATCH            = 12,  // => in
    PM_BINDING_POWER_DEFINED          = 14,  // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,  // =
    PM_BINDING_POWER_ASSIGNMENT       = 18,  // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY          = 20,  // ?:
    PM_BINDING_POWER_RANGE            = 22,  // .. ...
    PM_BINDING_POWER_LOGICAL_OR       = 24,  // ||
    PM_BINDING_POWER_LOGICAL_AND      = 26,  // &&
    PM_BINDING_POWER_EQUALITY         = 28,  // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON       = 30,  // > >= < <=
    PM_BINDING_POWER_BITWISE_OR       = 32,  // | ^
    PM_BINDING_POWER_BITWISE_AND      = 34,  // &
    PM_BINDING_POWER_SHIFT            = 36,  // << >>
    PM_BINDING_POWER_TERM             = 38,  // + -
    PM_BINDING_POWER_FACTOR           = 40,  // * / %
    PM_BINDING_POWER_UMINUS           = 42,  // -@
    PM_BINDING_POWER_EXPONENT         = 44,  // **
    PM_BINDING_POWER_UNARY            = 46,  // ! ~ +@
    PM_BINDING_POWER_INDEX            = 48,  // [] []=
    PM_BINDING_POWER_CALL             = 50,  // :: .

    // One level above every real operator level.
    PM_BINDING_POWER_MAX              = 52
} pm_binding_power_t;
13033
13038typedef struct {
13040 pm_binding_power_t left;
13041
13043 pm_binding_power_t right;
13044
13047
13054
// Helper macros that expand to the positional initializer of one operator
// table entry: { left, right, binary, nonassoc }. The precedence argument is
// parenthesized in every expansion so that an argument which is itself an
// expression (for example a conditional) is not re-associated by the `+ 1`.

// Assignment operators: the left side binds like a unary operator, the right
// side binds at assignment level.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }

// Binary operator whose right binding power is one higher than its left.
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }

// Binary operator whose right binding power equals its left.
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }

// Same powers as LEFT_ASSOCIATIVE, with the non-associative flag set.
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }

// Unary operator: equal powers on both sides and not usable as a binary operator.
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
13060
13061pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
13062 // rescue
13063 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
13064
13065 // if unless until while
13066 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13067 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13068 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13069 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13070
13071 // and or
13072 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
13073 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
13074
13075 // => in
13076 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13077 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13078
13079 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
13080 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13081 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13082 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
13083 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
13084 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
13085 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13086 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13087 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
13088 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13089 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13090 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13091 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
13092 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13093 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13094
13095 // ?:
13096 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13097
13098 // .. ...
13099 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13100 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13101 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13102 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13103
13104 // ||
13105 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13106
13107 // &&
13108 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13109
13110 // != !~ == === =~ <=>
13111 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13112 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13113 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13114 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13115 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13116 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13117
13118 // > >= < <=
13119 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13120 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13121 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13122 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13123
13124 // ^ |
13125 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13126 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13127
13128 // &
13129 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13130
13131 // >> <<
13132 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13133 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13134
13135 // - +
13136 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13137 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13138
13139 // % / *
13140 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13141 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13142 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13143 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13144
13145 // -@
13146 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13147 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13148
13149 // **
13150 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13151 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13152
13153 // ! ~ +@
13154 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13155 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13156 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13157
13158 // [
13159 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13160
13161 // :: . &.
13162 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13163 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13164 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13165};
13166
// The initializer helpers are only meant for building the operator table, so
// every one of them is removed again here, including NON_ASSOCIATIVE.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
13171
13175static inline bool
13176match1(const pm_parser_t *parser, pm_token_type_t type) {
13177 return parser->current.type == type;
13178}
13179
13183static inline bool
13184match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13185 return match1(parser, type1) || match1(parser, type2);
13186}
13187
13191static inline bool
13192match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13193 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13194}
13195
13199static inline bool
13200match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13201 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13202}
13203
13207static inline bool
13208match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13209 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13210}
13211
13215static inline bool
13216match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13217 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13218}
13219
13226static bool
13227accept1(pm_parser_t *parser, pm_token_type_t type) {
13228 if (match1(parser, type)) {
13229 parser_lex(parser);
13230 return true;
13231 }
13232 return false;
13233}
13234
13239static inline bool
13240accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13241 if (match2(parser, type1, type2)) {
13242 parser_lex(parser);
13243 return true;
13244 }
13245 return false;
13246}
13247
13259static void
13260expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13261 if (accept1(parser, type)) return;
13262
13263 const uint8_t *location = parser->previous.end;
13264 pm_parser_err(parser, location, location, diag_id);
13265
13266 parser->previous.start = location;
13267 parser->previous.type = PM_TOKEN_MISSING;
13268}
13269
13274static void
13275expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13276 if (accept2(parser, type1, type2)) return;
13277
13278 const uint8_t *location = parser->previous.end;
13279 pm_parser_err(parser, location, location, diag_id);
13280
13281 parser->previous.start = location;
13282 parser->previous.type = PM_TOKEN_MISSING;
13283}
13284
13289static void
13290expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13291 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13292 parser_lex(parser);
13293 } else {
13294 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13295 parser->previous.start = parser->previous.end;
13296 parser->previous.type = PM_TOKEN_MISSING;
13297 }
13298}
13299
13300static pm_node_t *
13301parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13302
13307static pm_node_t *
13308parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13309 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13310 pm_assert_value_expression(parser, node);
13311 return node;
13312}
13313
13332static inline bool
13333token_begins_expression_p(pm_token_type_t type) {
13334 switch (type) {
13335 case PM_TOKEN_EQUAL_GREATER:
13336 case PM_TOKEN_KEYWORD_IN:
13337 // We need to special case this because it is a binary operator that
13338 // should not be marked as beginning an expression.
13339 return false;
13340 case PM_TOKEN_BRACE_RIGHT:
13341 case PM_TOKEN_BRACKET_RIGHT:
13342 case PM_TOKEN_COLON:
13343 case PM_TOKEN_COMMA:
13344 case PM_TOKEN_EMBEXPR_END:
13345 case PM_TOKEN_EOF:
13346 case PM_TOKEN_LAMBDA_BEGIN:
13347 case PM_TOKEN_KEYWORD_DO:
13348 case PM_TOKEN_KEYWORD_DO_LOOP:
13349 case PM_TOKEN_KEYWORD_END:
13350 case PM_TOKEN_KEYWORD_ELSE:
13351 case PM_TOKEN_KEYWORD_ELSIF:
13352 case PM_TOKEN_KEYWORD_ENSURE:
13353 case PM_TOKEN_KEYWORD_THEN:
13354 case PM_TOKEN_KEYWORD_RESCUE:
13355 case PM_TOKEN_KEYWORD_WHEN:
13356 case PM_TOKEN_NEWLINE:
13357 case PM_TOKEN_PARENTHESIS_RIGHT:
13358 case PM_TOKEN_SEMICOLON:
13359 // The reason we need this short-circuit is because we're using the
13360 // binding powers table to tell us if the subsequent token could
13361 // potentially be the start of an expression. If there _is_ a binding
13362 // power for one of these tokens, then we should remove it from this list
13363 // and let it be handled by the default case below.
13364 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13365 return false;
13366 case PM_TOKEN_UAMPERSAND:
13367 // This is a special case because this unary operator cannot appear
13368 // as a general operator, it only appears in certain circumstances.
13369 return false;
13370 case PM_TOKEN_UCOLON_COLON:
13371 case PM_TOKEN_UMINUS:
13372 case PM_TOKEN_UMINUS_NUM:
13373 case PM_TOKEN_UPLUS:
13374 case PM_TOKEN_BANG:
13375 case PM_TOKEN_TILDE:
13376 case PM_TOKEN_UDOT_DOT:
13377 case PM_TOKEN_UDOT_DOT_DOT:
13378 // These unary tokens actually do have binding power associated with them
13379 // so that we can correctly place them into the precedence order. But we
13380 // want them to be marked as beginning an expression, so we need to
13381 // special case them here.
13382 return true;
13383 default:
13384 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13385 }
13386}
13387
13392static pm_node_t *
13393parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13394 if (accept1(parser, PM_TOKEN_USTAR)) {
13395 pm_token_t operator = parser->previous;
13396 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13397 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13398 }
13399
13400 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13401}
13402
13407static void
13408parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13409 // The method name needs to change. If we previously had
13410 // foo, we now need foo=. In this case we'll allocate a new
13411 // owned string, copy the previous method name in, and
13412 // append an =.
13413 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13414 size_t length = constant->length;
13415 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13416 if (name == NULL) return;
13417
13418 memcpy(name, constant->start, length);
13419 name[length] = '=';
13420
13421 // Now switch the name to the new string.
13422 // This silences clang analyzer warning about leak of memory pointed by `name`.
13423 // NOLINTNEXTLINE(clang-analyzer-*)
13424 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13425}
13426
13433static pm_node_t *
13434parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13435 switch (PM_NODE_TYPE(target)) {
13436 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13437 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13438 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13439 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13440 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13441 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13442 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13443 default: break;
13444 }
13445
13446 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13447 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13448
13449 pm_node_destroy(parser, target);
13450 return (pm_node_t *) result;
13451}
13452
13458static void
13459parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13460 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13461
13462 for (size_t index = 0; index < implicit_parameters->size; index++) {
13463 if (implicit_parameters->nodes[index] == node) {
13464 // If the node is not the last one in the list, we need to shift the
13465 // remaining nodes down to fill the gap. This is extremely unlikely
13466 // to happen.
13467 if (index != implicit_parameters->size - 1) {
13468 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13469 }
13470
13471 implicit_parameters->size--;
13472 break;
13473 }
13474 }
13475}
13476
13485static pm_node_t *
13486parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13487 switch (PM_NODE_TYPE(target)) {
13488 case PM_MISSING_NODE:
13489 return target;
13490 case PM_SOURCE_ENCODING_NODE:
13491 case PM_FALSE_NODE:
13492 case PM_SOURCE_FILE_NODE:
13493 case PM_SOURCE_LINE_NODE:
13494 case PM_NIL_NODE:
13495 case PM_SELF_NODE:
13496 case PM_TRUE_NODE: {
13497 // In these special cases, we have specific error messages and we
13498 // will replace them with local variable writes.
13499 return parse_unwriteable_target(parser, target);
13500 }
13501 case PM_CLASS_VARIABLE_READ_NODE:
13503 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
13504 return target;
13505 case PM_CONSTANT_PATH_NODE:
13506 if (context_def_p(parser)) {
13507 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13508 }
13509
13511 target->type = PM_CONSTANT_PATH_TARGET_NODE;
13512
13513 return target;
13514 case PM_CONSTANT_READ_NODE:
13515 if (context_def_p(parser)) {
13516 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13517 }
13518
13519 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13520 target->type = PM_CONSTANT_TARGET_NODE;
13521
13522 return target;
13523 case PM_BACK_REFERENCE_READ_NODE:
13524 case PM_NUMBERED_REFERENCE_READ_NODE:
13525 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13526 return target;
13527 case PM_GLOBAL_VARIABLE_READ_NODE:
13529 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13530 return target;
13531 case PM_LOCAL_VARIABLE_READ_NODE: {
13532 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13533 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13534 parse_target_implicit_parameter(parser, target);
13535 }
13536
13537 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13538 uint32_t name = cast->name;
13539 uint32_t depth = cast->depth;
13540 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13541
13543 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
13544
13545 return target;
13546 }
13547 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13548 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13549 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13550
13551 parse_target_implicit_parameter(parser, target);
13552 pm_node_destroy(parser, target);
13553
13554 return node;
13555 }
13556 case PM_INSTANCE_VARIABLE_READ_NODE:
13558 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13559 return target;
13560 case PM_MULTI_TARGET_NODE:
13561 if (splat_parent) {
13562 // Multi target is not accepted in all positions. If this is one
13563 // of them, then we need to add an error.
13564 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13565 }
13566
13567 return target;
13568 case PM_SPLAT_NODE: {
13569 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13570
13571 if (splat->expression != NULL) {
13572 splat->expression = parse_target(parser, splat->expression, multiple, true);
13573 }
13574
13575 return (pm_node_t *) splat;
13576 }
13577 case PM_CALL_NODE: {
13578 pm_call_node_t *call = (pm_call_node_t *) target;
13579
13580 // If we have no arguments to the call node and we need this to be a
13581 // target then this is either a method call or a local variable
13582 // write.
13583 if (
13584 (call->message_loc.start != NULL) &&
13585 (call->message_loc.end[-1] != '!') &&
13586 (call->message_loc.end[-1] != '?') &&
13587 (call->opening_loc.start == NULL) &&
13588 (call->arguments == NULL) &&
13589 (call->block == NULL)
13590 ) {
13591 if (call->receiver == NULL) {
13592 // When we get here, we have a local variable write, because it
13593 // was previously marked as a method call but now we have an =.
13594 // This looks like:
13595 //
13596 // foo = 1
13597 //
13598 // When it was parsed in the prefix position, foo was seen as a
13599 // method call with no receiver and no arguments. Now we have an
13600 // =, so we know it's a local variable write.
13601 const pm_location_t message_loc = call->message_loc;
13602
13603 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13604 pm_node_destroy(parser, target);
13605
13606 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13607 }
13608
13609 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13610 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13611 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13612 }
13613
13614 parse_write_name(parser, &call->name);
13615 return (pm_node_t *) pm_call_target_node_create(parser, call);
13616 }
13617 }
13618
13619 // If there is no call operator and the message is "[]" then this is
13620 // an aref expression, and we can transform it into an aset
13621 // expression.
13622 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13623 return (pm_node_t *) pm_index_target_node_create(parser, call);
13624 }
13625 }
13627 default:
13628 // In this case we have a node that we don't know how to convert
13629 // into a target. We need to treat it as an error. For now, we'll
13630 // mark it as an error and just skip right past it.
13631 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13632 return target;
13633 }
13634}
13635
13640static pm_node_t *
13641parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13642 pm_node_t *result = parse_target(parser, target, multiple, false);
13643
13644 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13645 // parens after the targets.
13646 if (
13647 !match1(parser, PM_TOKEN_EQUAL) &&
13648 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13649 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13650 ) {
13651 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13652 }
13653
13654 return result;
13655}
13656
13661static pm_node_t *
13662parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13663 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13664
13665 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13666 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13667 }
13668
13669 return write;
13670}
13671
13675static pm_node_t *
13676parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13677 switch (PM_NODE_TYPE(target)) {
13678 case PM_MISSING_NODE:
13679 pm_node_destroy(parser, value);
13680 return target;
13681 case PM_CLASS_VARIABLE_READ_NODE: {
13682 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13683 pm_node_destroy(parser, target);
13684 return (pm_node_t *) node;
13685 }
13686 case PM_CONSTANT_PATH_NODE: {
13687 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13688
13689 if (context_def_p(parser)) {
13690 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13691 }
13692
13693 return parse_shareable_constant_write(parser, node);
13694 }
13695 case PM_CONSTANT_READ_NODE: {
13696 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13697
13698 if (context_def_p(parser)) {
13699 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13700 }
13701
13702 pm_node_destroy(parser, target);
13703 return parse_shareable_constant_write(parser, node);
13704 }
13705 case PM_BACK_REFERENCE_READ_NODE:
13706 case PM_NUMBERED_REFERENCE_READ_NODE:
13707 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13709 case PM_GLOBAL_VARIABLE_READ_NODE: {
13710 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13711 pm_node_destroy(parser, target);
13712 return (pm_node_t *) node;
13713 }
13714 case PM_LOCAL_VARIABLE_READ_NODE: {
13716
13717 pm_constant_id_t name = local_read->name;
13718 pm_location_t name_loc = target->location;
13719
13720 uint32_t depth = local_read->depth;
13721 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13722
13723 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13724 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13725 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13726 parse_target_implicit_parameter(parser, target);
13727 }
13728
13729 pm_locals_unread(&scope->locals, name);
13730 pm_node_destroy(parser, target);
13731
13732 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13733 }
13734 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13735 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13736 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13737
13738 parse_target_implicit_parameter(parser, target);
13739 pm_node_destroy(parser, target);
13740
13741 return node;
13742 }
13743 case PM_INSTANCE_VARIABLE_READ_NODE: {
13744 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13745 pm_node_destroy(parser, target);
13746 return write_node;
13747 }
13748 case PM_MULTI_TARGET_NODE:
13749 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13750 case PM_SPLAT_NODE: {
13751 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13752
13753 if (splat->expression != NULL) {
13754 splat->expression = parse_write(parser, splat->expression, operator, value);
13755 }
13756
13757 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13758 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13759
13760 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13761 }
13762 case PM_CALL_NODE: {
13763 pm_call_node_t *call = (pm_call_node_t *) target;
13764
13765 // If we have no arguments to the call node and we need this to be a
13766 // target then this is either a method call or a local variable
13767 // write.
13768 if (
13769 (call->message_loc.start != NULL) &&
13770 (call->message_loc.end[-1] != '!') &&
13771 (call->message_loc.end[-1] != '?') &&
13772 (call->opening_loc.start == NULL) &&
13773 (call->arguments == NULL) &&
13774 (call->block == NULL)
13775 ) {
13776 if (call->receiver == NULL) {
13777 // When we get here, we have a local variable write, because it
13778 // was previously marked as a method call but now we have an =.
13779 // This looks like:
13780 //
13781 // foo = 1
13782 //
13783 // When it was parsed in the prefix position, foo was seen as a
13784 // method call with no receiver and no arguments. Now we have an
13785 // =, so we know it's a local variable write.
13786 const pm_location_t message = call->message_loc;
13787
13788 pm_parser_local_add_location(parser, message.start, message.end, 0);
13789 pm_node_destroy(parser, target);
13790
13791 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13792 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13793
13794 pm_refute_numbered_parameter(parser, message.start, message.end);
13795 return target;
13796 }
13797
13798 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13799 // When we get here, we have a method call, because it was
13800 // previously marked as a method call but now we have an =. This
13801 // looks like:
13802 //
13803 // foo.bar = 1
13804 //
13805 // When it was parsed in the prefix position, foo.bar was seen as a
13806 // method call with no arguments. Now we have an =, so we know it's
13807 // a method call with an argument. In this case we will create the
13808 // arguments node, parse the argument, and add it to the list.
13809 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13810 call->arguments = arguments;
13811
13812 pm_arguments_node_arguments_append(arguments, value);
13813 call->base.location.end = arguments->base.location.end;
13814 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
13815
13816 parse_write_name(parser, &call->name);
13817 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13818
13819 return (pm_node_t *) call;
13820 }
13821 }
13822
13823 // If there is no call operator and the message is "[]" then this is
13824 // an aref expression, and we can transform it into an aset
13825 // expression.
13826 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13827 if (call->arguments == NULL) {
13828 call->arguments = pm_arguments_node_create(parser);
13829 }
13830
13831 pm_arguments_node_arguments_append(call->arguments, value);
13832 target->location.end = value->location.end;
13833
13834 // Replace the name with "[]=".
13835 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13836 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
13837
13838 // Ensure that the arguments for []= don't contain keywords
13839 pm_index_arguments_check(parser, call->arguments, call->block);
13840 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13841
13842 return target;
13843 }
13844
13845 // If there are arguments on the call node, then it can't be a method
13846 // call ending with = or a local variable write, so it must be a
13847 // syntax error. In this case we'll fall through to our default
13848 // handling. We need to free the value that we parsed because there
13849 // is no way for us to attach it to the tree at this point.
13850 pm_node_destroy(parser, value);
13851 }
13853 default:
13854 // In this case we have a node that we don't know how to convert into a
13855 // target. We need to treat it as an error. For now, we'll mark it as an
13856 // error and just skip right past it.
13857 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13858 return target;
13859 }
13860}
13861
13868static pm_node_t *
13869parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13870 switch (PM_NODE_TYPE(target)) {
13871 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13872 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13873 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13874 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13875 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13876 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13877 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13878 default: break;
13879 }
13880
13881 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13882 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13883
13884 pm_node_destroy(parser, target);
13885 return (pm_node_t *) result;
13886}
13887
13898static pm_node_t *
13899parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13900 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13901
13902 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13903 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13904
13905 while (accept1(parser, PM_TOKEN_COMMA)) {
13906 if (accept1(parser, PM_TOKEN_USTAR)) {
13907 // Here we have a splat operator. It can have a name or be
13908 // anonymous. It can be the final target or be in the middle if
13909 // there haven't been any others yet.
13910 if (has_rest) {
13911 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13912 }
13913
13914 pm_token_t star_operator = parser->previous;
13915 pm_node_t *name = NULL;
13916
13917 if (token_begins_expression_p(parser->current.type)) {
13918 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13919 name = parse_target(parser, name, true, true);
13920 }
13921
13922 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13923 pm_multi_target_node_targets_append(parser, result, splat);
13924 has_rest = true;
13925 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13926 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13927 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13928 target = parse_target(parser, target, true, false);
13929
13930 pm_multi_target_node_targets_append(parser, result, target);
13931 context_pop(parser);
13932 } else if (token_begins_expression_p(parser->current.type)) {
13933 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13934 target = parse_target(parser, target, true, false);
13935
13936 pm_multi_target_node_targets_append(parser, result, target);
13937 } else if (!match1(parser, PM_TOKEN_EOF)) {
13938 // If we get here, then we have a trailing , in a multi target node.
13939 // We'll add an implicit rest node to represent this.
13940 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13941 pm_multi_target_node_targets_append(parser, result, rest);
13942 break;
13943 }
13944 }
13945
13946 return (pm_node_t *) result;
13947}
13948
13953static pm_node_t *
13954parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13955 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13956 accept1(parser, PM_TOKEN_NEWLINE);
13957
13958 // Ensure that we have either an = or a ) after the targets.
13959 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13960 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13961 }
13962
13963 return result;
13964}
13965
/**
 * Parse a list of statements within the given context.
 *
 * Leading newlines and semicolons are skipped first. If the current token then
 * terminates `context`, no node is created and NULL is returned. Otherwise the
 * context is pushed for the duration of the loop, expressions are parsed at
 * PM_BINDING_POWER_STATEMENT and appended to a new statements node until a
 * context terminator is reached or error recovery forces an early exit. The
 * context is popped, the list is passed to pm_void_statements_check, and the
 * statements node is returned.
 */
13969static pm_statements_node_t *
13970parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13971 // First, skip past any optional terminators that might be at the beginning
13972 // of the statements.
13973 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13974
13975 // If we have a terminator, then we can just return NULL.
13976 if (context_terminator(context, &parser->current)) return NULL;
13977
13978 pm_statements_node_t *statements = pm_statements_node_create(parser);
13979
13980 // At this point we know we have at least one statement, and that it
13981 // immediately follows the current token.
13982 context_push(parser, context);
13983
13984 while (true) {
13985 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13986 pm_statements_node_body_append(parser, statements, node, true);
13987
13988 // If we're recovering from a syntax error, then we need to stop parsing
13989 // the statements now.
13990 if (parser->recovering) {
13991 // If this is the level of context where the recovery has happened,
13992 // then we can mark the parser as done recovering.
13993 if (context_terminator(context, &parser->current)) parser->recovering = false;
13994 break;
13995 }
13996
13997 // If we have a terminator, then we will parse all consecutive
13998 // terminators and then continue parsing the statements list.
13999 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14000 // If we have a terminator, then we will continue parsing the
14001 // statements list.
14002 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14003 if (context_terminator(context, &parser->current)) break;
14004
14005 // Now we can continue parsing the list of statements.
14006 continue;
14007 }
14008
14009 // At this point we have a list of statements that are not terminated by
14010 // a newline or semicolon. At this point we need to check if we're at
14011 // the end of the statements list. If we are, then we should break out
14012 // of the loop.
14013 if (context_terminator(context, &parser->current)) break;
14014
14015 // At this point, we have a syntax error, because the statement was not
14016 // terminated by a newline or semicolon, and we're not at the end of the
14017 // statements list. Ideally we should scan forward to determine if we
14018 // should insert a missing terminator or break out of parsing the
14019 // statements list at this point.
14020 //
14021 // We don't have that yet, so instead we'll do a more naive approach. If
14022 // we were unable to parse an expression, then we will skip past this
14023 // token and continue parsing the statements list. Otherwise we'll add
14024 // an error and continue parsing the statements list.
14025 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
 // Advance past the token that could not start an expression so that the
 // loop makes progress.
14026 parser_lex(parser);
14027
14028 // If we are at the end of the file, then we need to stop parsing
14029 // the statements entirely at this point. Mark the parser as
14030 // recovering, as we know that EOF closes the top-level context, and
14031 // then break out of the loop.
14032 if (match1(parser, PM_TOKEN_EOF)) {
14033 parser->recovering = true;
14034 break;
14035 }
14036
14037 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14038 if (context_terminator(context, &parser->current)) break;
14039 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
14040 // This is an inlined version of accept1 because the error that we
14041 // want to add has varargs. If this happens again, we should
14042 // probably extract a helper function.
14043 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
 // Turn the previous token into a zero-width PM_TOKEN_MISSING positioned
 // at the end of the last consumed token.
14044 parser->previous.start = parser->previous.end;
14045 parser->previous.type = PM_TOKEN_MISSING;
14046 }
14047 }
14048
14049 context_pop(parser);
 // last_value is forwarded to pm_void_statements_check below.
 //
 // NOTE(review): as written, the two statements that directly follow the
 // opening brace of this switch come before any case label, so they can
 // never execute and last_value is always true here. This looks like one or
 // more case labels are missing - confirm against the upstream source.
14050 bool last_value = true;
14051 switch (context) {
14054 last_value = false;
14055 break;
14056 default:
14057 break;
14058 }
14059 pm_void_statements_check(parser, statements, last_value);
14060
14061 return statements;
14062}
14063
14068static void
14069pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14070 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
14071
14072 if (duplicated != NULL) {
14073 pm_buffer_t buffer = { 0 };
14074 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
14075
14076 pm_diagnostic_list_append_format(
14077 &parser->warning_list,
14078 duplicated->location.start,
14079 duplicated->location.end,
14080 PM_WARN_DUPLICATED_HASH_KEY,
14081 (int) pm_buffer_length(&buffer),
14082 pm_buffer_value(&buffer),
14083 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
14084 );
14085
14086 pm_buffer_free(&buffer);
14087 }
14088}
14089
14094static void
14095pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14096 pm_node_t *previous;
14097
14098 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14099 pm_diagnostic_list_append_format(
14100 &parser->warning_list,
14101 node->location.start,
14102 node->location.end,
14103 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14104 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14105 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14106 );
14107 }
14108}
14109
14113static bool
14114parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14115 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
14116 bool contains_keyword_splat = false;
14117
14118 while (true) {
14119 pm_node_t *element;
14120
14121 switch (parser->current.type) {
14122 case PM_TOKEN_USTAR_STAR: {
14123 parser_lex(parser);
14124 pm_token_t operator = parser->previous;
14125 pm_node_t *value = NULL;
14126
14127 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14128 // If we're about to parse a nested hash that is being
14129 // pushed into this hash directly with **, then we want the
14130 // inner hash to share the static literals with the outer
14131 // hash.
14132 parser->current_hash_keys = literals;
14133 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14134 } else if (token_begins_expression_p(parser->current.type)) {
14135 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14136 } else {
14137 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14138 }
14139
14140 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14141 contains_keyword_splat = true;
14142 break;
14143 }
14144 case PM_TOKEN_LABEL: {
14145 pm_token_t label = parser->current;
14146 parser_lex(parser);
14147
14148 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14149 pm_hash_key_static_literals_add(parser, literals, key);
14150
14151 pm_token_t operator = not_provided(parser);
14152 pm_node_t *value = NULL;
14153
14154 if (token_begins_expression_p(parser->current.type)) {
14155 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14156 } else {
14157 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14158 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14159 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14160 } else {
14161 int depth = -1;
14162 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14163
14164 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14165 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14166 } else {
14167 depth = pm_parser_local_depth(parser, &identifier);
14168 }
14169
14170 if (depth == -1) {
14171 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14172 } else {
14173 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14174 }
14175 }
14176
14177 value->location.end++;
14178 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14179 }
14180
14181 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14182 break;
14183 }
14184 default: {
14185 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14186
14187 // Hash keys that are strings are automatically frozen. We will
14188 // mark that here.
14189 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14190 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14191 }
14192
14193 pm_hash_key_static_literals_add(parser, literals, key);
14194
14195 pm_token_t operator;
14196 if (pm_symbol_node_label_p(key)) {
14197 operator = not_provided(parser);
14198 } else {
14199 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14200 operator = parser->previous;
14201 }
14202
14203 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14204 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14205 break;
14206 }
14207 }
14208
14209 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14210 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14211 } else {
14212 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14213 }
14214
14215 // If there's no comma after the element, then we're done.
14216 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14217
14218 // If the next element starts with a label or a **, then we know we have
14219 // another element in the hash, so we'll continue parsing.
14220 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14221
14222 // Otherwise we need to check if the subsequent token begins an expression.
14223 // If it does, then we'll continue parsing.
14224 if (token_begins_expression_p(parser->current.type)) continue;
14225
14226 // Otherwise by default we will exit out of this loop.
14227 break;
14228 }
14229
14230 return contains_keyword_splat;
14231}
14232
14236static inline void
14237parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14238 if (arguments->arguments == NULL) {
14239 arguments->arguments = pm_arguments_node_create(parser);
14240 }
14241
14242 pm_arguments_node_arguments_append(arguments->arguments, argument);
14243}
14244
/**
 * Parse a list of call arguments into `arguments`.
 *
 * Nothing is parsed if the current token is `terminator` or EOF, if the
 * current token has a left binding power that is set and lower than
 * PM_BINDING_POWER_RANGE, or if the current token terminates the current
 * context. Otherwise arguments are parsed one at a time until EOF, a missing
 * node or error recovery, the absence of a separating comma, or a trailing
 * comma directly before `terminator`.
 *
 * `accepts_forwarding` controls whether a leading `...` may be treated as
 * argument forwarding. When `terminator` is not PM_TOKEN_EOF, a newline is
 * accepted after each argument.
 */
14248static void
14249parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14250 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14251
14252 // First we need to check if the next token is one that could be the start
14253 // of an argument. If it's not, then we can just return.
14254 if (
14255 match2(parser, terminator, PM_TOKEN_EOF) ||
14256 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14257 context_terminator(parser->current_context->context, &parser->current)
14258 ) {
14259 return;
14260 }
14261
 // Flags describing what has been parsed so far; they drive the ordering
 // errors reported below.
14262 bool parsed_first_argument = false;
14263 bool parsed_bare_hash = false;
14264 bool parsed_block_argument = false;
14265 bool parsed_forwarding_arguments = false;
14266
14267 while (!match1(parser, PM_TOKEN_EOF)) {
14268 if (parsed_forwarding_arguments) {
14269 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14270 }
14271
14272 pm_node_t *argument = NULL;
14273
14274 switch (parser->current.type) {
14275 case PM_TOKEN_USTAR_STAR:
14276 case PM_TOKEN_LABEL: {
 // A `**` or a label starts a bare keyword hash. Its elements are
 // parsed by parse_assocs and the hash is appended as one argument.
14277 if (parsed_bare_hash) {
14278 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14279 }
14280
14281 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14282 argument = (pm_node_t *) hash;
14283
14284 pm_static_literals_t hash_keys = { 0 };
14285 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14286
14287 parse_arguments_append(parser, arguments, argument);
14288
14289 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14290 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14291 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14292
14293 pm_static_literals_free(&hash_keys);
14294 parsed_bare_hash = true;
14295
14296 break;
14297 }
14298 case PM_TOKEN_UAMPERSAND: {
14299 parser_lex(parser);
14300 pm_token_t operator = parser->previous;
14301 pm_node_t *expression = NULL;
14302
14303 if (token_begins_expression_p(parser->current.type)) {
14304 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14305 } else {
14306 pm_parser_scope_forwarding_block_check(parser, &operator);
14307 }
14308
 // The first block argument is stored in arguments->block. Any later
 // one is appended to the regular argument list instead.
14309 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
14310 if (parsed_block_argument) {
14311 parse_arguments_append(parser, arguments, argument);
14312 } else {
14313 arguments->block = argument;
14314 }
14315
14316 if (match1(parser, PM_TOKEN_COMMA)) {
14317 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14318 }
14319
14320 parsed_block_argument = true;
14321 break;
14322 }
14323 case PM_TOKEN_USTAR: {
14324 parser_lex(parser);
14325 pm_token_t operator = parser->previous;
14326
 // A `*` directly followed by `)`, `,`, `;` or `]` has no operand, so a
 // splat node without an expression is created.
14327 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14328 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14329 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14330 if (parsed_bare_hash) {
14331 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14332 }
14333 } else {
14334 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14335
14336 if (parsed_bare_hash) {
14337 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14338 }
14339
14340 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14341 }
14342
14343 parse_arguments_append(parser, arguments, argument);
14344 break;
14345 }
14346 case PM_TOKEN_UDOT_DOT_DOT: {
14347 if (accepts_forwarding) {
14348 parser_lex(parser);
14349
14350 if (token_begins_expression_p(parser->current.type)) {
14351 // If the token begins an expression then this ... was
14352 // not actually argument forwarding but was instead a
14353 // range.
14354 pm_token_t operator = parser->previous;
14355 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14356
14357 // If we parse a range, we need to validate that we
14358 // didn't accidentally violate the nonassoc rules of the
14359 // ... operator.
14360 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14361 pm_range_node_t *range = (pm_range_node_t *) right;
14362 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14363 }
14364
14365 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14366 } else {
14367 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14368 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14369 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14370 }
14371
14372 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14373 parse_arguments_append(parser, arguments, argument);
14374 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14375 arguments->has_forwarding = true;
14376 parsed_forwarding_arguments = true;
14377 break;
14378 }
14379 }
14380 }
 // No break is reached above when forwarding is not accepted or when the
 // `...` turned out to begin a range, so control continues into the
 // default case. In the range case `argument` is already set, which is why
 // the default case only parses an expression when it is still NULL.
14382 default: {
14383 if (argument == NULL) {
14384 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14385 }
14386
14387 bool contains_keywords = false;
14388 bool contains_keyword_splat = false;
14389
 // If the argument is a label symbol or is followed by `=>`, it is the
 // first key of a bare hash and is wrapped in a keyword hash node.
14390 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14391 if (parsed_bare_hash) {
14392 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14393 }
14394
14395 pm_token_t operator;
14396 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14397 operator = parser->previous;
14398 } else {
14399 operator = not_provided(parser);
14400 }
14401
14402 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14403 contains_keywords = true;
14404
14405 // Create the set of static literals for this hash.
14406 pm_static_literals_t hash_keys = { 0 };
14407 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14408
14409 // Finish parsing the one we are part way through.
14410 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14411 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14412
14413 pm_keyword_hash_node_elements_append(bare_hash, argument);
14414 argument = (pm_node_t *) bare_hash;
14415
14416 // Then parse more if we have a comma
14417 if (accept1(parser, PM_TOKEN_COMMA) && (
14418 token_begins_expression_p(parser->current.type) ||
14419 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14420 )) {
14421 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14422 }
14423
14424 pm_static_literals_free(&hash_keys);
14425 parsed_bare_hash = true;
14426 }
14427
14428 parse_arguments_append(parser, arguments, argument);
14429
14430 pm_node_flags_t flags = 0;
14431 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14432 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14433 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14434
14435 break;
14436 }
14437 }
14438
14439 parsed_first_argument = true;
14440
14441 // If parsing the argument failed, we need to stop parsing arguments.
14442 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14443
14444 // If the terminator of these arguments is not EOF, then we have a
14445 // specific token we're looking for. In that case we can accept a
14446 // newline here because it is not functioning as a statement terminator.
14447 bool accepted_newline = false;
14448 if (terminator != PM_TOKEN_EOF) {
14449 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14450 }
14451
14452 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14453 // If we previously were on a comma and we just parsed a bare hash,
14454 // then we want to continue parsing arguments. This is because the
14455 // comma was grabbed up by the hash parser.
14456 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14457 // If there was a comma, then we need to check if we also accepted a
14458 // newline. If we did, then this is a syntax error.
14459 if (accepted_newline) {
14460 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14461 }
14462
14463 // If this is a command call and an argument takes a block,
14464 // there can be no further arguments. For example,
14465 // `foo(bar 1 do end, 2)` should be rejected.
14466 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
14467 pm_call_node_t *call = (pm_call_node_t *) argument;
14468 if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
14469 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14470 break;
14471 }
14472 }
14473 } else {
14474 // If there is no comma at the end of the argument list then we're
14475 // done parsing arguments and can break out of this loop.
14476 break;
14477 }
14478
14479 // If we hit the terminator, then that means we have a trailing comma so
14480 // we can accept that output as well.
14481 if (match1(parser, terminator)) break;
14482 }
14483}
14484
14496parse_required_destructured_parameter(pm_parser_t *parser) {
14497 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14498
14499 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14500 pm_multi_target_node_opening_set(node, &parser->previous);
14501
14502 do {
14503 pm_node_t *param;
14504
14505 // If we get here then we have a trailing comma, which isn't allowed in
14506 // the grammar. In other places, multi targets _do_ allow trailing
14507 // commas, so here we'll assume this is a mistake of the user not
14508 // knowing it's not allowed here.
14509 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14510 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14511 pm_multi_target_node_targets_append(parser, node, param);
14512 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14513 break;
14514 }
14515
14516 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14517 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14518 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14519 pm_token_t star = parser->previous;
14520 pm_node_t *value = NULL;
14521
14522 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14523 pm_token_t name = parser->previous;
14524 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14525 if (pm_parser_parameter_name_check(parser, &name)) {
14526 pm_node_flag_set_repeated_parameter(value);
14527 }
14528 pm_parser_local_add_token(parser, &name, 1);
14529 }
14530
14531 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14532 } else {
14533 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14534 pm_token_t name = parser->previous;
14535
14536 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14537 if (pm_parser_parameter_name_check(parser, &name)) {
14538 pm_node_flag_set_repeated_parameter(param);
14539 }
14540 pm_parser_local_add_token(parser, &name, 1);
14541 }
14542
14543 pm_multi_target_node_targets_append(parser, node, param);
14544 } while (accept1(parser, PM_TOKEN_COMMA));
14545
14546 accept1(parser, PM_TOKEN_NEWLINE);
14547 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14548 pm_multi_target_node_closing_set(node, &parser->previous);
14549
14550 return node;
14551}
14552
/**
 * The states used while checking that parameters appear in a valid order.
 *
 * Apart from PM_PARAMETERS_NO_CHANGE, the values are compared numerically by
 * update_parameter_state: parsing starts at PM_PARAMETERS_ORDER_NONE and the
 * current state only ever moves to a smaller value, so a token whose state is
 * greater than the current one is out of order.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** The state for the block (`&`) and forwarding (`...`) tokens. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** The state for the `**` tokens. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** The state for label tokens. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** Not assigned to any token in the parameters_ordering table. */
    PM_PARAMETERS_ORDER_REST = 4,

    /** The state for the `*` tokens, and for named parameters that follow an optional one. */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** The state for the `=` token. */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** The state for identifier and `(` tokens. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** The largest value; no token maps to it in the parameters_ordering table. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14568
14572static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14573 [0] = PM_PARAMETERS_NO_CHANGE,
14574 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14575 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14576 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14577 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14578 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14579 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14580 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14581 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14582 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14583 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14584 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14585};
14586
14594static bool
14595update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14596 pm_parameters_order_t state = parameters_ordering[token->type];
14597 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14598
14599 // If we see another ordered argument after a optional argument
14600 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14601 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14602 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14603 return true;
14604 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14605 return true;
14606 }
14607
14608 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14609 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14610 return false;
14611 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14612 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14613 return false;
14614 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14615 // We know what transition we failed on, so we can provide a better error here.
14616 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14617 return false;
14618 }
14619
14620 if (state < *current) *current = state;
14621 return true;
14622}
14623
14629static inline void
14630refute_optional_parameter(pm_parser_t *parser) {
14631 if (match1(parser, PM_TOKEN_EQUAL)) {
14632 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
14633 }
14634}
14635
14639static pm_parameters_node_t *
14640parse_parameters(
14641 pm_parser_t *parser,
14642 pm_binding_power_t binding_power,
14643 bool uses_parentheses,
14644 bool allows_trailing_comma,
14645 bool allows_forwarding_parameters,
14646 bool accepts_blocks_in_defaults,
14647 bool in_block,
14648 uint16_t depth
14649) {
14650 pm_do_loop_stack_push(parser, false);
14651
14652 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14653 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14654
14655 while (true) {
14656 bool parsing = true;
14657
14658 switch (parser->current.type) {
14659 case PM_TOKEN_PARENTHESIS_LEFT: {
14660 update_parameter_state(parser, &parser->current, &order);
14661 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14662
14663 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14664 pm_parameters_node_requireds_append(params, param);
14665 } else {
14666 pm_parameters_node_posts_append(params, param);
14667 }
14668 break;
14669 }
14670 case PM_TOKEN_UAMPERSAND:
14671 case PM_TOKEN_AMPERSAND: {
14672 update_parameter_state(parser, &parser->current, &order);
14673 parser_lex(parser);
14674
14675 pm_token_t operator = parser->previous;
14676 pm_token_t name;
14677
14678 bool repeated = false;
14679 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14680 name = parser->previous;
14681 repeated = pm_parser_parameter_name_check(parser, &name);
14682 pm_parser_local_add_token(parser, &name, 1);
14683 } else {
14684 name = not_provided(parser);
14685 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14686 }
14687
14688 if (!uses_parentheses) {
14689 refute_optional_parameter(parser);
14690 }
14691
14692 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14693 if (repeated) {
14694 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14695 }
14696 if (params->block == NULL) {
14697 pm_parameters_node_block_set(params, param);
14698 } else {
14699 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14700 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14701 }
14702
14703 break;
14704 }
14705 case PM_TOKEN_UDOT_DOT_DOT: {
14706 if (!allows_forwarding_parameters) {
14707 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14708 }
14709
14710 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14711 parser_lex(parser);
14712
14713 if (!uses_parentheses) {
14714 refute_optional_parameter(parser);
14715 }
14716
14717 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14718 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14719
14720 if (params->keyword_rest != NULL) {
14721 // If we already have a keyword rest parameter, then we replace it with the
14722 // forwarding parameter and move the keyword rest parameter to the posts list.
14723 pm_node_t *keyword_rest = params->keyword_rest;
14724 pm_parameters_node_posts_append(params, keyword_rest);
14725 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14726 params->keyword_rest = NULL;
14727 }
14728
14729 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14730 break;
14731 }
14732 case PM_TOKEN_CLASS_VARIABLE:
14733 case PM_TOKEN_IDENTIFIER:
14734 case PM_TOKEN_CONSTANT:
14735 case PM_TOKEN_INSTANCE_VARIABLE:
14736 case PM_TOKEN_GLOBAL_VARIABLE:
14737 case PM_TOKEN_METHOD_NAME: {
14738 parser_lex(parser);
14739 switch (parser->previous.type) {
14740 case PM_TOKEN_CONSTANT:
14741 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14742 break;
14743 case PM_TOKEN_INSTANCE_VARIABLE:
14744 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14745 break;
14746 case PM_TOKEN_GLOBAL_VARIABLE:
14747 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14748 break;
14749 case PM_TOKEN_CLASS_VARIABLE:
14750 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14751 break;
14752 case PM_TOKEN_METHOD_NAME:
14753 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14754 break;
14755 default: break;
14756 }
14757
14758 if (parser->current.type == PM_TOKEN_EQUAL) {
14759 update_parameter_state(parser, &parser->current, &order);
14760 } else {
14761 update_parameter_state(parser, &parser->previous, &order);
14762 }
14763
14764 pm_token_t name = parser->previous;
14765 bool repeated = pm_parser_parameter_name_check(parser, &name);
14766 pm_parser_local_add_token(parser, &name, 1);
14767
14768 if (match1(parser, PM_TOKEN_EQUAL)) {
14769 pm_token_t operator = parser->current;
14770 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14771 parser_lex(parser);
14772
14773 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14774 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14775
14776 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14777 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14778 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14779
14780 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14781
14782 if (repeated) {
14783 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14784 }
14785 pm_parameters_node_optionals_append(params, param);
14786
14787 // If the value of the parameter increased the number of
14788 // reads of that parameter, then we need to warn that we
14789 // have a circular definition.
14790 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14791 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14792 }
14793
14794 context_pop(parser);
14795
14796 // If parsing the value of the parameter resulted in error recovery,
14797 // then we can put a missing node in its place and stop parsing the
14798 // parameters entirely now.
14799 if (parser->recovering) {
14800 parsing = false;
14801 break;
14802 }
14803 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14804 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14805 if (repeated) {
14806 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14807 }
14808 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14809 } else {
14810 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14811 if (repeated) {
14812 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14813 }
14814 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14815 }
14816
14817 break;
14818 }
14819 case PM_TOKEN_LABEL: {
14820 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14821 update_parameter_state(parser, &parser->current, &order);
14822
14823 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14824 parser_lex(parser);
14825
14826 pm_token_t name = parser->previous;
14827 pm_token_t local = name;
14828 local.end -= 1;
14829
14830 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14831 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14832 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14833 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14834 }
14835
14836 bool repeated = pm_parser_parameter_name_check(parser, &local);
14837 pm_parser_local_add_token(parser, &local, 1);
14838
14839 switch (parser->current.type) {
14840 case PM_TOKEN_COMMA:
14841 case PM_TOKEN_PARENTHESIS_RIGHT:
14842 case PM_TOKEN_PIPE: {
14843 context_pop(parser);
14844
14845 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14846 if (repeated) {
14847 pm_node_flag_set_repeated_parameter(param);
14848 }
14849
14850 pm_parameters_node_keywords_append(params, param);
14851 break;
14852 }
14853 case PM_TOKEN_SEMICOLON:
14854 case PM_TOKEN_NEWLINE: {
14855 context_pop(parser);
14856
14857 if (uses_parentheses) {
14858 parsing = false;
14859 break;
14860 }
14861
14862 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14863 if (repeated) {
14864 pm_node_flag_set_repeated_parameter(param);
14865 }
14866
14867 pm_parameters_node_keywords_append(params, param);
14868 break;
14869 }
14870 default: {
14871 pm_node_t *param;
14872
14873 if (token_begins_expression_p(parser->current.type)) {
14874 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14875 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14876
14877 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14878 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14879 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14880
14881 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14882 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14883 }
14884
14885 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14886 }
14887 else {
14888 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14889 }
14890
14891 if (repeated) {
14892 pm_node_flag_set_repeated_parameter(param);
14893 }
14894
14895 context_pop(parser);
14896 pm_parameters_node_keywords_append(params, param);
14897
14898 if (!uses_parentheses) {
14899 refute_optional_parameter(parser);
14900 }
14901
14902 // If parsing the value of the parameter resulted in error recovery,
14903 // then we can put a missing node in its place and stop parsing the
14904 // parameters entirely now.
14905 if (parser->recovering) {
14906 parsing = false;
14907 break;
14908 }
14909 }
14910 }
14911
14912 parser->in_keyword_arg = false;
14913 break;
14914 }
14915 case PM_TOKEN_USTAR:
14916 case PM_TOKEN_STAR: {
14917 update_parameter_state(parser, &parser->current, &order);
14918 parser_lex(parser);
14919
14920 pm_token_t operator = parser->previous;
14921 pm_token_t name;
14922 bool repeated = false;
14923
14924 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14925 name = parser->previous;
14926 repeated = pm_parser_parameter_name_check(parser, &name);
14927 pm_parser_local_add_token(parser, &name, 1);
14928 } else {
14929 name = not_provided(parser);
14930 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14931 }
14932
14933 if (!uses_parentheses) {
14934 refute_optional_parameter(parser);
14935 }
14936
14937 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14938 if (repeated) {
14939 pm_node_flag_set_repeated_parameter(param);
14940 }
14941
14942 if (params->rest == NULL) {
14943 pm_parameters_node_rest_set(params, param);
14944 } else {
14945 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14946 pm_parameters_node_posts_append(params, param);
14947 }
14948
14949 break;
14950 }
14951 case PM_TOKEN_STAR_STAR:
14952 case PM_TOKEN_USTAR_STAR: {
14953 pm_parameters_order_t previous_order = order;
14954 update_parameter_state(parser, &parser->current, &order);
14955 parser_lex(parser);
14956
14957 pm_token_t operator = parser->previous;
14958 pm_node_t *param;
14959
14960 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14961 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14962 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14963 }
14964
14965 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14966 } else {
14967 pm_token_t name;
14968
14969 bool repeated = false;
14970 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14971 name = parser->previous;
14972 repeated = pm_parser_parameter_name_check(parser, &name);
14973 pm_parser_local_add_token(parser, &name, 1);
14974 } else {
14975 name = not_provided(parser);
14976 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14977 }
14978
14979 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14980 if (repeated) {
14981 pm_node_flag_set_repeated_parameter(param);
14982 }
14983 }
14984
14985 if (!uses_parentheses) {
14986 refute_optional_parameter(parser);
14987 }
14988
14989 if (params->keyword_rest == NULL) {
14990 pm_parameters_node_keyword_rest_set(params, param);
14991 } else {
14992 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14993 pm_parameters_node_posts_append(params, param);
14994 }
14995
14996 break;
14997 }
14998 default:
14999 if (parser->previous.type == PM_TOKEN_COMMA) {
15000 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
15001 // If we get here, then we have a trailing comma in a
15002 // block parameter list.
15003 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
15004
15005 if (params->rest == NULL) {
15006 pm_parameters_node_rest_set(params, param);
15007 } else {
15008 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
15009 pm_parameters_node_posts_append(params, (pm_node_t *) param);
15010 }
15011 } else {
15012 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
15013 }
15014 }
15015
15016 parsing = false;
15017 break;
15018 }
15019
15020 // If we hit some kind of issue while parsing the parameter, this would
15021 // have been set to false. In that case, we need to break out of the
15022 // loop.
15023 if (!parsing) break;
15024
15025 bool accepted_newline = false;
15026 if (uses_parentheses) {
15027 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
15028 }
15029
15030 if (accept1(parser, PM_TOKEN_COMMA)) {
15031 // If there was a comma, but we also accepted a newline, then this
15032 // is a syntax error.
15033 if (accepted_newline) {
15034 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
15035 }
15036 } else {
15037 // If there was no comma, then we're done parsing parameters.
15038 break;
15039 }
15040 }
15041
15042 pm_do_loop_stack_pop(parser);
15043
15044 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
15045 if (params->base.location.start == params->base.location.end) {
15046 pm_node_destroy(parser, (pm_node_t *) params);
15047 return NULL;
15048 }
15049
15050 return params;
15051}
15052
15057static size_t
15058token_newline_index(const pm_parser_t *parser) {
15059 if (parser->heredoc_end == NULL) {
15060 // This is the common case. In this case we can look at the previously
15061 // recorded newline in the newline list and subtract from the current
15062 // offset.
15063 return parser->newline_list.size - 1;
15064 } else {
15065 // This is unlikely. This is the case that we have already parsed the
15066 // start of a heredoc, so we cannot rely on looking at the previous
15067 // offset of the newline list, and instead must go through the whole
15068 // process of a binary search for the line number.
15069 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
15070 }
15071}
15072
15077static int64_t
15078token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
15079 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
15080 const uint8_t *end = token->start;
15081
15082 // Skip over the BOM if it is present.
15083 if (
15084 newline_index == 0 &&
15085 parser->start[0] == 0xef &&
15086 parser->start[1] == 0xbb &&
15087 parser->start[2] == 0xbf
15088 ) cursor += 3;
15089
15090 int64_t column = 0;
15091 for (; cursor < end; cursor++) {
15092 switch (*cursor) {
15093 case '\t':
15094 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
15095 break;
15096 case ' ':
15097 column++;
15098 break;
15099 default:
15100 column++;
15101 if (break_on_non_space) return -1;
15102 break;
15103 }
15104 }
15105
15106 return column;
15107}
15108
15113static void
15114parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
15115 // If these warnings are disabled (unlikely), then we can just return.
15116 if (!parser->warn_mismatched_indentation) return;
15117
15118 // If the tokens are on the same line, we do not warn.
15119 size_t closing_newline_index = token_newline_index(parser);
15120 if (opening_newline_index == closing_newline_index) return;
15121
15122 // If the opening token has anything other than spaces or tabs before it,
15123 // then we do not warn. This is unless we are matching up an `if`/`end` pair
15124 // and the `if` immediately follows an `else` keyword.
15125 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
15126 if (!if_after_else && (opening_column == -1)) return;
15127
15128 // Get a reference to the closing token off the current parser. This assumes
15129 // that the caller has placed this in the correct position.
15130 pm_token_t *closing_token = &parser->current;
15131
15132 // If the tokens are at the same indentation, we do not warn.
15133 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
15134 if ((closing_column == -1) || (opening_column == closing_column)) return;
15135
15136 // If the closing column is greater than the opening column and we are
15137 // allowing indentation, then we do not warn.
15138 if (allow_indent && (closing_column > opening_column)) return;
15139
15140 // Otherwise, add a warning.
15141 PM_PARSER_WARN_FORMAT(
15142 parser,
15143 closing_token->start,
15144 closing_token->end,
15145 PM_WARN_INDENTATION_MISMATCH,
15146 (int) (closing_token->end - closing_token->start),
15147 (const char *) closing_token->start,
15148 (int) (opening_token->end - opening_token->start),
15149 (const char *) opening_token->start,
15150 ((int32_t) opening_newline_index) + parser->start_line
15151 );
15152}
15153
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * parse_rescues uses this to pick the matching PM_CONTEXT_*_RESCUE,
 * PM_CONTEXT_*_ELSE and PM_CONTEXT_*_ENSURE contexts.
 */
typedef enum {
    /** Selects the PM_CONTEXT_BEGIN_* contexts. */
    PM_RESCUES_BEGIN = 1,

    /** Selects the PM_CONTEXT_BLOCK_* contexts. */
    PM_RESCUES_BLOCK = 2,

    /** Selects the PM_CONTEXT_CLASS_* contexts. */
    PM_RESCUES_CLASS = 3,

    /** Selects the PM_CONTEXT_DEF_* contexts. */
    PM_RESCUES_DEF = 4,

    /** Selects the PM_CONTEXT_LAMBDA_* contexts. */
    PM_RESCUES_LAMBDA = 5,

    /** Selects the PM_CONTEXT_MODULE_* contexts. */
    PM_RESCUES_MODULE = 6,

    /** Selects the PM_CONTEXT_SCLASS_* contexts. */
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
15163
15168static inline void
15169parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15170 pm_rescue_node_t *current = NULL;
15171
15172 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15173 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15174 parser_lex(parser);
15175
15176 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15177
15178 switch (parser->current.type) {
15179 case PM_TOKEN_EQUAL_GREATER: {
15180 // Here we have an immediate => after the rescue keyword, in which case
15181 // we're going to have an empty list of exceptions to rescue (which
15182 // implies StandardError).
15183 parser_lex(parser);
15184 pm_rescue_node_operator_set(rescue, &parser->previous);
15185
15186 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15187 reference = parse_target(parser, reference, false, false);
15188
15189 pm_rescue_node_reference_set(rescue, reference);
15190 break;
15191 }
15192 case PM_TOKEN_NEWLINE:
15193 case PM_TOKEN_SEMICOLON:
15194 case PM_TOKEN_KEYWORD_THEN:
15195 // Here we have a terminator for the rescue keyword, in which
15196 // case we're going to just continue on.
15197 break;
15198 default: {
15199 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15200 // Here we have something that could be an exception expression, so
15201 // we'll attempt to parse it here and any others delimited by commas.
15202
15203 do {
15204 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15205 pm_rescue_node_exceptions_append(rescue, expression);
15206
15207 // If we hit a newline, then this is the end of the rescue expression. We
15208 // can continue on to parse the statements.
15209 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15210
15211 // If we hit a `=>` then we're going to parse the exception variable. Once
15212 // we've done that, we'll break out of the loop and parse the statements.
15213 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15214 pm_rescue_node_operator_set(rescue, &parser->previous);
15215
15216 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15217 reference = parse_target(parser, reference, false, false);
15218
15219 pm_rescue_node_reference_set(rescue, reference);
15220 break;
15221 }
15222 } while (accept1(parser, PM_TOKEN_COMMA));
15223 }
15224 }
15225 }
15226
15227 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15228 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15229 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15230 }
15231 } else {
15232 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15233 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15234 }
15235
15236 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
15237 pm_accepts_block_stack_push(parser, true);
15238 pm_context_t context;
15239
15240 switch (type) {
15241 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15242 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15243 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15244 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15245 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15246 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15247 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15248 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15249 }
15250
15251 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15252 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15253
15254 pm_accepts_block_stack_pop(parser);
15255 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15256 }
15257
15258 if (current == NULL) {
15259 pm_begin_node_rescue_clause_set(parent_node, rescue);
15260 } else {
15261 pm_rescue_node_subsequent_set(current, rescue);
15262 }
15263
15264 current = rescue;
15265 }
15266
15267 // The end node locations on rescue nodes will not be set correctly
15268 // since we won't know the end until we've found all subsequent
15269 // clauses. This sets the end location on all rescues once we know it.
15270 if (current != NULL) {
15271 const uint8_t *end_to_set = current->base.location.end;
15272 pm_rescue_node_t *clause = parent_node->rescue_clause;
15273
15274 while (clause != NULL) {
15275 clause->base.location.end = end_to_set;
15276 clause = clause->subsequent;
15277 }
15278 }
15279
15280 pm_token_t else_keyword;
15281 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15282 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15283 opening_newline_index = token_newline_index(parser);
15284
15285 else_keyword = parser->current;
15286 opening = &else_keyword;
15287
15288 parser_lex(parser);
15289 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15290
15291 pm_statements_node_t *else_statements = NULL;
15292 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15293 pm_accepts_block_stack_push(parser, true);
15294 pm_context_t context;
15295
15296 switch (type) {
15297 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15298 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15299 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15300 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15301 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15302 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15303 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15304 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15305 }
15306
15307 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15308 pm_accepts_block_stack_pop(parser);
15309
15310 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15311 }
15312
15313 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15314 pm_begin_node_else_clause_set(parent_node, else_clause);
15315
15316 // If we don't have a `current` rescue node, then this is a dangling
15317 // else, and it's an error.
15318 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15319 }
15320
15321 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15322 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15323 pm_token_t ensure_keyword = parser->current;
15324
15325 parser_lex(parser);
15326 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15327
15328 pm_statements_node_t *ensure_statements = NULL;
15329 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15330 pm_accepts_block_stack_push(parser, true);
15331 pm_context_t context;
15332
15333 switch (type) {
15334 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15335 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15336 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15337 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15338 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15339 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15340 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15341 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15342 }
15343
15344 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15345 pm_accepts_block_stack_pop(parser);
15346
15347 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15348 }
15349
15350 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15351 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15352 }
15353
15354 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15355 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15356 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15357 } else {
15358 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15359 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15360 }
15361}
15362
15367static pm_begin_node_t *
15368parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15369 pm_token_t begin_keyword = not_provided(parser);
15370 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15371
15372 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15373 node->base.location.start = start;
15374
15375 return node;
15376}
15377
15382parse_block_parameters(
15383 pm_parser_t *parser,
15384 bool allows_trailing_comma,
15385 const pm_token_t *opening,
15386 bool is_lambda_literal,
15387 bool accepts_blocks_in_defaults,
15388 uint16_t depth
15389) {
15390 pm_parameters_node_t *parameters = NULL;
15391 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15392 parameters = parse_parameters(
15393 parser,
15394 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15395 false,
15396 allows_trailing_comma,
15397 false,
15398 accepts_blocks_in_defaults,
15399 true,
15400 (uint16_t) (depth + 1)
15401 );
15402 }
15403
15404 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15405 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15406 accept1(parser, PM_TOKEN_NEWLINE);
15407
15408 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15409 do {
15410 switch (parser->current.type) {
15411 case PM_TOKEN_CONSTANT:
15412 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15413 parser_lex(parser);
15414 break;
15415 case PM_TOKEN_INSTANCE_VARIABLE:
15416 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15417 parser_lex(parser);
15418 break;
15419 case PM_TOKEN_GLOBAL_VARIABLE:
15420 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15421 parser_lex(parser);
15422 break;
15423 case PM_TOKEN_CLASS_VARIABLE:
15424 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15425 parser_lex(parser);
15426 break;
15427 default:
15428 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15429 break;
15430 }
15431
15432 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15433 pm_parser_local_add_token(parser, &parser->previous, 1);
15434
15435 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15436 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15437
15438 pm_block_parameters_node_append_local(block_parameters, local);
15439 } while (accept1(parser, PM_TOKEN_COMMA));
15440 }
15441 }
15442
15443 return block_parameters;
15444}
15445
15450static bool
15451outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15452 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15453 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15454 }
15455
15456 return false;
15457}
15458
/**
 * The names of the numbered parameters, `_1` through `_9`. The entry at index
 * i is the name of numbered parameter i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
15467
15473static pm_node_t *
15474parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15475 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15476
15477 // If we have ordinary parameters, then we will return them as the set of
15478 // parameters.
15479 if (parameters != NULL) {
15480 // If we also have implicit parameters, then this is an error.
15481 if (implicit_parameters->size > 0) {
15482 pm_node_t *node = implicit_parameters->nodes[0];
15483
15484 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15485 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15486 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15487 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15488 } else {
15489 assert(false && "unreachable");
15490 }
15491 }
15492
15493 return parameters;
15494 }
15495
15496 // If we don't have any implicit parameters, then the set of parameters is
15497 // NULL.
15498 if (implicit_parameters->size == 0) {
15499 return NULL;
15500 }
15501
15502 // If we don't have ordinary parameters, then we now must validate our set
15503 // of implicit parameters. We can only have numbered parameters or it, but
15504 // they cannot be mixed.
15505 uint8_t numbered_parameter = 0;
15506 bool it_parameter = false;
15507
15508 for (size_t index = 0; index < implicit_parameters->size; index++) {
15509 pm_node_t *node = implicit_parameters->nodes[index];
15510
15511 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15512 if (it_parameter) {
15513 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15514 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15515 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15516 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15517 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15518 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15519 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15520 } else {
15521 assert(false && "unreachable");
15522 }
15523 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15524 if (numbered_parameter > 0) {
15525 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15526 } else {
15527 it_parameter = true;
15528 }
15529 }
15530 }
15531
15532 if (numbered_parameter > 0) {
15533 // Go through the parent scopes and mark them as being disallowed from
15534 // using numbered parameters because this inner scope is using them.
15535 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15536 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15537 }
15538
15539 const pm_location_t location = { .start = opening->start, .end = closing->end };
15540 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15541 }
15542
15543 if (it_parameter) {
15544 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15545 }
15546
15547 return NULL;
15548}
15549
15553static pm_block_node_t *
15554parse_block(pm_parser_t *parser, uint16_t depth) {
15555 pm_token_t opening = parser->previous;
15556 accept1(parser, PM_TOKEN_NEWLINE);
15557
15558 pm_accepts_block_stack_push(parser, true);
15559 pm_parser_scope_push(parser, false);
15560
15561 pm_block_parameters_node_t *block_parameters = NULL;
15562
15563 if (accept1(parser, PM_TOKEN_PIPE)) {
15564 pm_token_t block_parameters_opening = parser->previous;
15565 if (match1(parser, PM_TOKEN_PIPE)) {
15566 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15567 parser->command_start = true;
15568 parser_lex(parser);
15569 } else {
15570 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15571 accept1(parser, PM_TOKEN_NEWLINE);
15572 parser->command_start = true;
15573 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15574 }
15575
15576 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15577 }
15578
15579 accept1(parser, PM_TOKEN_NEWLINE);
15580 pm_node_t *statements = NULL;
15581
15582 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15583 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15584 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15585 }
15586
15587 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15588 } else {
15589 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15590 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
15591 pm_accepts_block_stack_push(parser, true);
15592 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15593 pm_accepts_block_stack_pop(parser);
15594 }
15595
15596 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15597 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15598 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15599 }
15600 }
15601
15602 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15603 }
15604
15605 pm_constant_id_list_t locals;
15606 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15607 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15608
15609 pm_parser_scope_pop(parser);
15610 pm_accepts_block_stack_pop(parser);
15611
15612 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15613}
15614
15620static bool
15621parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15622 bool found = false;
15623
15624 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15625 found |= true;
15626 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15627
15628 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15629 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15630 } else {
15631 pm_accepts_block_stack_push(parser, true);
15632 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15633
15634 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15635 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15636 parser->previous.start = parser->previous.end;
15637 parser->previous.type = PM_TOKEN_MISSING;
15638 }
15639
15640 pm_accepts_block_stack_pop(parser);
15641 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15642 }
15643 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15644 found |= true;
15645 pm_accepts_block_stack_push(parser, false);
15646
15647 // If we get here, then the subsequent token cannot be used as an infix
15648 // operator. In this case we assume the subsequent token is part of an
15649 // argument to this method call.
15650 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15651
15652 // If we have done with the arguments and still not consumed the comma,
15653 // then we have a trailing comma where we need to check whether it is
15654 // allowed or not.
15655 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15656 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15657 }
15658
15659 pm_accepts_block_stack_pop(parser);
15660 }
15661
15662 // If we're at the end of the arguments, we can now check if there is a block
15663 // node that starts with a {. If there is, then we can parse it and add it to
15664 // the arguments.
15665 if (accepts_block) {
15666 pm_block_node_t *block = NULL;
15667
15668 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15669 found |= true;
15670 block = parse_block(parser, (uint16_t) (depth + 1));
15671 pm_arguments_validate_block(parser, arguments, block);
15672 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15673 found |= true;
15674 block = parse_block(parser, (uint16_t) (depth + 1));
15675 }
15676
15677 if (block != NULL) {
15678 if (arguments->block == NULL && !arguments->has_forwarding) {
15679 arguments->block = (pm_node_t *) block;
15680 } else {
15681 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15682
15683 if (arguments->block != NULL) {
15684 if (arguments->arguments == NULL) {
15685 arguments->arguments = pm_arguments_node_create(parser);
15686 }
15687 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15688 }
15689 arguments->block = (pm_node_t *) block;
15690 }
15691 }
15692 }
15693
15694 return found;
15695}
15696
15701static void
15702parse_return(pm_parser_t *parser, pm_node_t *node) {
15703 bool in_sclass = false;
15704 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15705 switch (context_node->context) {
15709 case PM_CONTEXT_BEGIN:
15710 case PM_CONTEXT_CASE_IN:
15713 case PM_CONTEXT_DEFINED:
15714 case PM_CONTEXT_ELSE:
15715 case PM_CONTEXT_ELSIF:
15716 case PM_CONTEXT_EMBEXPR:
15718 case PM_CONTEXT_FOR:
15719 case PM_CONTEXT_IF:
15721 case PM_CONTEXT_MAIN:
15723 case PM_CONTEXT_PARENS:
15724 case PM_CONTEXT_POSTEXE:
15726 case PM_CONTEXT_PREEXE:
15728 case PM_CONTEXT_TERNARY:
15729 case PM_CONTEXT_UNLESS:
15730 case PM_CONTEXT_UNTIL:
15731 case PM_CONTEXT_WHILE:
15732 // Keep iterating up the lists of contexts, because returns can
15733 // see through these.
15734 continue;
15738 case PM_CONTEXT_SCLASS:
15739 in_sclass = true;
15740 continue;
15744 case PM_CONTEXT_CLASS:
15748 case PM_CONTEXT_MODULE:
15749 // These contexts are invalid for a return.
15750 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15751 return;
15761 case PM_CONTEXT_DEF:
15767 // These contexts are valid for a return, and we should not
15768 // continue to loop.
15769 return;
15770 case PM_CONTEXT_NONE:
15771 // This case should never happen.
15772 assert(false && "unreachable");
15773 break;
15774 }
15775 }
15776 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
15777 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15778 }
15779}
15780
15785static void
15786parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15787 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15788 switch (context_node->context) {
15794 case PM_CONTEXT_DEFINED:
15795 case PM_CONTEXT_FOR:
15802 case PM_CONTEXT_POSTEXE:
15803 case PM_CONTEXT_UNTIL:
15804 case PM_CONTEXT_WHILE:
15805 // These are the good cases. We're allowed to have a block exit
15806 // in these contexts.
15807 return;
15808 case PM_CONTEXT_DEF:
15813 case PM_CONTEXT_MAIN:
15814 case PM_CONTEXT_PREEXE:
15815 case PM_CONTEXT_SCLASS:
15819 // These are the bad cases. We're not allowed to have a block
15820 // exit in these contexts.
15821 //
15822 // If we get here, then we're about to mark this block exit
15823 // as invalid. However, it could later _become_ valid if we
15824 // find a trailing while/until on the expression. In this
15825 // case instead of adding the error here, we'll add the
15826 // block exit to the list of exits for the expression, and
15827 // the node parsing will handle validating it instead.
15828 assert(parser->current_block_exits != NULL);
15829 pm_node_list_append(parser->current_block_exits, node);
15830 return;
15834 case PM_CONTEXT_BEGIN:
15835 case PM_CONTEXT_CASE_IN:
15840 case PM_CONTEXT_CLASS:
15842 case PM_CONTEXT_ELSE:
15843 case PM_CONTEXT_ELSIF:
15844 case PM_CONTEXT_EMBEXPR:
15846 case PM_CONTEXT_IF:
15850 case PM_CONTEXT_MODULE:
15852 case PM_CONTEXT_PARENS:
15855 case PM_CONTEXT_TERNARY:
15856 case PM_CONTEXT_UNLESS:
15857 // In these contexts we should continue walking up the list of
15858 // contexts.
15859 break;
15860 case PM_CONTEXT_NONE:
15861 // This case should never happen.
15862 assert(false && "unreachable");
15863 break;
15864 }
15865 }
15866}
15867
15872static pm_node_list_t *
15873push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15874 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15875 parser->current_block_exits = current_block_exits;
15876 return previous_block_exits;
15877}
15878
15884static void
15885flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15886 pm_node_t *block_exit;
15887 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15888 const char *type;
15889
15890 switch (PM_NODE_TYPE(block_exit)) {
15891 case PM_BREAK_NODE: type = "break"; break;
15892 case PM_NEXT_NODE: type = "next"; break;
15893 case PM_REDO_NODE: type = "redo"; break;
15894 default: assert(false && "unreachable"); type = ""; break;
15895 }
15896
15897 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15898 }
15899
15900 parser->current_block_exits = previous_block_exits;
15901}
15902
15907static void
15908pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15909 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15910 // If we matched a trailing while/until, then all of the block exits in
15911 // the contained list are valid. In this case we do not need to do
15912 // anything.
15913 parser->current_block_exits = previous_block_exits;
15914 } else if (previous_block_exits != NULL) {
15915 // If we did not matching a trailing while/until, then all of the block
15916 // exits contained in the list are invalid for this specific context.
15917 // However, they could still become valid in a higher level context if
15918 // there is another list above this one. In this case we'll push all of
15919 // the block exits up to the previous list.
15920 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15921 parser->current_block_exits = previous_block_exits;
15922 } else {
15923 // If we did not match a trailing while/until and this was the last
15924 // chance to do so, then all of the block exits in the list are invalid
15925 // and we need to add an error for each of them.
15926 flush_block_exits(parser, previous_block_exits);
15927 }
15928}
15929
15930static inline pm_node_t *
15931parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15932 context_push(parser, PM_CONTEXT_PREDICATE);
15933 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15934 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15935
15936 // Predicates are closed by a term, a "then", or a term and then a "then".
15937 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15938
15939 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15940 predicate_closed = true;
15941 *then_keyword = parser->previous;
15942 }
15943
15944 if (!predicate_closed) {
15945 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15946 }
15947
15948 context_pop(parser);
15949 return predicate;
15950}
15951
15952static inline pm_node_t *
15953parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15954 pm_node_list_t current_block_exits = { 0 };
15955 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15956
15957 pm_token_t keyword = parser->previous;
15958 pm_token_t then_keyword = not_provided(parser);
15959
15960 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15961 pm_statements_node_t *statements = NULL;
15962
15963 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15964 pm_accepts_block_stack_push(parser, true);
15965 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15966 pm_accepts_block_stack_pop(parser);
15967 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15968 }
15969
15970 pm_token_t end_keyword = not_provided(parser);
15971 pm_node_t *parent = NULL;
15972
15973 switch (context) {
15974 case PM_CONTEXT_IF:
15975 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15976 break;
15977 case PM_CONTEXT_UNLESS:
15978 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15979 break;
15980 default:
15981 assert(false && "unreachable");
15982 break;
15983 }
15984
15985 pm_node_t *current = parent;
15986
15987 // Parse any number of elsif clauses. This will form a linked list of if
15988 // nodes pointing to each other from the top.
15989 if (context == PM_CONTEXT_IF) {
15990 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15991 if (parser_end_of_line_p(parser)) {
15992 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15993 }
15994
15995 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15996 pm_token_t elsif_keyword = parser->current;
15997 parser_lex(parser);
15998
15999 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
16000 pm_accepts_block_stack_push(parser, true);
16001
16002 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
16003 pm_accepts_block_stack_pop(parser);
16004 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
16005
16006 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
16007 ((pm_if_node_t *) current)->subsequent = elsif;
16008 current = elsif;
16009 }
16010 }
16011
16012 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
16013 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
16014 opening_newline_index = token_newline_index(parser);
16015
16016 parser_lex(parser);
16017 pm_token_t else_keyword = parser->previous;
16018
16019 pm_accepts_block_stack_push(parser, true);
16020 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
16021 pm_accepts_block_stack_pop(parser);
16022
16023 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
16024 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
16025 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
16026
16027 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
16028
16029 switch (context) {
16030 case PM_CONTEXT_IF:
16031 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
16032 break;
16033 case PM_CONTEXT_UNLESS:
16034 ((pm_unless_node_t *) parent)->else_clause = else_node;
16035 break;
16036 default:
16037 assert(false && "unreachable");
16038 break;
16039 }
16040 } else {
16041 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
16042 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
16043 }
16044
16045 // Set the appropriate end location for all of the nodes in the subtree.
16046 switch (context) {
16047 case PM_CONTEXT_IF: {
16048 pm_node_t *current = parent;
16049 bool recursing = true;
16050
16051 while (recursing) {
16052 switch (PM_NODE_TYPE(current)) {
16053 case PM_IF_NODE:
16054 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
16055 current = ((pm_if_node_t *) current)->subsequent;
16056 recursing = current != NULL;
16057 break;
16058 case PM_ELSE_NODE:
16059 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
16060 recursing = false;
16061 break;
16062 default: {
16063 recursing = false;
16064 break;
16065 }
16066 }
16067 }
16068 break;
16069 }
16070 case PM_CONTEXT_UNLESS:
16071 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
16072 break;
16073 default:
16074 assert(false && "unreachable");
16075 break;
16076 }
16077
16078 pop_block_exits(parser, previous_block_exits);
16079 pm_node_list_free(&current_block_exits);
16080
16081 return parent;
16082}
16083
/**
 * A run of case labels covering the keyword token types listed below. The
 * leading `case` and the trailing `:` are supplied at the point of use, as in
 * `case PM_CASE_KEYWORD:` inside a switch over a token type.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
16099
/**
 * A run of case labels covering the operator token types listed below. The
 * leading `case` and the trailing `:` are supplied at the point of use, as in
 * `case PM_CASE_OPERATOR:` inside a switch over a token type.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
16112
/**
 * A run of case labels covering the token types listed below: numeric
 * literals, the openers of symbol/regexp/string-like literals, a handful of
 * literal keywords, `->`, heredoc starts, and character literals. The leading
 * `case` and the trailing `:` are supplied at the point of use.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
16127
/**
 * A run of case labels covering the token types listed below. The leading
 * `case` and the trailing `:` are supplied at the point of use.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
16136
/**
 * A run of case labels covering the node types listed below. Unlike the
 * PM_CASE_* macros over token types, this one is meant for a switch over a
 * node type. The leading `case` and the trailing `:` are supplied at the point
 * of use.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
16145
16146// Assert here that the flags are the same so that we can safely switch the type
16147// of the node without having to move the flags.
16148PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16149
16154static inline pm_node_flags_t
16155parse_unescaped_encoding(const pm_parser_t *parser) {
16156 if (parser->explicit_encoding != NULL) {
16158 // If the there's an explicit encoding and it's using a UTF-8 escape
16159 // sequence, then mark the string as UTF-8.
16160 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
16161 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16162 // If there's a non-UTF-8 escape sequence being used, then the
16163 // string uses the source encoding, unless the source is marked as
16164 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16165 // order to keep the string valid.
16166 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
16167 }
16168 }
16169 return 0;
16170}
16171
16176static pm_node_t *
16177parse_string_part(pm_parser_t *parser, uint16_t depth) {
16178 switch (parser->current.type) {
16179 // Here the lexer has returned to us plain string content. In this case
16180 // we'll create a string node that has no opening or closing and return that
16181 // as the part. These kinds of parts look like:
16182 //
16183 // "aaa #{bbb} #@ccc ddd"
16184 // ^^^^ ^ ^^^^
16185 case PM_TOKEN_STRING_CONTENT: {
16186 pm_token_t opening = not_provided(parser);
16187 pm_token_t closing = not_provided(parser);
16188
16189 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16190 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16191
16192 parser_lex(parser);
16193 return node;
16194 }
16195 // Here the lexer has returned the beginning of an embedded expression. In
16196 // that case we'll parse the inner statements and return that as the part.
16197 // These kinds of parts look like:
16198 //
16199 // "aaa #{bbb} #@ccc ddd"
16200 // ^^^^^^
16201 case PM_TOKEN_EMBEXPR_BEGIN: {
16202 // Ruby disallows seeing encoding around interpolation in strings,
16203 // even though it is known at parse time.
16204 parser->explicit_encoding = NULL;
16205
16206 pm_lex_state_t state = parser->lex_state;
16207 int brace_nesting = parser->brace_nesting;
16208
16209 parser->brace_nesting = 0;
16210 lex_state_set(parser, PM_LEX_STATE_BEG);
16211 parser_lex(parser);
16212
16213 pm_token_t opening = parser->previous;
16214 pm_statements_node_t *statements = NULL;
16215
16216 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16217 pm_accepts_block_stack_push(parser, true);
16218 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16219 pm_accepts_block_stack_pop(parser);
16220 }
16221
16222 parser->brace_nesting = brace_nesting;
16223 lex_state_set(parser, state);
16224
16225 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16226 pm_token_t closing = parser->previous;
16227
16228 // If this set of embedded statements only contains a single
16229 // statement, then Ruby does not consider it as a possible statement
16230 // that could emit a line event.
16231 if (statements != NULL && statements->body.size == 1) {
16232 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16233 }
16234
16235 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16236 }
16237
16238 // Here the lexer has returned the beginning of an embedded variable.
16239 // In that case we'll parse the variable and create an appropriate node
16240 // for it and then return that node. These kinds of parts look like:
16241 //
16242 // "aaa #{bbb} #@ccc ddd"
16243 // ^^^^^
16244 case PM_TOKEN_EMBVAR: {
16245 // Ruby disallows seeing encoding around interpolation in strings,
16246 // even though it is known at parse time.
16247 parser->explicit_encoding = NULL;
16248
16249 lex_state_set(parser, PM_LEX_STATE_BEG);
16250 parser_lex(parser);
16251
16252 pm_token_t operator = parser->previous;
16253 pm_node_t *variable;
16254
16255 switch (parser->current.type) {
16256 // In this case a back reference is being interpolated. We'll
16257 // create a global variable read node.
16258 case PM_TOKEN_BACK_REFERENCE:
16259 parser_lex(parser);
16260 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16261 break;
16262 // In this case an nth reference is being interpolated. We'll
16263 // create a global variable read node.
16264 case PM_TOKEN_NUMBERED_REFERENCE:
16265 parser_lex(parser);
16266 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16267 break;
16268 // In this case a global variable is being interpolated. We'll
16269 // create a global variable read node.
16270 case PM_TOKEN_GLOBAL_VARIABLE:
16271 parser_lex(parser);
16272 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16273 break;
16274 // In this case an instance variable is being interpolated.
16275 // We'll create an instance variable read node.
16276 case PM_TOKEN_INSTANCE_VARIABLE:
16277 parser_lex(parser);
16278 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16279 break;
16280 // In this case a class variable is being interpolated. We'll
16281 // create a class variable read node.
16282 case PM_TOKEN_CLASS_VARIABLE:
16283 parser_lex(parser);
16284 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16285 break;
16286 // We can hit here if we got an invalid token. In that case
16287 // we'll not attempt to lex this token and instead just return a
16288 // missing node.
16289 default:
16290 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16291 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16292 break;
16293 }
16294
16295 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16296 }
16297 default:
16298 parser_lex(parser);
16299 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16300 return NULL;
16301 }
16302}
16303
16309static const uint8_t *
16310parse_operator_symbol_name(const pm_token_t *name) {
16311 switch (name->type) {
16312 case PM_TOKEN_TILDE:
16313 case PM_TOKEN_BANG:
16314 if (name->end[-1] == '@') return name->end - 1;
16316 default:
16317 return name->end;
16318 }
16319}
16320
16321static pm_node_t *
16322parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16323 pm_token_t closing = not_provided(parser);
16324 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16325
16326 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16327
16328 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16329 parser_lex(parser);
16330
16331 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16332 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16333
16334 return (pm_node_t *) symbol;
16335}
16336
16342static pm_node_t *
16343parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16344 const pm_token_t opening = parser->previous;
16345
16346 if (lex_mode->mode != PM_LEX_STRING) {
16347 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16348
16349 switch (parser->current.type) {
16350 case PM_CASE_OPERATOR:
16351 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16352 case PM_TOKEN_IDENTIFIER:
16353 case PM_TOKEN_CONSTANT:
16354 case PM_TOKEN_INSTANCE_VARIABLE:
16355 case PM_TOKEN_METHOD_NAME:
16356 case PM_TOKEN_CLASS_VARIABLE:
16357 case PM_TOKEN_GLOBAL_VARIABLE:
16358 case PM_TOKEN_NUMBERED_REFERENCE:
16359 case PM_TOKEN_BACK_REFERENCE:
16360 case PM_CASE_KEYWORD:
16361 parser_lex(parser);
16362 break;
16363 default:
16364 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16365 break;
16366 }
16367
16368 pm_token_t closing = not_provided(parser);
16369 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16370
16371 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16372 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16373
16374 return (pm_node_t *) symbol;
16375 }
16376
16377 if (lex_mode->as.string.interpolation) {
16378 // If we have the end of the symbol, then we can return an empty symbol.
16379 if (match1(parser, PM_TOKEN_STRING_END)) {
16380 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16381 parser_lex(parser);
16382
16383 pm_token_t content = not_provided(parser);
16384 pm_token_t closing = parser->previous;
16385 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16386 }
16387
16388 // Now we can parse the first part of the symbol.
16389 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16390
16391 // If we got a string part, then it's possible that we could transform
16392 // what looks like an interpolated symbol into a regular symbol.
16393 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16394 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16395 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16396
16397 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16398 }
16399
16400 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16401 if (part) pm_interpolated_symbol_node_append(symbol, part);
16402
16403 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16404 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16405 pm_interpolated_symbol_node_append(symbol, part);
16406 }
16407 }
16408
16409 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16410 if (match1(parser, PM_TOKEN_EOF)) {
16411 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16412 } else {
16413 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16414 }
16415
16416 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16417 return (pm_node_t *) symbol;
16418 }
16419
16420 pm_token_t content;
16421 pm_string_t unescaped;
16422
16423 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16424 content = parser->current;
16425 unescaped = parser->current_string;
16426 parser_lex(parser);
16427
16428 // If we have two string contents in a row, then the content of this
16429 // symbol is split because of heredoc contents. This looks like:
16430 //
16431 // <<A; :'a
16432 // A
16433 // b'
16434 //
16435 // In this case, the best way we have to represent this is as an
16436 // interpolated string node, so that's what we'll do here.
16437 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16438 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16439 pm_token_t bounds = not_provided(parser);
16440
16441 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16442 pm_interpolated_symbol_node_append(symbol, part);
16443
16444 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16445 pm_interpolated_symbol_node_append(symbol, part);
16446
16447 if (next_state != PM_LEX_STATE_NONE) {
16448 lex_state_set(parser, next_state);
16449 }
16450
16451 parser_lex(parser);
16452 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16453
16454 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16455 return (pm_node_t *) symbol;
16456 }
16457 } else {
16458 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16459 pm_string_shared_init(&unescaped, content.start, content.end);
16460 }
16461
16462 if (next_state != PM_LEX_STATE_NONE) {
16463 lex_state_set(parser, next_state);
16464 }
16465
16466 if (match1(parser, PM_TOKEN_EOF)) {
16467 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16468 } else {
16469 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16470 }
16471
16472 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16473}
16474
16479static inline pm_node_t *
16480parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16481 switch (parser->current.type) {
16482 case PM_CASE_OPERATOR: {
16483 const pm_token_t opening = not_provided(parser);
16484 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16485 }
16486 case PM_CASE_KEYWORD:
16487 case PM_TOKEN_CONSTANT:
16488 case PM_TOKEN_IDENTIFIER:
16489 case PM_TOKEN_METHOD_NAME: {
16490 parser_lex(parser);
16491
16492 pm_token_t opening = not_provided(parser);
16493 pm_token_t closing = not_provided(parser);
16494 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16495
16496 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16497 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16498
16499 return (pm_node_t *) symbol;
16500 }
16501 case PM_TOKEN_SYMBOL_BEGIN: {
16502 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16503 parser_lex(parser);
16504
16505 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16506 }
16507 default:
16508 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16509 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16510 }
16511}
16512
16519static inline pm_node_t *
16520parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16521 switch (parser->current.type) {
16522 case PM_CASE_OPERATOR: {
16523 const pm_token_t opening = not_provided(parser);
16524 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16525 }
16526 case PM_CASE_KEYWORD:
16527 case PM_TOKEN_CONSTANT:
16528 case PM_TOKEN_IDENTIFIER:
16529 case PM_TOKEN_METHOD_NAME: {
16530 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16531 parser_lex(parser);
16532
16533 pm_token_t opening = not_provided(parser);
16534 pm_token_t closing = not_provided(parser);
16535 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16536
16537 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16538 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16539
16540 return (pm_node_t *) symbol;
16541 }
16542 case PM_TOKEN_SYMBOL_BEGIN: {
16543 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16544 parser_lex(parser);
16545
16546 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16547 }
16548 case PM_TOKEN_BACK_REFERENCE:
16549 parser_lex(parser);
16550 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16551 case PM_TOKEN_NUMBERED_REFERENCE:
16552 parser_lex(parser);
16553 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16554 case PM_TOKEN_GLOBAL_VARIABLE:
16555 parser_lex(parser);
16556 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16557 default:
16558 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16559 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16560 }
16561}
16562
16567static pm_node_t *
16568parse_variable(pm_parser_t *parser) {
16569 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16570 int depth;
16571 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16572
16573 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16574 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16575 }
16576
16577 pm_scope_t *current_scope = parser->current_scope;
16578 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16579 if (is_numbered_param) {
16580 // When you use a numbered parameter, it implies the existence of
16581 // all of the locals that exist before it. For example, referencing
16582 // _2 means that _1 must exist. Therefore here we loop through all
16583 // of the possibilities and add them into the constant pool.
16584 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16585 for (uint8_t number = 1; number <= maximum; number++) {
16586 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16587 }
16588
16589 if (!match1(parser, PM_TOKEN_EQUAL)) {
16590 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16591 }
16592
16593 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16594 pm_node_list_append(&current_scope->implicit_parameters, node);
16595
16596 return node;
16597 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16598 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16599 pm_node_list_append(&current_scope->implicit_parameters, node);
16600
16601 return node;
16602 }
16603 }
16604
16605 return NULL;
16606}
16607
16611static pm_node_t *
16612parse_variable_call(pm_parser_t *parser) {
16613 pm_node_flags_t flags = 0;
16614
16615 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16616 pm_node_t *node = parse_variable(parser);
16617 if (node != NULL) return node;
16618 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
16619 }
16620
16621 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16622 pm_node_flag_set((pm_node_t *)node, flags);
16623
16624 return (pm_node_t *) node;
16625}
16626
16632static inline pm_token_t
16633parse_method_definition_name(pm_parser_t *parser) {
16634 switch (parser->current.type) {
16635 case PM_CASE_KEYWORD:
16636 case PM_TOKEN_CONSTANT:
16637 case PM_TOKEN_METHOD_NAME:
16638 parser_lex(parser);
16639 return parser->previous;
16640 case PM_TOKEN_IDENTIFIER:
16641 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16642 parser_lex(parser);
16643 return parser->previous;
16644 case PM_CASE_OPERATOR:
16645 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16646 parser_lex(parser);
16647 return parser->previous;
16648 default:
16649 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16650 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16651 }
16652}
16653
16654static void
16655parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16656 // Get a reference to the string struct that is being held by the string
16657 // node. This is the value we're going to actually manipulate.
16658 pm_string_ensure_owned(string);
16659
16660 // Now get the bounds of the existing string. We'll use this as a
16661 // destination to move bytes into. We'll also use it for bounds checking
16662 // since we don't require that these strings be null terminated.
16663 size_t dest_length = pm_string_length(string);
16664 const uint8_t *source_cursor = (uint8_t *) string->source;
16665 const uint8_t *source_end = source_cursor + dest_length;
16666
16667 // We're going to move bytes backward in the string when we get leading
16668 // whitespace, so we'll maintain a pointer to the current position in the
16669 // string that we're writing to.
16670 size_t trimmed_whitespace = 0;
16671
16672 // While we haven't reached the amount of common whitespace that we need to
16673 // trim and we haven't reached the end of the string, we'll keep trimming
16674 // whitespace. Trimming in this context means skipping over these bytes such
16675 // that they aren't copied into the new string.
16676 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16677 if (*source_cursor == '\t') {
16678 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16679 if (trimmed_whitespace > common_whitespace) break;
16680 } else {
16681 trimmed_whitespace++;
16682 }
16683
16684 source_cursor++;
16685 dest_length--;
16686 }
16687
16688 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16689 string->length = dest_length;
16690}
16691
16695static void
16696parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16697 // The next node should be dedented if it's the first node in the list or if
16698 // it follows a string node.
16699 bool dedent_next = true;
16700
16701 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16702 // keep around two indices: a read and a write. If we end up trimming all of
16703 // the whitespace from a node, then we'll drop it from the list entirely.
16704 size_t write_index = 0;
16705
16706 pm_node_t *node;
16707 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16708 // We're not manipulating child nodes that aren't strings. In this case
16709 // we'll skip past it and indicate that the subsequent node should not
16710 // be dedented.
16711 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16712 nodes->nodes[write_index++] = node;
16713 dedent_next = false;
16714 continue;
16715 }
16716
16717 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16718 if (dedent_next) {
16719 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16720 }
16721
16722 if (string_node->unescaped.length == 0) {
16723 pm_node_destroy(parser, node);
16724 } else {
16725 nodes->nodes[write_index++] = node;
16726 }
16727
16728 // We always dedent the next node if it follows a string node.
16729 dedent_next = true;
16730 }
16731
16732 nodes->size = write_index;
16733}
16734
16738static pm_token_t
16739parse_strings_empty_content(const uint8_t *location) {
16740 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16741}
16742
16746static inline pm_node_t *
16747parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16748 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16749 bool concating = false;
16750
16751 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16752 pm_node_t *node = NULL;
16753
16754 // Here we have found a string literal. We'll parse it and add it to
16755 // the list of strings.
16756 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16757 assert(lex_mode->mode == PM_LEX_STRING);
16758 bool lex_interpolation = lex_mode->as.string.interpolation;
16759 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16760
16761 pm_token_t opening = parser->current;
16762 parser_lex(parser);
16763
16764 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16765 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16766 // If we get here, then we have an end immediately after a
16767 // start. In that case we'll create an empty content token and
16768 // return an uninterpolated string.
16769 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16770 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16771
16772 pm_string_shared_init(&string->unescaped, content.start, content.end);
16773 node = (pm_node_t *) string;
16774 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16775 // If we get here, then we have an end of a label immediately
16776 // after a start. In that case we'll create an empty symbol
16777 // node.
16778 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16779 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16780
16781 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16782 node = (pm_node_t *) symbol;
16783
16784 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16785 } else if (!lex_interpolation) {
16786 // If we don't accept interpolation then we expect the string to
16787 // start with a single string content node.
16788 pm_string_t unescaped;
16789 pm_token_t content;
16790
16791 if (match1(parser, PM_TOKEN_EOF)) {
16792 unescaped = PM_STRING_EMPTY;
16793 content = not_provided(parser);
16794 } else {
16795 unescaped = parser->current_string;
16796 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16797 content = parser->previous;
16798 }
16799
16800 // It is unfortunately possible to have multiple string content
16801 // nodes in a row in the case that there's heredoc content in
16802 // the middle of the string, like this cursed example:
16803 //
16804 // <<-END+'b
16805 // a
16806 // END
16807 // c'+'d'
16808 //
16809 // In that case we need to switch to an interpolated string to
16810 // be able to contain all of the parts.
16811 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16812 pm_node_list_t parts = { 0 };
16813
16814 pm_token_t delimiters = not_provided(parser);
16815 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16816 pm_node_list_append(&parts, part);
16817
16818 do {
16819 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16820 pm_node_list_append(&parts, part);
16821 parser_lex(parser);
16822 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16823
16824 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16825 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16826
16827 pm_node_list_free(&parts);
16828 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16829 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16830 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16831 } else if (match1(parser, PM_TOKEN_EOF)) {
16832 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16833 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16834 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16835 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16836 } else {
16837 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16838 parser->previous.start = parser->previous.end;
16839 parser->previous.type = PM_TOKEN_MISSING;
16840 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16841 }
16842 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16843 // In this case we've hit string content so we know the string
16844 // at least has something in it. We'll need to check if the
16845 // following token is the end (in which case we can return a
16846 // plain string) or if it's not then it has interpolation.
16847 pm_token_t content = parser->current;
16848 pm_string_t unescaped = parser->current_string;
16849 parser_lex(parser);
16850
16851 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16852 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16853 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16854
16855 // Kind of odd behavior, but basically if we have an
16856 // unterminated string and it ends in a newline, we back up one
16857 // character so that the error message is on the last line of
16858 // content in the string.
16859 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16860 const uint8_t *location = parser->previous.end;
16861 if (location > parser->start && location[-1] == '\n') location--;
16862 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16863
16864 parser->previous.start = parser->previous.end;
16865 parser->previous.type = PM_TOKEN_MISSING;
16866 }
16867 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16868 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16869 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16870 } else {
16871 // If we get here, then we have interpolation so we'll need
16872 // to create a string or symbol node with interpolation.
16873 pm_node_list_t parts = { 0 };
16874 pm_token_t string_opening = not_provided(parser);
16875 pm_token_t string_closing = not_provided(parser);
16876
16877 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16878 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16879 pm_node_list_append(&parts, part);
16880
16881 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16882 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16883 pm_node_list_append(&parts, part);
16884 }
16885 }
16886
16887 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16888 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16889 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16890 } else if (match1(parser, PM_TOKEN_EOF)) {
16891 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16892 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16893 } else {
16894 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16895 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16896 }
16897
16898 pm_node_list_free(&parts);
16899 }
16900 } else {
16901 // If we get here, then the first part of the string is not plain
16902 // string content, in which case we need to parse the string as an
16903 // interpolated string.
16904 pm_node_list_t parts = { 0 };
16905 pm_node_t *part;
16906
16907 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16908 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16909 pm_node_list_append(&parts, part);
16910 }
16911 }
16912
16913 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16914 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16915 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16916 } else if (match1(parser, PM_TOKEN_EOF)) {
16917 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16918 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16919 } else {
16920 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16921 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16922 }
16923
16924 pm_node_list_free(&parts);
16925 }
16926
16927 if (current == NULL) {
16928 // If the node we just parsed is a symbol node, then we can't
16929 // concatenate it with anything else, so we can now return that
16930 // node.
16931 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16932 return node;
16933 }
16934
16935 // If we don't already have a node, then it's fine and we can just
16936 // set the result to be the node we just parsed.
16937 current = node;
16938 } else {
16939 // Otherwise we need to check the type of the node we just parsed.
16940 // If it cannot be concatenated with the previous node, then we'll
16941 // need to add a syntax error.
16942 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16943 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16944 }
16945
16946 // If we haven't already created our container for concatenation,
16947 // we'll do that now.
16948 if (!concating) {
16949 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16950 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16951 }
16952
16953 concating = true;
16954 pm_token_t bounds = not_provided(parser);
16955
16956 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16957 pm_interpolated_string_node_append(container, current);
16958 current = (pm_node_t *) container;
16959 }
16960
16961 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16962 }
16963 }
16964
16965 return current;
16966}
16967
16968#define PM_PARSE_PATTERN_SINGLE 0
16969#define PM_PARSE_PATTERN_TOP 1
16970#define PM_PARSE_PATTERN_MULTI 2
16971
16972static pm_node_t *
16973parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16974
16980static void
16981parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16982 // Skip this capture if it starts with an underscore.
16983 if (*location->start == '_') return;
16984
16985 if (pm_constant_id_list_includes(captures, capture)) {
16986 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16987 } else {
16988 pm_constant_id_list_append(captures, capture);
16989 }
16990}
16991
16995static pm_node_t *
16996parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16997 // Now, if there are any :: operators that follow, parse them as constant
16998 // path nodes.
16999 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
17000 pm_token_t delimiter = parser->previous;
17001 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17002 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
17003 }
17004
17005 // If there is a [ or ( that follows, then this is part of a larger pattern
17006 // expression. We'll parse the inner pattern here, then modify the returned
17007 // inner pattern with our constant path attached.
17008 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
17009 return node;
17010 }
17011
17012 pm_token_t opening;
17013 pm_token_t closing;
17014 pm_node_t *inner = NULL;
17015
17016 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
17017 opening = parser->previous;
17018 accept1(parser, PM_TOKEN_NEWLINE);
17019
17020 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17021 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17022 accept1(parser, PM_TOKEN_NEWLINE);
17023 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17024 }
17025
17026 closing = parser->previous;
17027 } else {
17028 parser_lex(parser);
17029 opening = parser->previous;
17030 accept1(parser, PM_TOKEN_NEWLINE);
17031
17032 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17033 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17034 accept1(parser, PM_TOKEN_NEWLINE);
17035 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17036 }
17037
17038 closing = parser->previous;
17039 }
17040
17041 if (!inner) {
17042 // If there was no inner pattern, then we have something like Foo() or
17043 // Foo[]. In that case we'll create an array pattern with no requireds.
17044 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
17045 }
17046
17047 // Now that we have the inner pattern, check to see if it's an array, find,
17048 // or hash pattern. If it is, then we'll attach our constant path to it if
17049 // it doesn't already have a constant. If it's not one of those node types
17050 // or it does have a constant, then we'll create an array pattern.
17051 switch (PM_NODE_TYPE(inner)) {
17052 case PM_ARRAY_PATTERN_NODE: {
17053 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17054
17055 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
17056 pattern_node->base.location.start = node->location.start;
17057 pattern_node->base.location.end = closing.end;
17058
17059 pattern_node->constant = node;
17060 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17061 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17062
17063 return (pm_node_t *) pattern_node;
17064 }
17065
17066 break;
17067 }
17068 case PM_FIND_PATTERN_NODE: {
17069 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17070
17071 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
17072 pattern_node->base.location.start = node->location.start;
17073 pattern_node->base.location.end = closing.end;
17074
17075 pattern_node->constant = node;
17076 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17077 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17078
17079 return (pm_node_t *) pattern_node;
17080 }
17081
17082 break;
17083 }
17084 case PM_HASH_PATTERN_NODE: {
17085 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
17086
17087 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
17088 pattern_node->base.location.start = node->location.start;
17089 pattern_node->base.location.end = closing.end;
17090
17091 pattern_node->constant = node;
17092 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17093 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17094
17095 return (pm_node_t *) pattern_node;
17096 }
17097
17098 break;
17099 }
17100 default:
17101 break;
17102 }
17103
17104 // If we got here, then we didn't return one of the inner patterns by
17105 // attaching its constant. In this case we'll create an array pattern and
17106 // attach our constant to it.
17107 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
17108 pm_array_pattern_node_requireds_append(pattern_node, inner);
17109 return (pm_node_t *) pattern_node;
17110}
17111
17115static pm_splat_node_t *
17116parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17117 assert(parser->previous.type == PM_TOKEN_USTAR);
17118 pm_token_t operator = parser->previous;
17119 pm_node_t *name = NULL;
17120
17121 // Rest patterns don't necessarily have a name associated with them. So we
17122 // will check for that here. If they do, then we'll add it to the local
17123 // table since this pattern will cause it to become a local variable.
17124 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17125 pm_token_t identifier = parser->previous;
17126 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
17127
17128 int depth;
17129 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17130 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
17131 }
17132
17133 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
17134 name = (pm_node_t *) pm_local_variable_target_node_create(
17135 parser,
17136 &PM_LOCATION_TOKEN_VALUE(&identifier),
17137 constant_id,
17138 (uint32_t) (depth == -1 ? 0 : depth)
17139 );
17140 }
17141
17142 // Finally we can return the created node.
17143 return pm_splat_node_create(parser, &operator, name);
17144}
17145
17149static pm_node_t *
17150parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17151 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17152 parser_lex(parser);
17153
17154 pm_token_t operator = parser->previous;
17155 pm_node_t *value = NULL;
17156
17157 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17158 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17159 }
17160
17161 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17162 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17163
17164 int depth;
17165 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17166 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17167 }
17168
17169 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17170 value = (pm_node_t *) pm_local_variable_target_node_create(
17171 parser,
17172 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17173 constant_id,
17174 (uint32_t) (depth == -1 ? 0 : depth)
17175 );
17176 }
17177
17178 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17179}
17180
17185static bool
17186pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17187 ptrdiff_t length = end - start;
17188 if (length == 0) return false;
17189
17190 // First ensure that it starts with a valid identifier starting character.
17191 size_t width = char_is_identifier_start(parser, start, end - start);
17192 if (width == 0) return false;
17193
17194 // Next, ensure that it's not an uppercase character.
17195 if (parser->encoding_changed) {
17196 if (parser->encoding->isupper_char(start, length)) return false;
17197 } else {
17198 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17199 }
17200
17201 // Next, iterate through all of the bytes of the string to ensure that they
17202 // are all valid identifier characters.
17203 const uint8_t *cursor = start + width;
17204 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
17205 return cursor == end;
17206}
17207
17212static pm_node_t *
17213parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17214 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17215
17216 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17217 int depth = -1;
17218
17219 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17220 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17221 } else {
17222 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17223
17224 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17225 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17226 }
17227 }
17228
17229 if (depth == -1) {
17230 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17231 }
17232
17233 parse_pattern_capture(parser, captures, constant_id, value_loc);
17234 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17235 parser,
17236 value_loc,
17237 constant_id,
17238 (uint32_t) (depth == -1 ? 0 : depth)
17239 );
17240
17241 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17242}
17243
17248static void
17249parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17250 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17251 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17252 }
17253}
17254
17259parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17260 pm_node_list_t assocs = { 0 };
17261 pm_static_literals_t keys = { 0 };
17262 pm_node_t *rest = NULL;
17263
17264 switch (PM_NODE_TYPE(first_node)) {
17265 case PM_ASSOC_SPLAT_NODE:
17266 case PM_NO_KEYWORDS_PARAMETER_NODE:
17267 rest = first_node;
17268 break;
17269 case PM_SYMBOL_NODE: {
17270 if (pm_symbol_node_label_p(first_node)) {
17271 parse_pattern_hash_key(parser, &keys, first_node);
17272 pm_node_t *value;
17273
17274 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17275 // Otherwise, we will create an implicit local variable
17276 // target for the value.
17277 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17278 } else {
17279 // Here we have a value for the first assoc in the list, so
17280 // we will parse it now.
17281 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17282 }
17283
17284 pm_token_t operator = not_provided(parser);
17285 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17286
17287 pm_node_list_append(&assocs, assoc);
17288 break;
17289 }
17290 }
17292 default: {
17293 // If we get anything else, then this is an error. For this we'll
17294 // create a missing node for the value and create an assoc node for
17295 // the first node in the list.
17296 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17297 pm_parser_err_node(parser, first_node, diag_id);
17298
17299 pm_token_t operator = not_provided(parser);
17300 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17301 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17302
17303 pm_node_list_append(&assocs, assoc);
17304 break;
17305 }
17306 }
17307
17308 // If there are any other assocs, then we'll parse them now.
17309 while (accept1(parser, PM_TOKEN_COMMA)) {
17310 // Here we need to break to support trailing commas.
17311 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17312 // Trailing commas are not allowed to follow a rest pattern.
17313 if (rest != NULL) {
17314 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17315 }
17316
17317 break;
17318 }
17319
17320 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17321 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17322
17323 if (rest == NULL) {
17324 rest = assoc;
17325 } else {
17326 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17327 pm_node_list_append(&assocs, assoc);
17328 }
17329 } else {
17330 pm_node_t *key;
17331
17332 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17333 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17334
17335 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
17336 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17337 } else if (!pm_symbol_node_label_p(key)) {
17338 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17339 }
17340 } else {
17341 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17342 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17343 }
17344
17345 parse_pattern_hash_key(parser, &keys, key);
17346 pm_node_t *value = NULL;
17347
17348 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17349 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17350 } else {
17351 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17352 }
17353
17354 pm_token_t operator = not_provided(parser);
17355 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17356
17357 if (rest != NULL) {
17358 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17359 }
17360
17361 pm_node_list_append(&assocs, assoc);
17362 }
17363 }
17364
17365 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17366 xfree(assocs.nodes);
17367
17368 pm_static_literals_free(&keys);
17369 return node;
17370}
17371
/**
 * Parse one primitive pattern, dispatching on the type of the current token.
 *
 * @param parser The parser.
 * @param captures The list of names captured so far by the enclosing pattern.
 * @param diag_id The diagnostic to report when no pattern can be parsed here.
 * @param depth The current nesting depth, passed on (incremented) to nested
 *     parse calls.
 * @return The node for the parsed pattern. On error a diagnostic is recorded
 *     and a node containing (or consisting of) a missing node is returned.
 */
17375static pm_node_t *
17376parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17377 switch (parser->current.type) {
// A bare name: consume it, add it as a local when
// pm_parser_local_depth_constant_id reports -1, record the capture, and
// return a local variable target.
17378 case PM_TOKEN_IDENTIFIER:
17379 case PM_TOKEN_METHOD_NAME: {
17380 parser_lex(parser);
17381 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17382
// Note: this local shadows the `depth` parameter for the rest of this case.
17383 int depth;
17384 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17385 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17386 }
17387
17388 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17389 return (pm_node_t *) pm_local_variable_target_node_create(
17390 parser,
17391 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17392 constant_id,
17393 (uint32_t) (depth == -1 ? 0 : depth)
17394 );
17395 }
// A bracketed pattern.
17396 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17397 pm_token_t opening = parser->current;
17398 parser_lex(parser);
17399
17400 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17401 // If we have an empty array pattern, then we'll just return a new
17402 // array pattern node.
17403 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17404 }
17405
17406 // Otherwise, we'll parse the inner pattern, then deal with it depending
17407 // on the type it returns.
17408 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17409
17410 accept1(parser, PM_TOKEN_NEWLINE);
17411 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17412 pm_token_t closing = parser->previous;
17413
// An inner array or find pattern whose opening location has not been set yet
// is reused: it is stamped with these brackets and returned directly. Any
// other inner node is wrapped in a fresh array pattern below.
17414 switch (PM_NODE_TYPE(inner)) {
17415 case PM_ARRAY_PATTERN_NODE: {
17416 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17417 if (pattern_node->opening_loc.start == NULL) {
17418 pattern_node->base.location.start = opening.start;
17419 pattern_node->base.location.end = closing.end;
17420
17421 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17422 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17423
17424 return (pm_node_t *) pattern_node;
17425 }
17426
17427 break;
17428 }
17429 case PM_FIND_PATTERN_NODE: {
17430 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17431 if (pattern_node->opening_loc.start == NULL) {
17432 pattern_node->base.location.start = opening.start;
17433 pattern_node->base.location.end = closing.end;
17434
17435 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17436 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17437
17438 return (pm_node_t *) pattern_node;
17439 }
17440
17441 break;
17442 }
17443 default:
17444 break;
17445 }
17446
17447 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17448 pm_array_pattern_node_requireds_append(node, inner);
17449 return (pm_node_t *) node;
17450 }
// A braced hash pattern. pattern_matching_newlines is cleared while the
// braces are parsed and restored before returning.
17451 case PM_TOKEN_BRACE_LEFT: {
17452 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17453 parser->pattern_matching_newlines = false;
17454
// NOTE(review): `node` is assigned and dereferenced below, but no declaration
// is visible here (the listing numbering skips 17455) - presumably a
// pm_hash_pattern_node_t pointer; confirm against the original file.
17456 pm_token_t opening = parser->current;
17457 parser_lex(parser);
17458
17459 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17460 // If we have an empty hash pattern, then we'll just return a new hash
17461 // pattern node.
17462 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17463 } else {
17464 pm_node_t *first_node;
17465
// Parse the first element here; parse_pattern_hash takes it from there.
17466 switch (parser->current.type) {
17467 case PM_TOKEN_LABEL:
17468 parser_lex(parser);
17469 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17470 break;
17471 case PM_TOKEN_USTAR_STAR:
17472 first_node = parse_pattern_keyword_rest(parser, captures);
17473 break;
17474 case PM_TOKEN_STRING_BEGIN:
17475 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17476 break;
17477 default: {
17478 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17479 parser_lex(parser);
17480
17481 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17482 break;
17483 }
17484 }
17485
17486 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17487
17488 accept1(parser, PM_TOKEN_NEWLINE);
17489 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17490 pm_token_t closing = parser->previous;
17491
17492 node->base.location.start = opening.start;
17493 node->base.location.end = closing.end;
17494
17495 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17496 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17497 }
17498
17499 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17500 return (pm_node_t *) node;
17501 }
// A range with no left-hand side.
17502 case PM_TOKEN_UDOT_DOT:
17503 case PM_TOKEN_UDOT_DOT_DOT: {
17504 pm_token_t operator = parser->current;
17505 parser_lex(parser);
17506
17507 // Since we have a unary range operator, we need to parse the subsequent
17508 // expression as the right side of the range.
17509 switch (parser->current.type) {
17510 case PM_CASE_PRIMITIVE: {
17511 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17512 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17513 }
17514 default: {
17515 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17516 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17517 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17518 }
17519 }
17520 }
// A primitive value, optionally followed by a range operator.
17521 case PM_CASE_PRIMITIVE: {
17522 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17523
17524 // If we found a label, we need to immediately return to the caller.
17525 if (pm_symbol_node_label_p(node)) return node;
17526
17527 // Call nodes (arithmetic operations) are not allowed in patterns
// The call node is replaced by a missing node covering the same location and
// is destroyed only after that location has been read.
17528 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17529 pm_parser_err_node(parser, node, diag_id);
17530 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
17531 pm_node_destroy(parser, node);
17532 return (pm_node_t *) missing_node;
17533 }
17534
17535 // Now that we have a primitive, we need to check if it's part of a range.
17536 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17537 pm_token_t operator = parser->previous;
17538
17539 // Now that we have the operator, we need to check if this is followed
17540 // by another expression. If it is, then we will create a full range
17541 // node. Otherwise, we'll create an endless range.
17542 switch (parser->current.type) {
17543 case PM_CASE_PRIMITIVE: {
17544 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17545 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17546 }
17547 default:
17548 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17549 }
17550 }
17551
17552 return node;
17553 }
// The pin operator, followed by a variable or a parenthesized expression.
17554 case PM_TOKEN_CARET: {
17555 parser_lex(parser);
17556 pm_token_t operator = parser->previous;
17557
17558 // At this point we have a pin operator. We need to check the subsequent
17559 // expression to determine if it's a variable or an expression.
17560 switch (parser->current.type) {
17561 case PM_TOKEN_IDENTIFIER: {
17562 parser_lex(parser);
17563 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17564
// When parse_variable returns NULL, report the error and pin a placeholder
// local variable read instead.
17565 if (variable == NULL) {
17566 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17567 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17568 }
17569
17570 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17571 }
17572 case PM_TOKEN_INSTANCE_VARIABLE: {
17573 parser_lex(parser);
17574 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17575
17576 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17577 }
17578 case PM_TOKEN_CLASS_VARIABLE: {
17579 parser_lex(parser);
17580 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17581
17582 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17583 }
17584 case PM_TOKEN_GLOBAL_VARIABLE: {
17585 parser_lex(parser);
17586 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17587
17588 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17589 }
17590 case PM_TOKEN_NUMBERED_REFERENCE: {
17591 parser_lex(parser);
17592 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17593
17594 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17595 }
17596 case PM_TOKEN_BACK_REFERENCE: {
17597 parser_lex(parser);
17598 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17599
17600 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17601 }
// A pinned expression in parentheses. pattern_matching_newlines is cleared
// only while the inner expression is parsed.
17602 case PM_TOKEN_PARENTHESIS_LEFT: {
17603 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17604 parser->pattern_matching_newlines = false;
17605
17606 pm_token_t lparen = parser->current;
17607 parser_lex(parser);
17608
17609 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17610 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17611
17612 accept1(parser, PM_TOKEN_NEWLINE);
17613 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17614 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17615 }
17616 default: {
17617 // If we get here, then we have a pin operator followed by something
17618 // not understood. We'll create a missing node and return that.
17619 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17620 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17621 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17622 }
17623 }
17624 }
// A constant path starting with `::`. The rest of the pattern is handled by
// parse_pattern_constant_path.
17625 case PM_TOKEN_UCOLON_COLON: {
17626 pm_token_t delimiter = parser->current;
17627 parser_lex(parser);
17628
17629 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17630 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17631
17632 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17633 }
// A plain constant, likewise handed to parse_pattern_constant_path.
17634 case PM_TOKEN_CONSTANT: {
17635 pm_token_t constant = parser->current;
17636 parser_lex(parser);
17637
17638 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17639 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17640 }
// Anything else: report diag_id on the current token (which is not consumed)
// and return a missing node spanning it.
17641 default:
17642 pm_parser_err_current(parser, diag_id);
17643 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17644 }
17645}
17646
/**
 * Parse one or more primitive patterns joined by `|` into alternation nodes,
 * then wrap the result in a capture node for each trailing `=> name`.
 *
 * @param parser The parser.
 * @param captures The list of names captured so far by the enclosing pattern.
 * @param first_node An already-parsed first pattern, or NULL when the first
 *     pattern still has to be parsed here.
 * @param diag_id The diagnostic to report when the first pattern is missing.
 * @param depth The current nesting depth, passed on (incremented) to nested
 *     parse calls.
 * @return The resulting pattern node.
 */
17651static pm_node_t *
17652parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17653 pm_node_t *node = first_node;
17654
// The loop body runs once unconditionally when no first node was supplied,
// and after that once for every `|` that is accepted.
17655 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
// On the first pass with node == NULL the previous token is not a pipe; the
// value is only used below in branches where node is already non-NULL.
17656 pm_token_t operator = parser->previous;
17657
17658 switch (parser->current.type) {
17659 case PM_TOKEN_IDENTIFIER:
17660 case PM_TOKEN_BRACKET_LEFT_ARRAY:
17661 case PM_TOKEN_BRACE_LEFT:
17662 case PM_TOKEN_CARET:
17663 case PM_TOKEN_CONSTANT:
17664 case PM_TOKEN_UCOLON_COLON:
17665 case PM_TOKEN_UDOT_DOT:
17666 case PM_TOKEN_UDOT_DOT_DOT:
17667 case PM_CASE_PRIMITIVE: {
// The first pattern reports the caller's diagnostic; patterns after a pipe
// report PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE.
17668 if (node == NULL) {
17669 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17670 } else {
17671 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17672 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17673 }
17674
17675 break;
17676 }
// A parenthesized pattern: parse the body as a single pattern and wrap it in
// a parentheses node.
17677 case PM_TOKEN_PARENTHESIS_LEFT:
17678 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17679 pm_token_t opening = parser->current;
17680 parser_lex(parser);
17681
17682 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17683 accept1(parser, PM_TOKEN_NEWLINE);
17684 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17685 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
17686
17687 if (node == NULL) {
17688 node = right;
17689 } else {
17690 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17691 }
17692
17693 break;
17694 }
// Unrecognized token: report diag_id and use a missing node in its place.
// No token is consumed here, but node becomes non-NULL, so the loop continues
// only if a pipe follows.
17695 default: {
17696 pm_parser_err_current(parser, diag_id);
17697 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17698
17699 if (node == NULL) {
17700 node = right;
17701 } else {
17702 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17703 }
17704
17705 break;
17706 }
17707 }
17708 }
17709
17710 // If we have an =>, then we are assigning this pattern to a variable.
17711 // In this case we should create an assignment node.
17712 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17713 pm_token_t operator = parser->previous;
17714 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17715
17716 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
// Note: this local shadows the `depth` parameter inside this loop body.
17717 int depth;
17718
17719 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17720 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17721 }
17722
17723 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17724 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17725 parser,
17726 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17727 constant_id,
17728 (uint32_t) (depth == -1 ? 0 : depth)
17729 );
17730
17731 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17732 }
17733
17734 return node;
17735}
17736
/**
 * Parse a pattern. Depending on the leading token and on `flags`, the result
 * is a hash pattern, an array or find pattern, or whatever
 * parse_pattern_primitives returns.
 *
 * @param parser The parser.
 * @param captures The list of names captured so far by the enclosing pattern.
 * @param flags A combination of PM_PARSE_PATTERN_* flags. The code below tests
 *     PM_PARSE_PATTERN_TOP (a brace-less hash pattern is accepted without
 *     error) and PM_PARSE_PATTERN_MULTI (a comma-separated list is accepted);
 *     either of them allows a leading `*`.
 * @param diag_id The diagnostic to report when no pattern can be parsed here.
 * @param depth The current nesting depth, passed on (incremented) to nested
 *     parse calls.
 * @return The node for the parsed pattern.
 */
17740static pm_node_t *
17741parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17742 pm_node_t *node = NULL;
17743
// leading_rest: the pattern began with a `*` rest. trailing_rest: a rest
// (explicit, or implied by a trailing comma) was seen after the first element.
17744 bool leading_rest = false;
17745 bool trailing_rest = false;
17746
17747 switch (parser->current.type) {
// A label starts a hash pattern without braces. It is still parsed when
// PM_PARSE_PATTERN_TOP is not set, but an error is attached to the result.
17748 case PM_TOKEN_LABEL: {
17749 parser_lex(parser);
17750 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17751 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17752
17753 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17754 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17755 }
17756
17757 return node;
17758 }
// A `**` rest likewise starts a hash pattern without braces.
17759 case PM_TOKEN_USTAR_STAR: {
17760 node = parse_pattern_keyword_rest(parser, captures);
17761 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17762
17763 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17764 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17765 }
17766
17767 return node;
17768 }
17769 case PM_TOKEN_STRING_BEGIN: {
17770 // We need special handling for string beginnings because they could
17771 // be dynamic symbols leading to hash patterns.
17772 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17773
17774 if (pm_symbol_node_label_p(node)) {
17775 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17776
17777 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17778 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17779 }
17780
17781 return node;
17782 }
17783
17784 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17785 break;
17786 }
// A leading `*` is parsed as a rest only when TOP or MULTI is set. Otherwise
// the case has no break, so control continues into the default label below.
// NOTE(review): the listing numbering skips 17795 between this case and
// `default:` - presumably a fallthrough marker; confirm against the original.
17787 case PM_TOKEN_USTAR: {
17788 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17789 parser_lex(parser);
17790 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17791 leading_rest = true;
17792 break;
17793 }
17794 }
17796 default:
17797 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17798 break;
17799 }
17800
17801 // If we got a dynamic label symbol, then we need to treat it like the
17802 // beginning of a hash pattern.
17803 if (pm_symbol_node_label_p(node)) {
17804 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17805 }
17806
17807 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17808 // If we have a comma, then we are now parsing either an array pattern
17809 // or a find pattern. We need to parse all of the patterns, put them
17810 // into a big list, and then determine which type of node we have.
17811 pm_node_list_t nodes = { 0 };
17812 pm_node_list_append(&nodes, node);
17813
17814 // Gather up all of the patterns into the list.
17815 while (accept1(parser, PM_TOKEN_COMMA)) {
17816 // Break early here in case we have a trailing comma.
// A trailing comma is recorded as an implicit rest node at the end of the
// list.
17817 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17818 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17819 pm_node_list_append(&nodes, node);
17820 trailing_rest = true;
17821 break;
17822 }
17823
17824 if (accept1(parser, PM_TOKEN_USTAR)) {
17825 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17826
17827 // If we have already parsed a splat pattern, then this is an
17828 // error. We will continue to parse the rest of the patterns,
17829 // but we will indicate it as an error.
17830 if (trailing_rest) {
17831 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17832 }
17833
17834 trailing_rest = true;
17835 } else {
17836 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17837 }
17838
17839 pm_node_list_append(&nodes, node);
17840 }
17841
17842 // If the first pattern and the last pattern are rest patterns, then we
17843 // will call this a find pattern, regardless of how many rest patterns
17844 // are in between because we know we already added the appropriate
17845 // errors. Otherwise we will create an array pattern.
17846 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17847 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17848
// Exactly two nodes means the two rests have nothing between them.
17849 if (nodes.size == 2) {
17850 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17851 }
17852 } else {
17853 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17854
17855 if (leading_rest && trailing_rest) {
17856 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17857 }
17858 }
17859
// The temporary list storage is released once the pattern node has been
// built from it.
17860 xfree(nodes.nodes);
17861 } else if (leading_rest) {
17862 // Otherwise, if we parsed a single splat pattern, then we know we have
17863 // an array pattern, so we can go ahead and create that node.
17864 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17865 }
17866
17867 return node;
17868}
17869
17875static inline void
17876parse_negative_numeric(pm_node_t *node) {
17877 switch (PM_NODE_TYPE(node)) {
17878 case PM_INTEGER_NODE: {
17879 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17880 cast->base.location.start--;
17881 cast->value.negative = true;
17882 break;
17883 }
17884 case PM_FLOAT_NODE: {
17885 pm_float_node_t *cast = (pm_float_node_t *) node;
17886 cast->base.location.start--;
17887 cast->value = -cast->value;
17888 break;
17889 }
17890 case PM_RATIONAL_NODE: {
17891 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17892 cast->base.location.start--;
17893 cast->numerator.negative = true;
17894 break;
17895 }
17896 case PM_IMAGINARY_NODE:
17897 node->location.start--;
17898 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17899 break;
17900 default:
17901 assert(false && "unreachable");
17902 break;
17903 }
17904}
17905
17911static void
17912pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17913 switch (diag_id) {
17914 case PM_ERR_HASH_KEY: {
17915 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17916 break;
17917 }
17918 case PM_ERR_HASH_VALUE:
17919 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17920 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17921 break;
17922 }
17923 case PM_ERR_UNARY_RECEIVER: {
17924 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17925 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17926 break;
17927 }
17928 case PM_ERR_UNARY_DISALLOWED:
17929 case PM_ERR_EXPECT_ARGUMENT: {
17930 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17931 break;
17932 }
17933 default:
17934 pm_parser_err_previous(parser, diag_id);
17935 break;
17936 }
17937}
17938
/**
 * Check that a `retry` is allowed at this point by walking the parser's
 * context chain from the innermost context outwards.
 *
 * The walk stops silently at the first context in which retry is allowed. If
 * it first reaches a context in which retry is not allowed, one error is
 * recorded on `node`; which one depends on whether an else or ensure context
 * was passed on the way. If the chain is exhausted, nothing is reported.
 *
 * NOTE(review): in this listing the embedded numbering has gaps inside the
 * switch, and the case labels that should guard the two `context = ...`
 * assignments are not visible. Confirm the full set of labels against the
 * original file.
 *
 * @param parser The parser whose current context chain is inspected.
 * @param node The retry node that an error, if any, is attached to.
 */
17942static void
17943parse_retry(pm_parser_t *parser, const pm_node_t *node) {
// Local state constants describing what kind of context the walk has passed
// through so far. They are undefined again at the end of the function.
17944#define CONTEXT_NONE 0
17945#define CONTEXT_THROUGH_ENSURE 1
17946#define CONTEXT_THROUGH_ELSE 2
17947
17948 pm_context_node_t *context_node = parser->current_context;
17949 int context = CONTEXT_NONE;
17950
17951 while (context_node != NULL) {
17952 switch (context_node->context) {
17960 case PM_CONTEXT_DEFINED:
17962 // These are the good cases. We're allowed to have a retry here.
17963 return;
17964 case PM_CONTEXT_CLASS:
17965 case PM_CONTEXT_DEF:
17967 case PM_CONTEXT_MAIN:
17968 case PM_CONTEXT_MODULE:
17969 case PM_CONTEXT_PREEXE:
17970 case PM_CONTEXT_SCLASS:
17971 // These are the bad cases. We're not allowed to have a retry in
17972 // these contexts.
17973 if (context == CONTEXT_NONE) {
17974 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17975 } else if (context == CONTEXT_THROUGH_ENSURE) {
17976 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17977 } else if (context == CONTEXT_THROUGH_ELSE) {
17978 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17979 }
17980 return;
17988 // These are also bad cases, but with a more specific error
17989 // message indicating the else.
17990 context = CONTEXT_THROUGH_ELSE;
17991 break;
17999 // These are also bad cases, but with a more specific error
18000 // message indicating the ensure.
18001 context = CONTEXT_THROUGH_ENSURE;
18002 break;
18003 case PM_CONTEXT_NONE:
18004 // This case should never happen.
18005 assert(false && "unreachable");
18006 break;
18007 case PM_CONTEXT_BEGIN:
18010 case PM_CONTEXT_CASE_IN:
18013 case PM_CONTEXT_ELSE:
18014 case PM_CONTEXT_ELSIF:
18015 case PM_CONTEXT_EMBEXPR:
18017 case PM_CONTEXT_FOR:
18018 case PM_CONTEXT_IF:
18023 case PM_CONTEXT_PARENS:
18024 case PM_CONTEXT_POSTEXE:
18026 case PM_CONTEXT_TERNARY:
18027 case PM_CONTEXT_UNLESS:
18028 case PM_CONTEXT_UNTIL:
18029 case PM_CONTEXT_WHILE:
18030 // In these contexts we should continue walking up the list of
18031 // contexts.
18032 break;
18033 }
18034
18035 context_node = context_node->prev;
18036 }
18037
// Undefine exactly the names defined at the top of the function so that none
// of them remain defined for the code that follows.
18038#undef CONTEXT_NONE
18039#undef CONTEXT_THROUGH_ENSURE
18040#undef CONTEXT_THROUGH_ELSE
18041}
18042
/**
 * Check that a `yield` is allowed at this point by walking the parser's
 * context chain from the innermost context outwards.
 *
 * The walk stops silently at the first context in which yield is allowed and
 * records PM_ERR_INVALID_YIELD on `node` at the first context in which it is
 * not. If the chain is exhausted, nothing is reported.
 *
 * NOTE(review): the embedded numbering has gaps inside the switch, so some
 * case labels are presumably not visible in this listing - confirm the full
 * set against the original file.
 *
 * @param parser The parser whose current context chain is inspected.
 * @param node The yield node that an error, if any, is attached to.
 */
18046static void
18047parse_yield(pm_parser_t *parser, const pm_node_t *node) {
18048 pm_context_node_t *context_node = parser->current_context;
18049
18050 while (context_node != NULL) {
18051 switch (context_node->context) {
18052 case PM_CONTEXT_DEF:
18054 case PM_CONTEXT_DEFINED:
18058 // These are the good cases. We're allowed to have a yield
18059 // in these contexts.
18060 return;
18061 case PM_CONTEXT_CLASS:
18065 case PM_CONTEXT_MAIN:
18066 case PM_CONTEXT_MODULE:
18070 case PM_CONTEXT_SCLASS:
18074 // These are the bad cases. We're not allowed to have a yield in
18075 // these contexts.
18076 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
18077 return;
18078 case PM_CONTEXT_NONE:
18079 // This case should never happen.
18080 assert(false && "unreachable");
18081 break;
18082 case PM_CONTEXT_BEGIN:
18091 case PM_CONTEXT_CASE_IN:
18094 case PM_CONTEXT_ELSE:
18095 case PM_CONTEXT_ELSIF:
18096 case PM_CONTEXT_EMBEXPR:
18098 case PM_CONTEXT_FOR:
18099 case PM_CONTEXT_IF:
18107 case PM_CONTEXT_PARENS:
18108 case PM_CONTEXT_POSTEXE:
18110 case PM_CONTEXT_PREEXE:
18112 case PM_CONTEXT_TERNARY:
18113 case PM_CONTEXT_UNLESS:
18114 case PM_CONTEXT_UNTIL:
18115 case PM_CONTEXT_WHILE:
18116 // In these contexts we should continue walking up the list of
18117 // contexts.
18118 break;
18119 }
18120
18121 context_node = context_node->prev;
18122 }
18123}
18124
/**
 * NOTE(review): presumably the payload passed to the regular expression error
 * callback that follows (it reads `start`, `end`, `parser` and `shared`
 * through a pointer). The closing line that names this typedef and any
 * further members are not visible in this listing - the embedded numbering
 * has gaps - so confirm against the original file.
 */
18129typedef struct {
18132
// Presumably the start of the location to fall back to when reporting an
// error - TODO confirm.
18134 const uint8_t *start;
18135
// Presumably the end of that fallback location - TODO confirm.
18137 const uint8_t *end;
18138
18147
/**
 * Error callback: record a PM_ERR_REGEXP_PARSE_ERROR on the parser held in
 * the callback data, formatted with `message`.
 *
 * The location of the error is [start, end) as passed in when the callback
 * data's `shared` flag is set; otherwise it is the start/end stored in the
 * callback data.
 *
 * NOTE(review): the declaration of `callback_data` is not visible in this
 * listing (the embedded numbering skips 18154) - presumably it is derived
 * from `data`; confirm against the original file.
 *
 * @param start The start of the span reported by the caller.
 * @param end The end of the span reported by the caller.
 * @param message The message to include in the error.
 * @param data Opaque callback data.
 */
18152static void
18153parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
18155 pm_location_t location;
18156
18157 if (callback_data->shared) {
18158 location = (pm_location_t) { .start = start, .end = end };
18159 } else {
18160 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18161 }
18162
18163 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18164}
18165
/**
 * Run pm_regexp_parse over the unescaped content of a regular expression node
 * with parse_regular_expression_error installed as the error callback.
 *
 * @param parser The parser passed to pm_regexp_parse and stored in the
 *     callback data.
 * @param node The regular expression node whose `unescaped` string is parsed
 *     and whose location is stored in the callback data.
 */
18169static void
18170parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18171 const pm_string_t *unescaped = &node->unescaped;
    // NOTE(review): the designated initializers below belong to the
    // `error_data` object whose address is passed to pm_regexp_parse, but the
    // line that declares it and opens the initializer is not present in the
    // visible text -- restore it from the upstream file.
    //
    // The callback data carries the parser, the node's own start/end as a
    // location, and whether the unescaped string is of type PM_STRING_SHARED.
18173 .parser = parser,
18174 .start = node->base.location.start,
18175 .end = node->base.location.end,
18176 .shared = unescaped->type == PM_STRING_SHARED
18177 };
18178
    // Parse the unescaped bytes, telling the regexp parser whether the node
    // has the EXTENDED flag. The error callback and its data come last.
18179 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18180}
18181
18185static inline pm_node_t *
18186parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18187 switch (parser->current.type) {
18188 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
18189 parser_lex(parser);
18190
18191 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18192 pm_accepts_block_stack_push(parser, true);
18193 bool parsed_bare_hash = false;
18194
18195 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18196 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18197
18198 // Handle the case where we don't have a comma and we have a
18199 // newline followed by a right bracket.
18200 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18201 break;
18202 }
18203
18204 // Ensure that we have a comma between elements in the array.
18205 if (array->elements.size > 0) {
18206 if (accept1(parser, PM_TOKEN_COMMA)) {
18207 // If there was a comma but we also accepts a newline,
18208 // then this is a syntax error.
18209 if (accepted_newline) {
18210 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18211 }
18212 } else {
18213 // If there was no comma, then we need to add a syntax
18214 // error.
18215 const uint8_t *location = parser->previous.end;
18216 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18217
18218 parser->previous.start = location;
18219 parser->previous.type = PM_TOKEN_MISSING;
18220 }
18221 }
18222
18223 // If we have a right bracket immediately following a comma,
18224 // this is allowed since it's a trailing comma. In this case we
18225 // can break out of the loop.
18226 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18227
18228 pm_node_t *element;
18229
18230 if (accept1(parser, PM_TOKEN_USTAR)) {
18231 pm_token_t operator = parser->previous;
18232 pm_node_t *expression = NULL;
18233
18234 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18235 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18236 } else {
18237 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18238 }
18239
18240 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18241 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18242 if (parsed_bare_hash) {
18243 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18244 }
18245
18246 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18247 pm_static_literals_t hash_keys = { 0 };
18248
18249 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
18250 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18251 }
18252
18253 pm_static_literals_free(&hash_keys);
18254 parsed_bare_hash = true;
18255 } else {
18256 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18257
18258 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18259 if (parsed_bare_hash) {
18260 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18261 }
18262
18263 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18264 pm_static_literals_t hash_keys = { 0 };
18265 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18266
18267 pm_token_t operator;
18268 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18269 operator = parser->previous;
18270 } else {
18271 operator = not_provided(parser);
18272 }
18273
18274 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18275 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18276 pm_keyword_hash_node_elements_append(hash, assoc);
18277
18278 element = (pm_node_t *) hash;
18279 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18280 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18281 }
18282
18283 pm_static_literals_free(&hash_keys);
18284 parsed_bare_hash = true;
18285 }
18286 }
18287
18288 pm_array_node_elements_append(array, element);
18289 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18290 }
18291
18292 accept1(parser, PM_TOKEN_NEWLINE);
18293
18294 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18295 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18296 parser->previous.start = parser->previous.end;
18297 parser->previous.type = PM_TOKEN_MISSING;
18298 }
18299
18300 pm_array_node_close_set(array, &parser->previous);
18301 pm_accepts_block_stack_pop(parser);
18302
18303 return (pm_node_t *) array;
18304 }
18305 case PM_TOKEN_PARENTHESIS_LEFT:
18306 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
18307 pm_token_t opening = parser->current;
18308 pm_node_flags_t flags = 0;
18309
18310 pm_node_list_t current_block_exits = { 0 };
18311 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18312
18313 parser_lex(parser);
18314 while (true) {
18315 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18316 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18317 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18318 break;
18319 }
18320 }
18321
18322 // If this is the end of the file or we match a right parenthesis, then
18323 // we have an empty parentheses node, and we can immediately return.
18324 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18325 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18326
18327 pop_block_exits(parser, previous_block_exits);
18328 pm_node_list_free(&current_block_exits);
18329
18330 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
18331 }
18332
18333 // Otherwise, we're going to parse the first statement in the list
18334 // of statements within the parentheses.
18335 pm_accepts_block_stack_push(parser, true);
18336 context_push(parser, PM_CONTEXT_PARENS);
18337 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18338 context_pop(parser);
18339
18340 // Determine if this statement is followed by a terminator. In the
18341 // case of a single statement, this is fine. But in the case of
18342 // multiple statements it's required.
18343 bool terminator_found = false;
18344
18345 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18346 terminator_found = true;
18347 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18348 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18349 terminator_found = true;
18350 }
18351
18352 if (terminator_found) {
18353 while (true) {
18354 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18355 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18356 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18357 break;
18358 }
18359 }
18360 }
18361
18362 // If we hit a right parenthesis, then we're done parsing the
18363 // parentheses node, and we can check which kind of node we should
18364 // return.
18365 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18366 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
18367 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18368 }
18369
18370 parser_lex(parser);
18371 pm_accepts_block_stack_pop(parser);
18372
18373 pop_block_exits(parser, previous_block_exits);
18374 pm_node_list_free(&current_block_exits);
18375
18376 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18377 // If we have a single statement and are ending on a right
18378 // parenthesis, then we need to check if this is possibly a
18379 // multiple target node.
18380 pm_multi_target_node_t *multi_target;
18381
18382 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18383 multi_target = (pm_multi_target_node_t *) statement;
18384 } else {
18385 multi_target = pm_multi_target_node_create(parser);
18386 pm_multi_target_node_targets_append(parser, multi_target, statement);
18387 }
18388
18389 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18390 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18391
18392 multi_target->lparen_loc = lparen_loc;
18393 multi_target->rparen_loc = rparen_loc;
18394 multi_target->base.location.start = lparen_loc.start;
18395 multi_target->base.location.end = rparen_loc.end;
18396
18397 pm_node_t *result;
18398 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18399 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18400 accept1(parser, PM_TOKEN_NEWLINE);
18401 } else {
18402 result = (pm_node_t *) multi_target;
18403 }
18404
18405 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18406 // All set, this is explicitly allowed by the parent
18407 // context.
18408 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18409 // All set, we're inside a for loop and we're parsing
18410 // multiple targets.
18411 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18412 // Multi targets are not allowed when it's not a
18413 // statement level.
18414 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18415 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18416 // Multi targets must be followed by an equal sign in
18417 // order to be valid (or a right parenthesis if they are
18418 // nested).
18419 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18420 }
18421
18422 return result;
18423 }
18424
18425 // If we have a single statement and are ending on a right parenthesis
18426 // and we didn't return a multiple assignment node, then we can return a
18427 // regular parentheses node now.
18428 pm_statements_node_t *statements = pm_statements_node_create(parser);
18429 pm_statements_node_body_append(parser, statements, statement, true);
18430
18431 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18432 }
18433
18434 // If we have more than one statement in the set of parentheses,
18435 // then we are going to parse all of them as a list of statements.
18436 // We'll do that here.
18437 context_push(parser, PM_CONTEXT_PARENS);
18438 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18439
18440 pm_statements_node_t *statements = pm_statements_node_create(parser);
18441 pm_statements_node_body_append(parser, statements, statement, true);
18442
18443 // If we didn't find a terminator and we didn't find a right
18444 // parenthesis, then this is a syntax error.
18445 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18446 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18447 }
18448
18449 // Parse each statement within the parentheses.
18450 while (true) {
18451 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18452 pm_statements_node_body_append(parser, statements, node, true);
18453
18454 // If we're recovering from a syntax error, then we need to stop
18455 // parsing the statements now.
18456 if (parser->recovering) {
18457 // If this is the level of context where the recovery has
18458 // happened, then we can mark the parser as done recovering.
18459 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18460 break;
18461 }
18462
18463 // If we couldn't parse an expression at all, then we need to
18464 // bail out of the loop.
18465 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18466
18467 // If we successfully parsed a statement, then we are going to
18468 // need terminator to delimit them.
18469 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18470 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18471 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18472 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18473 break;
18474 } else if (!match1(parser, PM_TOKEN_EOF)) {
18475 // If we're at the end of the file, then we're going to add
18476 // an error after this for the ) anyway.
18477 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18478 }
18479 }
18480
18481 context_pop(parser);
18482 pm_accepts_block_stack_pop(parser);
18483 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18484
18485 // When we're parsing multi targets, we allow them to be followed by
18486 // a right parenthesis if they are at the statement level. This is
18487 // only possible if they are the final statement in a parentheses.
18488 // We need to explicitly reject that here.
18489 {
18490 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18491
18492 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18493 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18494 pm_multi_target_node_targets_append(parser, multi_target, statement);
18495
18496 statement = (pm_node_t *) multi_target;
18497 statements->body.nodes[statements->body.size - 1] = statement;
18498 }
18499
18500 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18501 const uint8_t *offset = statement->location.end;
18502 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18503 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18504
18505 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18506 statements->body.nodes[statements->body.size - 1] = statement;
18507
18508 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18509 }
18510 }
18511
18512 pop_block_exits(parser, previous_block_exits);
18513 pm_node_list_free(&current_block_exits);
18514
18515 pm_void_statements_check(parser, statements, true);
18516 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18517 }
18518 case PM_TOKEN_BRACE_LEFT: {
18519 // If we were passed a current_hash_keys via the parser, then that
18520 // means we're already parsing a hash and we want to share the set
18521 // of hash keys with this inner hash we're about to parse for the
18522 // sake of warnings. We'll set it to NULL after we grab it to make
18523 // sure subsequent expressions don't use it. Effectively this is a
18524 // way of getting around passing it to every call to
18525 // parse_expression.
18526 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18527 parser->current_hash_keys = NULL;
18528
18529 pm_accepts_block_stack_push(parser, true);
18530 parser_lex(parser);
18531
18532 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18533
18534 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18535 if (current_hash_keys != NULL) {
18536 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18537 } else {
18538 pm_static_literals_t hash_keys = { 0 };
18539 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18540 pm_static_literals_free(&hash_keys);
18541 }
18542
18543 accept1(parser, PM_TOKEN_NEWLINE);
18544 }
18545
18546 pm_accepts_block_stack_pop(parser);
18547 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18548 pm_hash_node_closing_loc_set(node, &parser->previous);
18549
18550 return (pm_node_t *) node;
18551 }
18552 case PM_TOKEN_CHARACTER_LITERAL: {
18553 pm_token_t closing = not_provided(parser);
18554 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(
18555 parser,
18556 &(pm_token_t) {
18557 .type = PM_TOKEN_STRING_BEGIN,
18558 .start = parser->current.start,
18559 .end = parser->current.start + 1
18560 },
18561 &(pm_token_t) {
18562 .type = PM_TOKEN_STRING_CONTENT,
18563 .start = parser->current.start + 1,
18564 .end = parser->current.end
18565 },
18566 &closing
18567 );
18568
18569 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18570
18571 // Skip past the character literal here, since now we have handled
18572 // parser->explicit_encoding correctly.
18573 parser_lex(parser);
18574
18575 // Characters can be followed by strings in which case they are
18576 // automatically concatenated.
18577 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18578 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18579 }
18580
18581 return node;
18582 }
18583 case PM_TOKEN_CLASS_VARIABLE: {
18584 parser_lex(parser);
18585 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18586
18587 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18588 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18589 }
18590
18591 return node;
18592 }
18593 case PM_TOKEN_CONSTANT: {
18594 parser_lex(parser);
18595 pm_token_t constant = parser->previous;
18596
18597 // If a constant is immediately followed by parentheses, then this is in
18598 // fact a method call, not a constant read.
18599 if (
18600 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18601 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18602 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18603 match1(parser, PM_TOKEN_BRACE_LEFT)
18604 ) {
18605 pm_arguments_t arguments = { 0 };
18606 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18607 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18608 }
18609
18610 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18611
18612 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18613 // If we get here, then we have a comma immediately following a
18614 // constant, so we're going to parse this as a multiple assignment.
18615 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18616 }
18617
18618 return node;
18619 }
18620 case PM_TOKEN_UCOLON_COLON: {
18621 parser_lex(parser);
18622 pm_token_t delimiter = parser->previous;
18623
18624 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18625 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18626
18627 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18628 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18629 }
18630
18631 return node;
18632 }
18633 case PM_TOKEN_UDOT_DOT:
18634 case PM_TOKEN_UDOT_DOT_DOT: {
18635 pm_token_t operator = parser->current;
18636 parser_lex(parser);
18637
18638 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18639
18640 // Unary .. and ... are special because these are non-associative
18641 // operators that can also be unary operators. In this case we need
18642 // to explicitly reject code that has a .. or ... that follows this
18643 // expression.
18644 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18645 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18646 }
18647
18648 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18649 }
18650 case PM_TOKEN_FLOAT:
18651 parser_lex(parser);
18652 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18653 case PM_TOKEN_FLOAT_IMAGINARY:
18654 parser_lex(parser);
18655 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18656 case PM_TOKEN_FLOAT_RATIONAL:
18657 parser_lex(parser);
18658 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18659 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
18660 parser_lex(parser);
18661 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18662 case PM_TOKEN_NUMBERED_REFERENCE: {
18663 parser_lex(parser);
18664 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18665
18666 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18667 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18668 }
18669
18670 return node;
18671 }
18672 case PM_TOKEN_GLOBAL_VARIABLE: {
18673 parser_lex(parser);
18674 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18675
18676 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18677 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18678 }
18679
18680 return node;
18681 }
18682 case PM_TOKEN_BACK_REFERENCE: {
18683 parser_lex(parser);
18684 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18685
18686 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18687 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18688 }
18689
18690 return node;
18691 }
18692 case PM_TOKEN_IDENTIFIER:
18693 case PM_TOKEN_METHOD_NAME: {
18694 parser_lex(parser);
18695 pm_token_t identifier = parser->previous;
18696 pm_node_t *node = parse_variable_call(parser);
18697
18698 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18699 // If parse_variable_call returned with a call node, then we
18700 // know the identifier is not in the local table. In that case
18701 // we need to check if there are arguments following the
18702 // identifier.
18703 pm_call_node_t *call = (pm_call_node_t *) node;
18704 pm_arguments_t arguments = { 0 };
18705
18706 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18707 // Since we found arguments, we need to turn off the
18708 // variable call bit in the flags.
18709 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18710
18711 call->opening_loc = arguments.opening_loc;
18712 call->arguments = arguments.arguments;
18713 call->closing_loc = arguments.closing_loc;
18714 call->block = arguments.block;
18715
18716 const uint8_t *end = pm_arguments_end(&arguments);
18717 if (!end) {
18718 end = call->message_loc.end;
18719 }
18720 call->base.location.end = end;
18721 }
18722 } else {
18723 // Otherwise, we know the identifier is in the local table. This
18724 // can still be a method call if it is followed by arguments or
18725 // a block, so we need to check for that here.
18726 if (
18727 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18728 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18729 match1(parser, PM_TOKEN_BRACE_LEFT)
18730 ) {
18731 pm_arguments_t arguments = { 0 };
18732 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18733 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18734
18735 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
18736 // If we're about to convert an 'it' implicit local
18737 // variable read into a method call, we need to remove
18738 // it from the list of implicit local variables.
18739 parse_target_implicit_parameter(parser, node);
18740 } else {
18741 // Otherwise, we're about to convert a regular local
18742 // variable read into a method call, in which case we
18743 // need to indicate that this was not a read for the
18744 // purposes of warnings.
18745 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
18746
18747 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18748 parse_target_implicit_parameter(parser, node);
18749 } else {
18751 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18752 }
18753 }
18754
18755 pm_node_destroy(parser, node);
18756 return (pm_node_t *) fcall;
18757 }
18758 }
18759
18760 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18761 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18762 }
18763
18764 return node;
18765 }
18766 case PM_TOKEN_HEREDOC_START: {
18767 // Here we have found a heredoc. We'll parse it and add it to the
18768 // list of strings.
18769 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18770 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18771
18772 size_t common_whitespace = (size_t) -1;
18773 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18774
18775 parser_lex(parser);
18776 pm_token_t opening = parser->previous;
18777
18778 pm_node_t *node;
18779 pm_node_t *part;
18780
18781 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18782 // If we get here, then we have an empty heredoc. We'll create
18783 // an empty content token and return an empty string node.
18784 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18785 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18786
18787 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18788 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18789 } else {
18790 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18791 }
18792
18793 node->location.end = opening.end;
18794 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18795 // If we get here, then we tried to find something in the
18796 // heredoc but couldn't actually parse anything, so we'll just
18797 // return a missing node.
18798 //
18799 // parse_string_part handles its own errors, so there is no need
18800 // for us to add one here.
18801 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18802 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18803 // If we get here, then the part that we parsed was plain string
18804 // content and we're at the end of the heredoc, so we can return
18805 // just a string node with the heredoc opening and closing as
18806 // its opening and closing.
18807 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18808 pm_string_node_t *cast = (pm_string_node_t *) part;
18809
18810 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18811 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18812 cast->base.location = cast->opening_loc;
18813
18814 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18815 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18816 cast->base.type = PM_X_STRING_NODE;
18817 }
18818
18819 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18820 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18821 }
18822
18823 node = (pm_node_t *) cast;
18824 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18825 } else {
18826 // If we get here, then we have multiple parts in the heredoc,
18827 // so we'll need to create an interpolated string node to hold
18828 // them all.
18829 pm_node_list_t parts = { 0 };
18830 pm_node_list_append(&parts, part);
18831
18832 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18833 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18834 pm_node_list_append(&parts, part);
18835 }
18836 }
18837
18838 // Now that we have all of the parts, create the correct type of
18839 // interpolated node.
18840 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18841 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18842 cast->parts = parts;
18843
18844 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18845 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18846
18847 cast->base.location = cast->opening_loc;
18848 node = (pm_node_t *) cast;
18849 } else {
18850 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18851 pm_node_list_free(&parts);
18852
18853 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18854 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18855
18856 cast->base.location = cast->opening_loc;
18857 node = (pm_node_t *) cast;
18858 }
18859
18860 // If this is a heredoc that is indented with a ~, then we need
18861 // to dedent each line by the common leading whitespace.
18862 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18863 pm_node_list_t *nodes;
18864 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18865 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18866 } else {
18867 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18868 }
18869
18870 parse_heredoc_dedent(parser, nodes, common_whitespace);
18871 }
18872 }
18873
18874 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18875 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18876 }
18877
18878 return node;
18879 }
18880 case PM_TOKEN_INSTANCE_VARIABLE: {
18881 parser_lex(parser);
18882 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18883
18884 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18885 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18886 }
18887
18888 return node;
18889 }
18890 case PM_TOKEN_INTEGER: {
18891 pm_node_flags_t base = parser->integer_base;
18892 parser_lex(parser);
18893 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18894 }
18895 case PM_TOKEN_INTEGER_IMAGINARY: {
18896 pm_node_flags_t base = parser->integer_base;
18897 parser_lex(parser);
18898 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18899 }
18900 case PM_TOKEN_INTEGER_RATIONAL: {
18901 pm_node_flags_t base = parser->integer_base;
18902 parser_lex(parser);
18903 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18904 }
18905 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18906 pm_node_flags_t base = parser->integer_base;
18907 parser_lex(parser);
18908 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18909 }
18910 case PM_TOKEN_KEYWORD___ENCODING__:
18911 parser_lex(parser);
18912 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18913 case PM_TOKEN_KEYWORD___FILE__:
18914 parser_lex(parser);
18915 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18916 case PM_TOKEN_KEYWORD___LINE__:
18917 parser_lex(parser);
18918 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18919 case PM_TOKEN_KEYWORD_ALIAS: {
18920 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18921 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18922 }
18923
18924 parser_lex(parser);
18925 pm_token_t keyword = parser->previous;
18926
18927 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18928 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18929
18930 switch (PM_NODE_TYPE(new_name)) {
18931 case PM_BACK_REFERENCE_READ_NODE:
18932 case PM_NUMBERED_REFERENCE_READ_NODE:
18933 case PM_GLOBAL_VARIABLE_READ_NODE: {
18934 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18935 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18936 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18937 }
18938 } else {
18939 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18940 }
18941
18942 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18943 }
18944 case PM_SYMBOL_NODE:
18945 case PM_INTERPOLATED_SYMBOL_NODE: {
18946 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18947 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18948 }
18949 }
18951 default:
18952 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18953 }
18954 }
18955 case PM_TOKEN_KEYWORD_CASE: {
18956 size_t opening_newline_index = token_newline_index(parser);
18957 parser_lex(parser);
18958
18959 pm_token_t case_keyword = parser->previous;
18960 pm_node_t *predicate = NULL;
18961
18962 pm_node_list_t current_block_exits = { 0 };
18963 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18964
18965 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18966 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18967 predicate = NULL;
18968 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18969 predicate = NULL;
18970 } else if (!token_begins_expression_p(parser->current.type)) {
18971 predicate = NULL;
18972 } else {
18973 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18974 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18975 }
18976
18977 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18978 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18979 parser_lex(parser);
18980
18981 pop_block_exits(parser, previous_block_exits);
18982 pm_node_list_free(&current_block_exits);
18983
18984 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18985 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18986 }
18987
18988 // At this point we can create a case node, though we don't yet know
18989 // if it is a case-in or case-when node.
18990 pm_token_t end_keyword = not_provided(parser);
18991 pm_node_t *node;
18992
18993 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18994 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18995 pm_static_literals_t literals = { 0 };
18996
18997 // At this point we've seen a when keyword, so we know this is a
18998 // case-when node. We will continue to parse the when nodes
18999 // until we hit the end of the list.
19000 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
19001 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
19002 parser_lex(parser);
19003
19004 pm_token_t when_keyword = parser->previous;
19005 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
19006
19007 do {
19008 if (accept1(parser, PM_TOKEN_USTAR)) {
19009 pm_token_t operator = parser->previous;
19010 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19011
19012 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
19013 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
19014
19015 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
19016 } else {
19017 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
19018 pm_when_node_conditions_append(when_node, condition);
19019
19020 // If we found a missing node, then this is a syntax
19021 // error and we should stop looping.
19022 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
19023
19024 // If this is a string node, then we need to mark it
19025 // as frozen because when clause strings are frozen.
19026 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
19027 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
19028 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
19029 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
19030 }
19031
19032 pm_when_clause_static_literals_add(parser, &literals, condition);
19033 }
19034 } while (accept1(parser, PM_TOKEN_COMMA));
19035
19036 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19037 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
19038 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
19039 }
19040 } else {
19041 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
19042 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
19043 }
19044
19045 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19046 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
19047 if (statements != NULL) {
19048 pm_when_node_statements_set(when_node, statements);
19049 }
19050 }
19051
19052 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
19053 }
19054
19055 // If we didn't parse any conditions (in or when) then we need
19056 // to indicate that we have an error.
19057 if (case_node->conditions.size == 0) {
19058 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19059 }
19060
19061 pm_static_literals_free(&literals);
19062 node = (pm_node_t *) case_node;
19063 } else {
19064 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
19065
19066 // If this is a case-match node (i.e., it is a pattern matching
19067 // case statement) then we must have a predicate.
19068 if (predicate == NULL) {
19069 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
19070 }
19071
19072 // At this point we expect that we're parsing a case-in node. We
19073 // will continue to parse the in nodes until we hit the end of
19074 // the list.
19075 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
19076 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
19077
19078 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
19079 parser->pattern_matching_newlines = true;
19080
19081 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
19082 parser->command_start = false;
19083 parser_lex(parser);
19084
19085 pm_token_t in_keyword = parser->previous;
19086
19087 pm_constant_id_list_t captures = { 0 };
19088 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
19089
19090 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
19091 pm_constant_id_list_free(&captures);
19092
19093 // Since we're in the top-level of the case-in node we need
19094 // to check for guard clauses in the form of `if` or
19095 // `unless` statements.
19096 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
19097 pm_token_t keyword = parser->previous;
19098 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
19099 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
19100 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
19101 pm_token_t keyword = parser->previous;
19102 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
19103 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
19104 }
19105
19106 // Now we need to check for the terminator of the in node's
19107 // pattern. It can be a newline or semicolon optionally
19108 // followed by a `then` keyword.
19109 pm_token_t then_keyword;
19110 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19111 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
19112 then_keyword = parser->previous;
19113 } else {
19114 then_keyword = not_provided(parser);
19115 }
19116 } else {
19117 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
19118 then_keyword = parser->previous;
19119 }
19120
19121 // Now we can actually parse the statements associated with
19122 // the in node.
19123 pm_statements_node_t *statements;
19124 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19125 statements = NULL;
19126 } else {
19127 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
19128 }
19129
19130 // Now that we have the full pattern and statements, we can
19131 // create the node and attach it to the case node.
19132 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
19133 pm_case_match_node_condition_append(case_node, condition);
19134 }
19135
19136 // If we didn't parse any conditions (in or when) then we need
19137 // to indicate that we have an error.
19138 if (case_node->conditions.size == 0) {
19139 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19140 }
19141
19142 node = (pm_node_t *) case_node;
19143 }
19144
19145 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19146 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
19147 pm_token_t else_keyword = parser->previous;
19148 pm_else_node_t *else_node;
19149
19150 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19151 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
19152 } else {
19153 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
19154 }
19155
19156 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19157 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
19158 } else {
19159 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
19160 }
19161 }
19162
19163 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
19164 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
19165
19166 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19167 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
19168 } else {
19169 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
19170 }
19171
19172 pop_block_exits(parser, previous_block_exits);
19173 pm_node_list_free(&current_block_exits);
19174
19175 return node;
19176 }
19177 case PM_TOKEN_KEYWORD_BEGIN: {
19178 size_t opening_newline_index = token_newline_index(parser);
19179 parser_lex(parser);
19180
19181 pm_token_t begin_keyword = parser->previous;
19182 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19183
19184 pm_node_list_t current_block_exits = { 0 };
19185 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19186 pm_statements_node_t *begin_statements = NULL;
19187
19188 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19189 pm_accepts_block_stack_push(parser, true);
19190 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19191 pm_accepts_block_stack_pop(parser);
19192 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19193 }
19194
19195 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19196 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19197 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19198
19199 begin_node->base.location.end = parser->previous.end;
19200 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19201
19202 pop_block_exits(parser, previous_block_exits);
19203 pm_node_list_free(&current_block_exits);
19204
19205 return (pm_node_t *) begin_node;
19206 }
19207 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
19208 pm_node_list_t current_block_exits = { 0 };
19209 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19210
19211 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19212 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19213 }
19214
19215 parser_lex(parser);
19216 pm_token_t keyword = parser->previous;
19217
19218 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19219 pm_token_t opening = parser->previous;
19220 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19221
19222 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19223 pm_context_t context = parser->current_context->context;
19224 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19225 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19226 }
19227
19228 flush_block_exits(parser, previous_block_exits);
19229 pm_node_list_free(&current_block_exits);
19230
19231 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19232 }
19233 case PM_TOKEN_KEYWORD_BREAK:
19234 case PM_TOKEN_KEYWORD_NEXT:
19235 case PM_TOKEN_KEYWORD_RETURN: {
19236 parser_lex(parser);
19237
19238 pm_token_t keyword = parser->previous;
19239 pm_arguments_t arguments = { 0 };
19240
19241 if (
19242 token_begins_expression_p(parser->current.type) ||
19243 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19244 ) {
19245 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19246
19247 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19248 pm_token_t next = parser->current;
19249 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19250
19251 // Reject `foo && return bar`.
19252 if (!accepts_command_call && arguments.arguments != NULL) {
19253 PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
19254 }
19255 }
19256 }
19257
19258 switch (keyword.type) {
19259 case PM_TOKEN_KEYWORD_BREAK: {
19260 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19261 if (!parser->partial_script) parse_block_exit(parser, node);
19262 return node;
19263 }
19264 case PM_TOKEN_KEYWORD_NEXT: {
19265 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19266 if (!parser->partial_script) parse_block_exit(parser, node);
19267 return node;
19268 }
19269 case PM_TOKEN_KEYWORD_RETURN: {
19270 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19271 parse_return(parser, node);
19272 return node;
19273 }
19274 default:
19275 assert(false && "unreachable");
19276 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19277 }
19278 }
19279 case PM_TOKEN_KEYWORD_SUPER: {
19280 parser_lex(parser);
19281
19282 pm_token_t keyword = parser->previous;
19283 pm_arguments_t arguments = { 0 };
19284 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19285
19286 if (
19287 arguments.opening_loc.start == NULL &&
19288 arguments.arguments == NULL &&
19289 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19290 ) {
19291 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19292 }
19293
19294 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19295 }
19296 case PM_TOKEN_KEYWORD_YIELD: {
19297 parser_lex(parser);
19298
19299 pm_token_t keyword = parser->previous;
19300 pm_arguments_t arguments = { 0 };
19301 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19302
19303 // It's possible that we've parsed a block argument through our
19304 // call to parse_arguments_list. If we found one, we should mark it
19305 // as invalid and destroy it, as we don't have a place for it on the
19306 // yield node.
19307 if (arguments.block != NULL) {
19308 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19309 pm_node_destroy(parser, arguments.block);
19310 arguments.block = NULL;
19311 }
19312
19313 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19314 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19315
19316 return node;
19317 }
19318 case PM_TOKEN_KEYWORD_CLASS: {
19319 size_t opening_newline_index = token_newline_index(parser);
19320 parser_lex(parser);
19321
19322 pm_token_t class_keyword = parser->previous;
19323 pm_do_loop_stack_push(parser, false);
19324
19325 pm_node_list_t current_block_exits = { 0 };
19326 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19327
19328 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19329 pm_token_t operator = parser->previous;
19330 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19331
19332 pm_parser_scope_push(parser, true);
19333 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19334 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19335 }
19336
19337 pm_node_t *statements = NULL;
19338 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19339 pm_accepts_block_stack_push(parser, true);
19340 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19341 pm_accepts_block_stack_pop(parser);
19342 }
19343
19344 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19345 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19346 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19347 } else {
19348 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19349 }
19350
19351 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19352
19353 pm_constant_id_list_t locals;
19354 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19355
19356 pm_parser_scope_pop(parser);
19357 pm_do_loop_stack_pop(parser);
19358
19359 flush_block_exits(parser, previous_block_exits);
19360 pm_node_list_free(&current_block_exits);
19361
19362 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19363 }
19364
19365 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19366 pm_token_t name = parser->previous;
19367 if (name.type != PM_TOKEN_CONSTANT) {
19368 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19369 }
19370
19371 pm_token_t inheritance_operator;
19372 pm_node_t *superclass;
19373
19374 if (match1(parser, PM_TOKEN_LESS)) {
19375 inheritance_operator = parser->current;
19376 lex_state_set(parser, PM_LEX_STATE_BEG);
19377
19378 parser->command_start = true;
19379 parser_lex(parser);
19380
19381 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19382 } else {
19383 inheritance_operator = not_provided(parser);
19384 superclass = NULL;
19385 }
19386
19387 pm_parser_scope_push(parser, true);
19388
19389 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19390 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19391 } else {
19392 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19393 }
19394 pm_node_t *statements = NULL;
19395
19396 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19397 pm_accepts_block_stack_push(parser, true);
19398 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19399 pm_accepts_block_stack_pop(parser);
19400 }
19401
19402 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19403 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19404 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19405 } else {
19406 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19407 }
19408
19409 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19410
19411 if (context_def_p(parser)) {
19412 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19413 }
19414
19415 pm_constant_id_list_t locals;
19416 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19417
19418 pm_parser_scope_pop(parser);
19419 pm_do_loop_stack_pop(parser);
19420
19421 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19422 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19423 }
19424
19425 pop_block_exits(parser, previous_block_exits);
19426 pm_node_list_free(&current_block_exits);
19427
19428 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19429 }
19430 case PM_TOKEN_KEYWORD_DEF: {
19431 pm_node_list_t current_block_exits = { 0 };
19432 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19433
19434 pm_token_t def_keyword = parser->current;
19435 size_t opening_newline_index = token_newline_index(parser);
19436
19437 pm_node_t *receiver = NULL;
19438 pm_token_t operator = not_provided(parser);
19439 pm_token_t name;
19440
19441 // This context is necessary for correctly lexing `...` in a bare
19442 // parameter list. It must be pushed before the first parameter is
19443 // lexed, so it is pushed here.
19444 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19445 parser_lex(parser);
19446
19447 // This will be false if the method name is not a valid identifier
19448 // but could be followed by an operator.
19449 bool valid_name = true;
19450
19451 switch (parser->current.type) {
19452 case PM_CASE_OPERATOR:
19453 pm_parser_scope_push(parser, true);
19454 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19455 parser_lex(parser);
19456
19457 name = parser->previous;
19458 break;
19459 case PM_TOKEN_IDENTIFIER: {
19460 parser_lex(parser);
19461
19462 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19463 receiver = parse_variable_call(parser);
19464
19465 pm_parser_scope_push(parser, true);
19466 lex_state_set(parser, PM_LEX_STATE_FNAME);
19467 parser_lex(parser);
19468
19469 operator = parser->previous;
19470 name = parse_method_definition_name(parser);
19471 } else {
19472 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19473 pm_parser_scope_push(parser, true);
19474
19475 name = parser->previous;
19476 }
19477
19478 break;
19479 }
19480 case PM_TOKEN_INSTANCE_VARIABLE:
19481 case PM_TOKEN_CLASS_VARIABLE:
19482 case PM_TOKEN_GLOBAL_VARIABLE:
19483 valid_name = false;
19485 case PM_TOKEN_CONSTANT:
19486 case PM_TOKEN_KEYWORD_NIL:
19487 case PM_TOKEN_KEYWORD_SELF:
19488 case PM_TOKEN_KEYWORD_TRUE:
19489 case PM_TOKEN_KEYWORD_FALSE:
19490 case PM_TOKEN_KEYWORD___FILE__:
19491 case PM_TOKEN_KEYWORD___LINE__:
19492 case PM_TOKEN_KEYWORD___ENCODING__: {
19493 pm_parser_scope_push(parser, true);
19494 parser_lex(parser);
19495
19496 pm_token_t identifier = parser->previous;
19497
19498 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19499 lex_state_set(parser, PM_LEX_STATE_FNAME);
19500 parser_lex(parser);
19501 operator = parser->previous;
19502
19503 switch (identifier.type) {
19504 case PM_TOKEN_CONSTANT:
19505 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19506 break;
19507 case PM_TOKEN_INSTANCE_VARIABLE:
19508 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19509 break;
19510 case PM_TOKEN_CLASS_VARIABLE:
19511 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19512 break;
19513 case PM_TOKEN_GLOBAL_VARIABLE:
19514 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19515 break;
19516 case PM_TOKEN_KEYWORD_NIL:
19517 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19518 break;
19519 case PM_TOKEN_KEYWORD_SELF:
19520 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19521 break;
19522 case PM_TOKEN_KEYWORD_TRUE:
19523 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19524 break;
19525 case PM_TOKEN_KEYWORD_FALSE:
19526 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19527 break;
19528 case PM_TOKEN_KEYWORD___FILE__:
19529 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19530 break;
19531 case PM_TOKEN_KEYWORD___LINE__:
19532 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19533 break;
19534 case PM_TOKEN_KEYWORD___ENCODING__:
19535 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19536 break;
19537 default:
19538 break;
19539 }
19540
19541 name = parse_method_definition_name(parser);
19542 } else {
19543 if (!valid_name) {
19544 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19545 }
19546
19547 name = identifier;
19548 }
19549 break;
19550 }
19551 case PM_TOKEN_PARENTHESIS_LEFT: {
19552 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19553 // the inner expression of this parenthesis should not be
19554 // processed under this context. Thus, the context is popped
19555 // here.
19556 context_pop(parser);
19557 parser_lex(parser);
19558
19559 pm_token_t lparen = parser->previous;
19560 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19561
19562 accept1(parser, PM_TOKEN_NEWLINE);
19563 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19564 pm_token_t rparen = parser->previous;
19565
19566 lex_state_set(parser, PM_LEX_STATE_FNAME);
19567 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19568
19569 operator = parser->previous;
19570 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
19571
19572 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19573 // reason as described above.
19574 pm_parser_scope_push(parser, true);
19575 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19576 name = parse_method_definition_name(parser);
19577 break;
19578 }
19579 default:
19580 pm_parser_scope_push(parser, true);
19581 name = parse_method_definition_name(parser);
19582 break;
19583 }
19584
19585 pm_token_t lparen;
19586 pm_token_t rparen;
19587 pm_parameters_node_t *params;
19588
19589 switch (parser->current.type) {
19590 case PM_TOKEN_PARENTHESIS_LEFT: {
19591 parser_lex(parser);
19592 lparen = parser->previous;
19593
19594 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19595 params = NULL;
19596 } else {
19597 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19598 }
19599
19600 lex_state_set(parser, PM_LEX_STATE_BEG);
19601 parser->command_start = true;
19602
19603 context_pop(parser);
19604 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19605 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19606 parser->previous.start = parser->previous.end;
19607 parser->previous.type = PM_TOKEN_MISSING;
19608 }
19609
19610 rparen = parser->previous;
19611 break;
19612 }
19613 case PM_CASE_PARAMETER: {
19614 // If we're about to lex a label, we need to add the label
19615 // state to make sure the next newline is ignored.
19616 if (parser->current.type == PM_TOKEN_LABEL) {
19617 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19618 }
19619
19620 lparen = not_provided(parser);
19621 rparen = not_provided(parser);
19622 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19623
19624 context_pop(parser);
19625 break;
19626 }
19627 default: {
19628 lparen = not_provided(parser);
19629 rparen = not_provided(parser);
19630 params = NULL;
19631
19632 context_pop(parser);
19633 break;
19634 }
19635 }
19636
19637 pm_node_t *statements = NULL;
19638 pm_token_t equal;
19639 pm_token_t end_keyword;
19640
19641 if (accept1(parser, PM_TOKEN_EQUAL)) {
19642 if (token_is_setter_name(&name)) {
19643 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19644 }
19645 equal = parser->previous;
19646
19647 context_push(parser, PM_CONTEXT_DEF);
19648 pm_do_loop_stack_push(parser, false);
19649 statements = (pm_node_t *) pm_statements_node_create(parser);
19650
19651 bool allow_command_call;
19652 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
19653 allow_command_call = accepts_command_call;
19654 } else {
19655 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
19656 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
19657 }
19658
19659 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19660
19661 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19662 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19663
19664 pm_token_t rescue_keyword = parser->previous;
19665 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19666 context_pop(parser);
19667
19668 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19669 }
19670
19671 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19672 pm_do_loop_stack_pop(parser);
19673 context_pop(parser);
19674 end_keyword = not_provided(parser);
19675 } else {
19676 equal = not_provided(parser);
19677
19678 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19679 lex_state_set(parser, PM_LEX_STATE_BEG);
19680 parser->command_start = true;
19681 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19682 } else {
19683 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19684 }
19685
19686 pm_accepts_block_stack_push(parser, true);
19687 pm_do_loop_stack_push(parser, false);
19688
19689 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19690 pm_accepts_block_stack_push(parser, true);
19691 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19692 pm_accepts_block_stack_pop(parser);
19693 }
19694
19695 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19696 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19697 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19698 } else {
19699 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19700 }
19701
19702 pm_accepts_block_stack_pop(parser);
19703 pm_do_loop_stack_pop(parser);
19704
19705 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19706 end_keyword = parser->previous;
19707 }
19708
19709 pm_constant_id_list_t locals;
19710 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19711 pm_parser_scope_pop(parser);
19712
19718 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19719
19720 flush_block_exits(parser, previous_block_exits);
19721 pm_node_list_free(&current_block_exits);
19722
19723 return (pm_node_t *) pm_def_node_create(
19724 parser,
19725 name_id,
19726 &name,
19727 receiver,
19728 params,
19729 statements,
19730 &locals,
19731 &def_keyword,
19732 &operator,
19733 &lparen,
19734 &rparen,
19735 &equal,
19736 &end_keyword
19737 );
19738 }
// Handles PM_TOKEN_KEYWORD_DEFINED: consumes the keyword, parses its operand
// (with or without surrounding parentheses) inside PM_CONTEXT_DEFINED, and
// returns a defined node built from the operand and the paren tokens.
19739 case PM_TOKEN_KEYWORD_DEFINED: {
19740 parser_lex(parser);
19741 pm_token_t keyword = parser->previous;
19742
19743 pm_token_t lparen;
19744 pm_token_t rparen;
19745 pm_node_t *expression;
19746
19747 context_push(parser, PM_CONTEXT_DEFINED);
// Remember whether a newline followed the keyword; this is only consulted
// for the empty-parentheses case below.
19748 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19749
19750 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19751 lparen = parser->previous;
19752
// Keyword, newline, then an immediately closed `(`: the empty parentheses
// become the operand itself (a parentheses node with a NULL body) and the
// defined node records no parentheses of its own.
19753 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19754 expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19755 lparen = not_provided(parser);
19756 rparen = not_provided(parser);
19757 } else {
19758 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19759
// While the parser is recovering from an error, do not demand the closing
// parenthesis; otherwise allow one newline and then require it.
19760 if (parser->recovering) {
19761 rparen = not_provided(parser);
19762 } else {
19763 accept1(parser, PM_TOKEN_NEWLINE);
19764 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19765 rparen = parser->previous;
19766 }
19767 }
19768 } else {
// No opening parenthesis: both paren tokens are marked as not provided and
// the operand is parsed at PM_BINDING_POWER_DEFINED.
19769 lparen = not_provided(parser);
19770 rparen = not_provided(parser);
19771 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19772 }
19773
19774 context_pop(parser);
19775 return (pm_node_t *) pm_defined_node_create(
19776 parser,
19777 &lparen,
19778 expression,
19779 &rparen,
19780 &PM_LOCATION_TOKEN_VALUE(&keyword)
19781 );
19782 }
// Handles PM_TOKEN_KEYWORD_END_UPCASE: parses a brace-delimited body in
// PM_CONTEXT_POSTEXE and returns a post-execution node.
19783 case PM_TOKEN_KEYWORD_END_UPCASE: {
// Only accepted at statement binding power; anything else is reported (on
// the current token, i.e. the keyword itself, since it is not yet consumed).
19784 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19785 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19786 }
19787
19788 parser_lex(parser);
19789 pm_token_t keyword = parser->previous;
19790
// Emit PM_WARN_END_IN_METHOD when context_def_p reports that we are inside
// a def context.
19791 if (context_def_p(parser)) {
19792 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19793 }
19794
19795 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19796 pm_token_t opening = parser->previous;
19797 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19798
// After this expect1, parser->previous is used as the closing token.
19799 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19800 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19801 }
// Handles PM_TOKEN_KEYWORD_FALSE: consumes the keyword token and returns a
// false node created from it.
19802 case PM_TOKEN_KEYWORD_FALSE:
19803 parser_lex(parser);
19804 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
// Handles PM_TOKEN_KEYWORD_FOR: parses the index target(s), the `in`
// keyword, the collection expression, an optional do-loop keyword, and the
// body up to the closing `end`, then returns a for node.
19805 case PM_TOKEN_KEYWORD_FOR: {
// Captured before the keyword is consumed; used for the indentation
// mismatch warning on the closing `end`.
19806 size_t opening_newline_index = token_newline_index(parser);
19807 parser_lex(parser);
19808
19809 pm_token_t for_keyword = parser->previous;
19810 pm_node_t *index;
19811
19812 context_push(parser, PM_CONTEXT_FOR_INDEX);
19813
19814 // First, parse out the first index expression.
19815 if (accept1(parser, PM_TOKEN_USTAR)) {
// A leading splat: the name after the star is optional.
19816 pm_token_t star_operator = parser->previous;
19817 pm_node_t *name = NULL;
19818
19819 if (token_begins_expression_p(parser->current.type)) {
19820 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19821 }
19822
19823 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19824 } else if (token_begins_expression_p(parser->current.type)) {
// NOTE(review): the diagnostic id here is ..._AFTER_COMMA even though no
// comma has been consumed at this point — confirm this is intended.
19825 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19826 } else {
// Nothing that can start an expression: report on the keyword and use a
// missing node spanning the keyword as the index.
19827 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19828 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19829 }
19830
19831 // Now, if there are multiple index expressions, parse them out.
19832 if (match1(parser, PM_TOKEN_COMMA)) {
19833 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19834 } else {
19835 index = parse_target(parser, index, false, false);
19836 }
19837
19838 context_pop(parser);
// The do-loop stack is pushed with `true` for the `in` keyword and the
// collection expression, and popped right after the collection is parsed.
19839 pm_do_loop_stack_push(parser, true);
19840
19841 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19842 pm_token_t in_keyword = parser->previous;
19843
19844 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19845 pm_do_loop_stack_pop(parser);
19846
19847 pm_token_t do_keyword;
19848 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19849 do_keyword = parser->previous;
19850 } else {
// Without a do-loop keyword the collection must be followed by a semicolon
// or newline. The delimiter is only checked here (match2), not consumed.
19851 do_keyword = not_provided(parser);
19852 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19853 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19854 }
19855 }
19856
// The body is left NULL when `end` follows immediately.
19857 pm_statements_node_t *statements = NULL;
19858 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19859 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19860 }
19861
19862 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19863 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19864
19865 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19866 }
// Handles PM_TOKEN_KEYWORD_IF: warns when parser_end_of_line_p reports true
// for the keyword, then delegates to parse_conditional with PM_CONTEXT_IF.
19867 case PM_TOKEN_KEYWORD_IF:
19868 if (parser_end_of_line_p(parser)) {
19869 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19870 }
19871
// Both values below must be read before parser_lex advances the tokens:
// the newline index belongs to the `if` keyword, and if_after_else records
// whether the token just before this keyword was PM_TOKEN_KEYWORD_ELSE.
19872 size_t opening_newline_index = token_newline_index(parser);
19873 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19874 parser_lex(parser);
19875
19876 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
// Handles PM_TOKEN_KEYWORD_UNDEF: parses a comma-separated list of undef
// arguments and returns an undef node holding them.
19877 case PM_TOKEN_KEYWORD_UNDEF: {
// Only accepted at statement binding power; otherwise report on the
// current (not yet consumed) keyword token.
19878 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19879 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19880 }
19881
19882 parser_lex(parser);
19883 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19884 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19885
// A missing first argument is destroyed rather than appended, and no
// further arguments are attempted.
19886 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19887 pm_node_destroy(parser, name);
19888 } else {
19889 pm_undef_node_append(undef, name);
19890
19891 while (match1(parser, PM_TOKEN_COMMA)) {
// Set the lex state before lexing past the comma, so that the token after
// the comma is lexed under FNAME | FITEM.
19892 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19893 parser_lex(parser);
19894 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19895
// A missing argument after a comma is destroyed and ends the list.
19896 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19897 pm_node_destroy(parser, name);
19898 break;
19899 }
19900
19901 pm_undef_node_append(undef, name);
19902 }
19903 }
19904
19905 return (pm_node_t *) undef;
19906 }
// Handles PM_TOKEN_KEYWORD_NOT: parses the operand (optionally wrapped in
// parentheses) and returns a call node created by pm_call_node_not_create
// with the operand as receiver and the keyword as the message token.
19907 case PM_TOKEN_KEYWORD_NOT: {
19908 parser_lex(parser);
19909
19910 pm_token_t message = parser->previous;
19911 pm_arguments_t arguments = { 0 };
19912 pm_node_t *receiver = NULL;
19913
19914 // If we do not accept a command call, then we also do not accept a
19915 // not without parentheses. In this case we need to reject this
19916 // syntax.
19917 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
// Two distinct diagnostics: one when the next token is
// PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES (reported on the single byte just
// past the keyword), and one for any other token (after skipping a newline).
19918 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19919 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19920 } else {
19921 accept1(parser, PM_TOKEN_NEWLINE);
19922 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19923 }
19924
// Nothing is consumed for the operand; a missing node spanning the current
// token is returned instead of a call node.
19925 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
19926 }
19927
19928 accept1(parser, PM_TOKEN_NEWLINE);
19929
19930 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19931 pm_token_t lparen = parser->previous;
19932
// Empty parentheses: the receiver is a parentheses node with a NULL body
// and `arguments` keeps its zero-initialized opening/closing locations.
19933 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19934 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19935 } else {
19936 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19937 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19938
// The closing parenthesis is only required (and its location recorded)
// when the parser is not recovering from an earlier error.
19939 if (!parser->recovering) {
19940 accept1(parser, PM_TOKEN_NEWLINE);
19941 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19942 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19943 }
19944 }
19945 } else {
// No parentheses: parse the operand at PM_BINDING_POWER_NOT.
19946 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19947 }
19948
19949 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19950 }
// Handles PM_TOKEN_KEYWORD_UNLESS: records the keyword's newline index
// before consuming it, then delegates to parse_conditional with
// PM_CONTEXT_UNLESS (the if-after-else argument is always false here).
19951 case PM_TOKEN_KEYWORD_UNLESS: {
19952 size_t opening_newline_index = token_newline_index(parser);
19953 parser_lex(parser);
19954
19955 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19956 }
// Handles PM_TOKEN_KEYWORD_MODULE: parses the constant path naming the
// module, opens a new scope for the body, parses the body (including any
// rescue/ensure/else clauses) up to `end`, and returns a module node.
19957 case PM_TOKEN_KEYWORD_MODULE: {
// Install a fresh block-exits list for the duration of this case; it is
// popped and freed on both return paths below.
19958 pm_node_list_t current_block_exits = { 0 };
19959 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19960
19961 size_t opening_newline_index = token_newline_index(parser);
19962 parser_lex(parser);
19963 pm_token_t module_keyword = parser->previous;
19964
19965 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19966 pm_token_t name;
19967
19968 // If we can recover from a syntax error that occurred while parsing
19969 // the name of the module, then we'll handle that here.
19970 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19971 pop_block_exits(parser, previous_block_exits);
19972 pm_node_list_free(&current_block_exits);
19973
// Return a module node with no locals and no body; the same zero-width
// MISSING token (at the end of the previous token) stands in for both the
// name and the `end` keyword.
19974 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19975 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19976 }
19977
// Extend the path for each `::` that follows, wrapping the path parsed so
// far in a new constant path node.
19978 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19979 pm_token_t double_colon = parser->previous;
19980
19981 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19982 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19983 }
19984
19985 // Here we retrieve the name of the module. If it wasn't a constant,
19986 // then it's possible that `module foo` was passed, which is a
19987 // syntax error. We handle that here as well.
19988 name = parser->previous;
19989 if (name.type != PM_TOKEN_CONSTANT) {
19990 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19991 }
19992
// The body is parsed inside a newly pushed scope, which is popped again
// below once its locals have been collected.
19993 pm_parser_scope_push(parser, true);
19994 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19995 pm_node_t *statements = NULL;
19996
// Only parse statements when the body does not start directly with
// rescue/ensure/else/end; otherwise `statements` stays NULL.
19997 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19998 pm_accepts_block_stack_push(parser, true);
19999 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
20000 pm_accepts_block_stack_pop(parser);
20001 }
20002
// If a rescue/ensure/else clause follows, hand the statements parsed so
// far to parse_rescues_implicit_begin and use its result as the body.
// Otherwise only the indentation check for the closing `end` is performed.
20003 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
20004 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
20005 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
20006 } else {
20007 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
20008 }
20009
// Collect the scope's locals before the scope is popped.
20010 pm_constant_id_list_t locals;
20011 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
20012
20013 pm_parser_scope_pop(parser);
20014 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
20015
// Checked after the body has been parsed: report PM_ERR_MODULE_IN_METHOD on
// the keyword when context_def_p reports a def context.
20016 if (context_def_p(parser)) {
20017 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
20018 }
20019
20020 pop_block_exits(parser, previous_block_exits);
20021 pm_node_list_free(&current_block_exits);
20022
20023 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
20024 }
// Handles PM_TOKEN_KEYWORD_NIL: consumes the keyword token and returns a
// nil node created from it.
20025 case PM_TOKEN_KEYWORD_NIL:
20026 parser_lex(parser);
20027 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
// Handles PM_TOKEN_KEYWORD_REDO: consumes the keyword and returns a redo
// node created from it.
20028 case PM_TOKEN_KEYWORD_REDO: {
20029 parser_lex(parser);
20030
20031 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
// parse_block_exit is skipped when the parser is in partial-script mode.
20032 if (!parser->partial_script) parse_block_exit(parser, node);
20033
20034 return node;
20035 }
// Handles PM_TOKEN_KEYWORD_RETRY: consumes the keyword, creates a retry
// node from it, passes the node to parse_retry, and returns the node.
20036 case PM_TOKEN_KEYWORD_RETRY: {
20037 parser_lex(parser);
20038
20039 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
20040 parse_retry(parser, node);
20041
20042 return node;
20043 }
// Handles PM_TOKEN_KEYWORD_SELF: consumes the keyword token and returns a
// self node created from it.
20044 case PM_TOKEN_KEYWORD_SELF:
20045 parser_lex(parser);
20046 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
// Handles PM_TOKEN_KEYWORD_TRUE: consumes the keyword token and returns a
// true node created from it.
20047 case PM_TOKEN_KEYWORD_TRUE:
20048 parser_lex(parser);
20049 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
// Handles PM_TOKEN_KEYWORD_UNTIL: parses the predicate, an optional do-loop
// keyword, and the body up to `end`, then returns an until node.
20050 case PM_TOKEN_KEYWORD_UNTIL: {
20051 size_t opening_newline_index = token_newline_index(parser);
20052
// The loop-predicate context and the do-loop stack entry are pushed before
// the keyword is lexed, and both are popped as soon as the predicate has
// been parsed.
20053 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20054 pm_do_loop_stack_push(parser, true);
20055
20056 parser_lex(parser);
20057 pm_token_t keyword = parser->previous;
20058 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
20059
20060 pm_do_loop_stack_pop(parser);
20061 context_pop(parser);
20062
20063 pm_token_t do_keyword;
20064 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20065 do_keyword = parser->previous;
20066 } else {
// Without a do-loop keyword, a newline or semicolon must follow the
// predicate.
20067 do_keyword = not_provided(parser);
20068 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
20069 }
20070
// The body is left NULL when `end` follows immediately.
20071 pm_statements_node_t *statements = NULL;
20072 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20073 pm_accepts_block_stack_push(parser, true);
20074 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
20075 pm_accepts_block_stack_pop(parser);
20076 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20077 }
20078
20079 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20080 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
20081
// parser->previous is the token consumed by the expect1 above and is passed
// as the closing token.
20082 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
20083 }
// Handles PM_TOKEN_KEYWORD_WHILE: parses the predicate, an optional do-loop
// keyword, and the body up to `end`, then returns a while node.
20084 case PM_TOKEN_KEYWORD_WHILE: {
20085 size_t opening_newline_index = token_newline_index(parser);
20086
// The loop-predicate context and the do-loop stack entry are pushed before
// the keyword is lexed, and both are popped as soon as the predicate has
// been parsed.
20087 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20088 pm_do_loop_stack_push(parser, true);
20089
20090 parser_lex(parser);
20091 pm_token_t keyword = parser->previous;
20092 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
20093
20094 pm_do_loop_stack_pop(parser);
20095 context_pop(parser);
20096
20097 pm_token_t do_keyword;
20098 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20099 do_keyword = parser->previous;
20100 } else {
// Without a do-loop keyword, a newline or semicolon must follow the
// predicate.
20101 do_keyword = not_provided(parser);
20102 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
20103 }
20104
// The body is left NULL when `end` follows immediately.
20105 pm_statements_node_t *statements = NULL;
20106 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20107 pm_accepts_block_stack_push(parser, true);
20108 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
20109 pm_accepts_block_stack_pop(parser);
20110 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20111 }
20112
20113 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20114 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
20115
// parser->previous is the token consumed by the expect1 above and is passed
// as the closing token.
20116 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
20117 }
// Handles PM_TOKEN_PERCENT_LOWER_I: parses a list literal whose elements
// are plain string-content tokens and returns an array node of symbol
// nodes, one per content token.
20118 case PM_TOKEN_PERCENT_LOWER_I: {
20119 parser_lex(parser);
20120 pm_token_t opening = parser->previous;
20121 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20122
20123 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
// Skip one separator; if that brings us to the terminator, stop.
20124 accept1(parser, PM_TOKEN_WORDS_SEP);
20125 if (match1(parser, PM_TOKEN_STRING_END)) break;
20126
20127 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
// These locals shadow the outer `opening`; elements carry no delimiters of
// their own. The symbol is built from parser->current, which has not been
// consumed yet.
20128 pm_token_t opening = not_provided(parser);
20129 pm_token_t closing = not_provided(parser);
20130 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
20131 }
20132
// Consumes the content token handled above, or reports
// PM_ERR_LIST_I_LOWER_ELEMENT when the current token is something else.
20133 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
20134 }
20135
// At EOF the list is unterminated: report on the opening token and use a
// zero-width MISSING token at the end of the previous token as the closing.
// Otherwise the current token is the closing token and is consumed here.
20136 pm_token_t closing = parser->current;
20137 if (match1(parser, PM_TOKEN_EOF)) {
20138 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
20139 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20140 } else {
20141 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
20142 }
20143 pm_array_node_close_set(array, &closing);
20144
20145 return (pm_node_t *) array;
20146 }
// Handles PM_TOKEN_PERCENT_UPPER_I: parses a list literal whose elements
// may mix string content with embedded variables/expressions. Each element
// becomes either a symbol node or an interpolated symbol node; the result
// is an array node of those elements.
20147 case PM_TOKEN_PERCENT_UPPER_I: {
20148 parser_lex(parser);
20149 pm_token_t opening = parser->previous;
20150 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20151
20152 // This is the current node that we are parsing that will be added to the
20153 // list of elements.
20154 pm_node_t *current = NULL;
20155
20156 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20157 switch (parser->current.type) {
20158 case PM_TOKEN_WORDS_SEP: {
20159 if (current == NULL) {
20160 // If we hit a separator before we have any content, then we don't
20161 // need to do anything.
20162 } else {
20163 // If we hit a separator after we've hit content, then we need to
20164 // append that content to the list and reset the current node.
20165 pm_array_node_elements_append(array, current);
20166 current = NULL;
20167 }
20168
20169 parser_lex(parser);
20170 break;
20171 }
20172 case PM_TOKEN_STRING_CONTENT: {
// These locals shadow the outer `opening`; element parts carry no
// delimiters of their own.
20173 pm_token_t opening = not_provided(parser);
20174 pm_token_t closing = not_provided(parser);
20175
20176 if (current == NULL) {
20177 // If we hit content and the current node is NULL, then this is
20178 // the first string content we've seen. In that case we're going
20179 // to create a new symbol node and set that to the current.
20180 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
20181 parser_lex(parser);
20182 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20183 // If we hit string content and the current node is an
20184 // interpolated symbol, then we need to append the string content
20185 // to the list of child nodes.
20186 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20187 parser_lex(parser);
20188
20189 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
20190 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20191 // If we hit string content and the current node is a symbol node,
20192 // then we need to convert the current node into an interpolated
20193 // symbol and add the string content to the list of child nodes.
20194 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20195 pm_token_t bounds = not_provided(parser);
20196
// first_string is rebuilt from the existing symbol's value location and
// unescaped contents; second_string comes from the new content token.
// NOTE(review): second_string is created from &parser->previous, whereas
// the two sibling branches above pass &parser->current before lexing —
// confirm which token is intended here.
20197 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
20198 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
20199 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
20200 parser_lex(parser);
20201
20202 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20203 pm_interpolated_symbol_node_append(interpolated, first_string);
20204 pm_interpolated_symbol_node_append(interpolated, second_string);
20205
// Only the old symbol node's own allocation is released; presumably its
// unescaped contents now belong to first_string — verify against
// pm_string_node_create_unescaped.
20206 xfree(current);
20207 current = (pm_node_t *) interpolated;
20208 } else {
20209 assert(false && "unreachable");
20210 }
20211
20212 break;
20213 }
20214 case PM_TOKEN_EMBVAR: {
// Tracks whether the interpolated node's start location was already taken
// from a converted symbol in this pass; see the check after the append.
20215 bool start_location_set = false;
20216 if (current == NULL) {
20217 // If we hit an embedded variable and the current node is NULL,
20218 // then this is the start of a new element. We'll set the current
20219 // node to a new interpolated symbol.
20220 pm_token_t opening = not_provided(parser);
20221 pm_token_t closing = not_provided(parser);
20222 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20223 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20224 // If we hit an embedded variable and the current node is a symbol
20225 // node, then we'll convert it into a string node and add that as
20226 // the first part of a new interpolated symbol.
20227 pm_token_t opening = not_provided(parser);
20228 pm_token_t closing = not_provided(parser);
20229 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20230
20231 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20232 pm_interpolated_symbol_node_append(interpolated, current);
20233 interpolated->base.location.start = current->location.start;
20234 start_location_set = true;
20235 current = (pm_node_t *) interpolated;
20236 } else {
20237 // If we hit an embedded variable and the current node is an
20238 // interpolated symbol, then we'll just add the embedded variable.
20239 }
20240
20241 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20242 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
// NOTE(review): this also runs when `current` was already an interpolated
// symbol on entry, which moves its start to this part's start — confirm
// that is intended.
20243 if (!start_location_set) {
20244 current->location.start = part->location.start;
20245 }
20246 break;
20247 }
20248 case PM_TOKEN_EMBEXPR_BEGIN: {
// Tracks whether the interpolated node's start location was already taken
// from a converted symbol in this pass; see the check after the append.
20249 bool start_location_set = false;
20250 if (current == NULL) {
20251 // If we hit an embedded expression and the current node is NULL,
20252 // then this is the start of a new element. We'll set the current
20253 // node to a new interpolated symbol.
20254 pm_token_t opening = not_provided(parser);
20255 pm_token_t closing = not_provided(parser);
20256 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20257 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20258 // If we hit an embedded expression and the current node is a
20259 // symbol node, then we'll convert it into a string node and add
20260 // that as the first part of a new interpolated symbol.
20261 // (The symbol's start becomes the interpolated node's start.)
20262 pm_token_t opening = not_provided(parser);
20263 pm_token_t closing = not_provided(parser);
20264 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20265
20266 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20267 pm_interpolated_symbol_node_append(interpolated, current);
20268 interpolated->base.location.start = current->location.start;
20269 start_location_set = true;
20270 current = (pm_node_t *) interpolated;
20271 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20272 // If we hit an embedded expression and the current node is an
20273 // interpolated symbol, then we'll just continue on.
20274 } else {
20275 assert(false && "unreachable");
20276 }
20277
20278 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20279 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
// NOTE(review): this also runs when `current` was already an interpolated
// symbol on entry, which moves its start to this part's start — confirm
// that is intended.
20280 if (!start_location_set) {
20281 current->location.start = part->location.start;
20282 }
20283 break;
20284 }
20285 default:
// Any other token: expect1 is called for a content token (we only get here
// when the current token is not one), then one token is lexed
// unconditionally so the loop advances.
20286 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20287 parser_lex(parser);
20288 break;
20289 }
20290 }
20291
20292 // If we have a current node, then we need to append it to the list.
20293 if (current) {
20294 pm_array_node_elements_append(array, current);
20295 }
20296
// At EOF the list is unterminated: report on the opening token and use a
// zero-width MISSING token at the end of the previous token as the closing.
// Otherwise the current token is the closing token and is consumed here.
20297 pm_token_t closing = parser->current;
20298 if (match1(parser, PM_TOKEN_EOF)) {
20299 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20300 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20301 } else {
20302 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20303 }
20304 pm_array_node_close_set(array, &closing);
20305
20306 return (pm_node_t *) array;
20307 }
// Handles PM_TOKEN_PERCENT_LOWER_W: parses a list literal whose elements
// are plain string-content tokens and returns an array node of string
// nodes, one per content token.
20308 case PM_TOKEN_PERCENT_LOWER_W: {
20309 parser_lex(parser);
20310 pm_token_t opening = parser->previous;
20311 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20312
20313 // Skip a leading separator token, if present.
20314 accept1(parser, PM_TOKEN_WORDS_SEP);
20315
20316 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
// Skip one separator; if that brings us to the terminator, stop.
20317 accept1(parser, PM_TOKEN_WORDS_SEP);
20318 if (match1(parser, PM_TOKEN_STRING_END)) break;
20319
20320 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
// These locals shadow the outer `opening`; elements carry no delimiters of
// their own. The string is built from parser->current, which has not been
// consumed yet.
20321 pm_token_t opening = not_provided(parser);
20322 pm_token_t closing = not_provided(parser);
20323
20324 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20325 pm_array_node_elements_append(array, string);
20326 }
20327
// Consumes the content token handled above, or reports
// PM_ERR_LIST_W_LOWER_ELEMENT when the current token is something else.
20328 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20329 }
20330
// At EOF the list is unterminated: report on the opening token and use a
// zero-width MISSING token at the end of the previous token as the closing.
// Otherwise the current token is the closing token and is consumed here.
20331 pm_token_t closing = parser->current;
20332 if (match1(parser, PM_TOKEN_EOF)) {
20333 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20334 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20335 } else {
20336 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20337 }
20338
20339 pm_array_node_close_set(array, &closing);
20340 return (pm_node_t *) array;
20341 }
20342 case PM_TOKEN_PERCENT_UPPER_W: {
20343 parser_lex(parser);
20344 pm_token_t opening = parser->previous;
20345 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20346
20347 // This is the current node that we are parsing that will be added
20348 // to the list of elements.
20349 pm_node_t *current = NULL;
20350
20351 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20352 switch (parser->current.type) {
20353 case PM_TOKEN_WORDS_SEP: {
20354 // Reset the explicit encoding if we hit a separator
20355 // since each element can have its own encoding.
20356 parser->explicit_encoding = NULL;
20357
20358 if (current == NULL) {
20359 // If we hit a separator before we have any content,
20360 // then we don't need to do anything.
20361 } else {
20362 // If we hit a separator after we've hit content,
20363 // then we need to append that content to the list
20364 // and reset the current node.
20365 pm_array_node_elements_append(array, current);
20366 current = NULL;
20367 }
20368
20369 parser_lex(parser);
20370 break;
20371 }
20372 case PM_TOKEN_STRING_CONTENT: {
20373 pm_token_t opening = not_provided(parser);
20374 pm_token_t closing = not_provided(parser);
20375
20376 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20377 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20378 parser_lex(parser);
20379
20380 if (current == NULL) {
20381 // If we hit content and the current node is NULL,
20382 // then this is the first string content we've seen.
20383 // In that case we're going to create a new string
20384 // node and set that to the current.
20385 current = string;
20386 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20387 // If we hit string content and the current node is
20388 // an interpolated string, then we need to append
20389 // the string content to the list of child nodes.
20390 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20391 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20392 // If we hit string content and the current node is
20393 // a string node, then we need to convert the
20394 // current node into an interpolated string and add
20395 // the string content to the list of child nodes.
20396 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20397 pm_interpolated_string_node_append(interpolated, current);
20398 pm_interpolated_string_node_append(interpolated, string);
20399 current = (pm_node_t *) interpolated;
20400 } else {
20401 assert(false && "unreachable");
20402 }
20403
20404 break;
20405 }
20406 case PM_TOKEN_EMBVAR: {
20407 if (current == NULL) {
20408 // If we hit an embedded variable and the current
20409 // node is NULL, then this is the start of a new
20410 // string. We'll set the current node to a new
20411 // interpolated string.
20412 pm_token_t opening = not_provided(parser);
20413 pm_token_t closing = not_provided(parser);
20414 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20415 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20416 // If we hit an embedded variable and the current
20417 // node is a string node, then we'll convert the
20418 // current into an interpolated string and add the
20419 // string node to the list of parts.
20420 pm_token_t opening = not_provided(parser);
20421 pm_token_t closing = not_provided(parser);
20422 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20423 pm_interpolated_string_node_append(interpolated, current);
20424 current = (pm_node_t *) interpolated;
20425 } else {
20426 // If we hit an embedded variable and the current
20427 // node is an interpolated string, then we'll just
20428 // add the embedded variable.
20429 }
20430
20431 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20432 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20433 break;
20434 }
20435 case PM_TOKEN_EMBEXPR_BEGIN: {
20436 if (current == NULL) {
20437 // If we hit an embedded expression and the current
20438 // node is NULL, then this is the start of a new
20439 // string. We'll set the current node to a new
20440 // interpolated string.
20441 pm_token_t opening = not_provided(parser);
20442 pm_token_t closing = not_provided(parser);
20443 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20444 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20445 // If we hit an embedded expression and the current
20446 // node is a string node, then we'll convert the
20447 // current into an interpolated string and add the
20448 // string node to the list of parts.
20449 pm_token_t opening = not_provided(parser);
20450 pm_token_t closing = not_provided(parser);
20451 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20452 pm_interpolated_string_node_append(interpolated, current);
20453 current = (pm_node_t *) interpolated;
20454 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20455 // If we hit an embedded expression and the current
20456 // node is an interpolated string, then we'll just
20457 // continue on.
20458 } else {
20459 assert(false && "unreachable");
20460 }
20461
20462 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20463 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20464 break;
20465 }
20466 default:
20467 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20468 parser_lex(parser);
20469 break;
20470 }
20471 }
20472
20473 // If we have a current node, then we need to append it to the list.
20474 if (current) {
20475 pm_array_node_elements_append(array, current);
20476 }
20477
20478 pm_token_t closing = parser->current;
20479 if (match1(parser, PM_TOKEN_EOF)) {
20480 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20481 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20482 } else {
20483 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20484 }
20485
20486 pm_array_node_close_set(array, &closing);
20487 return (pm_node_t *) array;
20488 }
20489 case PM_TOKEN_REGEXP_BEGIN: {
20490 pm_token_t opening = parser->current;
20491 parser_lex(parser);
20492
20493 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20494 // If we get here, then we have an end immediately after a start. In
20495 // that case we'll create an empty content token and return an
20496 // uninterpolated regular expression.
20497 pm_token_t content = (pm_token_t) {
20498 .type = PM_TOKEN_STRING_CONTENT,
20499 .start = parser->previous.end,
20500 .end = parser->previous.end
20501 };
20502
20503 parser_lex(parser);
20504
20505 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20506 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
20507
20508 return node;
20509 }
20510
20512
20513 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20514 // In this case we've hit string content so we know the regular
20515 // expression at least has something in it. We'll need to check if the
20516 // following token is the end (in which case we can return a plain
20517 // regular expression) or if it's not then it has interpolation.
20518 pm_string_t unescaped = parser->current_string;
20519 pm_token_t content = parser->current;
20520 bool ascii_only = parser->current_regular_expression_ascii_only;
20521 parser_lex(parser);
20522
20523 // If we hit an end, then we can create a regular expression
20524 // node without interpolation, which can be represented more
20525 // succinctly and more easily compiled.
20526 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20527 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20528
20529 // If we're not immediately followed by a =~, then we want
20530 // to parse all of the errors at this point. If it is
20531 // followed by a =~, then it will get parsed higher up while
20532 // parsing the named captures as well.
20533 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20534 parse_regular_expression_errors(parser, node);
20535 }
20536
20537 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20538 return (pm_node_t *) node;
20539 }
20540
20541 // If we get here, then we have interpolation so we'll need to create
20542 // a regular expression node with interpolation.
20543 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20544
20545 pm_token_t opening = not_provided(parser);
20546 pm_token_t closing = not_provided(parser);
20547 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20548
20549 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20550 // This is extremely strange, but the first string part of a
20551 // regular expression will always be tagged as binary if we
20552 // are in a US-ASCII file, no matter its contents.
20553 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20554 }
20555
20556 pm_interpolated_regular_expression_node_append(interpolated, part);
20557 } else {
20558 // If the first part of the body of the regular expression is not a
20559 // string content, then we have interpolation and we need to create an
20560 // interpolated regular expression node.
20561 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20562 }
20563
20564 // Now that we're here and we have interpolation, we'll parse all of the
20565 // parts into the list.
20566 pm_node_t *part;
20567 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20568 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20569 pm_interpolated_regular_expression_node_append(interpolated, part);
20570 }
20571 }
20572
20573 pm_token_t closing = parser->current;
20574 if (match1(parser, PM_TOKEN_EOF)) {
20575 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20576 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20577 } else {
20578 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20579 }
20580
20581 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20582 return (pm_node_t *) interpolated;
20583 }
20584 case PM_TOKEN_BACKTICK:
20585 case PM_TOKEN_PERCENT_LOWER_X: {
20586 parser_lex(parser);
20587 pm_token_t opening = parser->previous;
20588
20589 // When we get here, we don't know if this string is going to have
20590 // interpolation or not, even though it is allowed. Still, we want to be
20591 // able to return a string node without interpolation if we can since
20592 // it'll be faster.
20593 if (match1(parser, PM_TOKEN_STRING_END)) {
20594 // If we get here, then we have an end immediately after a start. In
20595 // that case we'll create an empty content token and return an
20596 // uninterpolated string.
20597 pm_token_t content = (pm_token_t) {
20598 .type = PM_TOKEN_STRING_CONTENT,
20599 .start = parser->previous.end,
20600 .end = parser->previous.end
20601 };
20602
20603 parser_lex(parser);
20604 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20605 }
20606
20608
20609 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20610 // In this case we've hit string content so we know the string
20611 // at least has something in it. We'll need to check if the
20612 // following token is the end (in which case we can return a
20613 // plain string) or if it's not then it has interpolation.
20614 pm_string_t unescaped = parser->current_string;
20615 pm_token_t content = parser->current;
20616 parser_lex(parser);
20617
20618 if (match1(parser, PM_TOKEN_STRING_END)) {
20619 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20620 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20621 parser_lex(parser);
20622 return node;
20623 }
20624
20625 // If we get here, then we have interpolation so we'll need to
20626 // create a string node with interpolation.
20627 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20628
20629 pm_token_t opening = not_provided(parser);
20630 pm_token_t closing = not_provided(parser);
20631
20632 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20633 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20634
20635 pm_interpolated_xstring_node_append(node, part);
20636 } else {
20637 // If the first part of the body of the string is not a string
20638 // content, then we have interpolation and we need to create an
20639 // interpolated string node.
20640 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20641 }
20642
20643 pm_node_t *part;
20644 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20645 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20646 pm_interpolated_xstring_node_append(node, part);
20647 }
20648 }
20649
20650 pm_token_t closing = parser->current;
20651 if (match1(parser, PM_TOKEN_EOF)) {
20652 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20653 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20654 } else {
20655 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20656 }
20657 pm_interpolated_xstring_node_closing_set(node, &closing);
20658
20659 return (pm_node_t *) node;
20660 }
20661 case PM_TOKEN_USTAR: {
20662 parser_lex(parser);
20663
20664 // * operators at the beginning of expressions are only valid in the
20665 // context of a multiple assignment. We enforce that here. We'll
20666 // still lex past it though and create a missing node place.
20667 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20668 pm_parser_err_prefix(parser, diag_id);
20669 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20670 }
20671
20672 pm_token_t operator = parser->previous;
20673 pm_node_t *name = NULL;
20674
20675 if (token_begins_expression_p(parser->current.type)) {
20676 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20677 }
20678
20679 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20680
20681 if (match1(parser, PM_TOKEN_COMMA)) {
20682 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20683 } else {
20684 return parse_target_validate(parser, splat, true);
20685 }
20686 }
20687 case PM_TOKEN_BANG: {
20688 if (binding_power > PM_BINDING_POWER_UNARY) {
20689 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20690 }
20691
20692 parser_lex(parser);
20693
20694 pm_token_t operator = parser->previous;
20695 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20696 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20697
20698 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20699 return (pm_node_t *) node;
20700 }
20701 case PM_TOKEN_TILDE: {
20702 if (binding_power > PM_BINDING_POWER_UNARY) {
20703 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20704 }
20705 parser_lex(parser);
20706
20707 pm_token_t operator = parser->previous;
20708 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20709 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20710
20711 return (pm_node_t *) node;
20712 }
20713 case PM_TOKEN_UMINUS: {
20714 if (binding_power > PM_BINDING_POWER_UNARY) {
20715 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20716 }
20717 parser_lex(parser);
20718
20719 pm_token_t operator = parser->previous;
20720 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20721 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20722
20723 return (pm_node_t *) node;
20724 }
20725 case PM_TOKEN_UMINUS_NUM: {
20726 parser_lex(parser);
20727
20728 pm_token_t operator = parser->previous;
20729 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20730
20731 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20732 pm_token_t exponent_operator = parser->previous;
20733 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20734 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20735 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20736 } else {
20737 switch (PM_NODE_TYPE(node)) {
20738 case PM_INTEGER_NODE:
20739 case PM_FLOAT_NODE:
20740 case PM_RATIONAL_NODE:
20741 case PM_IMAGINARY_NODE:
20742 parse_negative_numeric(node);
20743 break;
20744 default:
20745 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20746 break;
20747 }
20748 }
20749
20750 return node;
20751 }
20752 case PM_TOKEN_MINUS_GREATER: {
20753 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20755
20756 size_t opening_newline_index = token_newline_index(parser);
20757 pm_accepts_block_stack_push(parser, true);
20758 parser_lex(parser);
20759
20760 pm_token_t operator = parser->previous;
20761 pm_parser_scope_push(parser, false);
20762
20763 pm_block_parameters_node_t *block_parameters;
20764
20765 switch (parser->current.type) {
20766 case PM_TOKEN_PARENTHESIS_LEFT: {
20767 pm_token_t opening = parser->current;
20768 parser_lex(parser);
20769
20770 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20771 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20772 } else {
20773 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20774 }
20775
20776 accept1(parser, PM_TOKEN_NEWLINE);
20777 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20778
20779 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20780 break;
20781 }
20782 case PM_CASE_PARAMETER: {
20783 pm_accepts_block_stack_push(parser, false);
20784 pm_token_t opening = not_provided(parser);
20785 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20786 pm_accepts_block_stack_pop(parser);
20787 break;
20788 }
20789 default: {
20790 block_parameters = NULL;
20791 break;
20792 }
20793 }
20794
20795 pm_token_t opening;
20796 pm_node_t *body = NULL;
20797 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20798
20799 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20800 opening = parser->previous;
20801
20802 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20803 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20804 }
20805
20806 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20807 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20808 } else {
20809 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20810 opening = parser->previous;
20811
20812 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20813 pm_accepts_block_stack_push(parser, true);
20814 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20815 pm_accepts_block_stack_pop(parser);
20816 }
20817
20818 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20819 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20820 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20821 } else {
20822 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20823 }
20824
20825 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20826 }
20827
20828 pm_constant_id_list_t locals;
20829 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20830 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20831
20832 pm_parser_scope_pop(parser);
20833 pm_accepts_block_stack_pop(parser);
20834
20835 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20836 }
20837 case PM_TOKEN_UPLUS: {
20838 if (binding_power > PM_BINDING_POWER_UNARY) {
20839 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20840 }
20841 parser_lex(parser);
20842
20843 pm_token_t operator = parser->previous;
20844 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20845 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20846
20847 return (pm_node_t *) node;
20848 }
20849 case PM_TOKEN_STRING_BEGIN:
20850 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20851 case PM_TOKEN_SYMBOL_BEGIN: {
20852 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20853 parser_lex(parser);
20854
20855 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20856 }
20857 default: {
20858 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20859
20860 if (recoverable != PM_CONTEXT_NONE) {
20861 parser->recovering = true;
20862
20863 // If the given error is not the generic one, then we'll add it
20864 // here because it will provide more context in addition to the
20865 // recoverable error that we will also add.
20866 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20867 pm_parser_err_prefix(parser, diag_id);
20868 }
20869
20870 // If we get here, then we are assuming this token is closing a
20871 // parent context, so we'll indicate that to the user so that
20872 // they know how we behaved.
20873 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20874 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20875 // We're going to make a special case here, because "cannot
20876 // parse expression" is pretty generic, and we know here that we
20877 // have an unexpected token.
20878 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20879 } else {
20880 pm_parser_err_prefix(parser, diag_id);
20881 }
20882
20883 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20884 }
20885 }
20886}
20887
20897static pm_node_t *
20898parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20899 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20900
20901 // Contradicting binding powers, the right-hand-side value of the assignment
20902 // allows the `rescue` modifier.
20903 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20904 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20905
20906 pm_token_t rescue = parser->current;
20907 parser_lex(parser);
20908
20909 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20910 context_pop(parser);
20911
20912 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20913 }
20914
20915 return value;
20916}
20917
20922static void
20923parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20924 switch (PM_NODE_TYPE(node)) {
20925 case PM_BEGIN_NODE: {
20926 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20927 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20928 break;
20929 }
20930 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20932 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20933 break;
20934 }
20935 case PM_PARENTHESES_NODE: {
20936 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20937 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20938 break;
20939 }
20940 case PM_STATEMENTS_NODE: {
20941 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20942 const pm_node_t *statement;
20943
20944 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20945 parse_assignment_value_local(parser, statement);
20946 }
20947 break;
20948 }
20949 default:
20950 break;
20951 }
20952}
20953
20966static pm_node_t *
20967parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20968 bool permitted = true;
20969 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20970
20971 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20972 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20973
20974 parse_assignment_value_local(parser, value);
20975 bool single_value = true;
20976
20977 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20978 single_value = false;
20979
20980 pm_token_t opening = not_provided(parser);
20981 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20982
20983 pm_array_node_elements_append(array, value);
20984 value = (pm_node_t *) array;
20985
20986 while (accept1(parser, PM_TOKEN_COMMA)) {
20987 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20988
20989 pm_array_node_elements_append(array, element);
20990 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20991
20992 parse_assignment_value_local(parser, element);
20993 }
20994 }
20995
20996 // Contradicting binding powers, the right-hand-side value of the assignment
20997 // allows the `rescue` modifier.
20998 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20999 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21000
21001 pm_token_t rescue = parser->current;
21002 parser_lex(parser);
21003
21004 bool accepts_command_call_inner = false;
21005
21006 // RHS can accept command call iff the value is a call with arguments
21007 // but without parenthesis.
21008 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
21009 pm_call_node_t *call_node = (pm_call_node_t *) value;
21010 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
21011 accepts_command_call_inner = true;
21012 }
21013 }
21014
21015 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21016 context_pop(parser);
21017
21018 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
21019 }
21020
21021 return value;
21022}
21023
21031static void
21032parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
21033 if (call_node->arguments != NULL) {
21034 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
21035 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
21036 call_node->arguments = NULL;
21037 }
21038
21039 if (call_node->block != NULL) {
21040 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
21041 pm_node_destroy(parser, (pm_node_t *) call_node->block);
21042 call_node->block = NULL;
21043 }
21044}
21045
21070
21071static inline const uint8_t *
21072pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21073 cursor++;
21074
21075 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
21076 uint8_t value = escape_hexadecimal_digit(*cursor);
21077 cursor++;
21078
21079 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
21080 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
21081 cursor++;
21082 }
21083
21084 pm_buffer_append_byte(unescaped, value);
21085 } else {
21086 pm_buffer_append_string(unescaped, "\\x", 2);
21087 }
21088
21089 return cursor;
21090}
21091
21092static inline const uint8_t *
21093pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21094 uint8_t value = (uint8_t) (*cursor - '0');
21095 cursor++;
21096
21097 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
21098 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
21099 cursor++;
21100
21101 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
21102 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
21103 cursor++;
21104 }
21105 }
21106
21107 pm_buffer_append_byte(unescaped, value);
21108 return cursor;
21109}
21110
21111static inline const uint8_t *
21112pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21113 const uint8_t *start = cursor - 1;
21114 cursor++;
21115
21116 if (cursor >= end) {
21117 pm_buffer_append_string(unescaped, "\\u", 2);
21118 return cursor;
21119 }
21120
21121 if (*cursor != '{') {
21122 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
21123 uint32_t value = escape_unicode(parser, cursor, length);
21124
21125 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
21126 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
21127 }
21128
21129 return cursor + length;
21130 }
21131
21132 cursor++;
21133 for (;;) {
21134 while (cursor < end && *cursor == ' ') cursor++;
21135
21136 if (cursor >= end) break;
21137 if (*cursor == '}') {
21138 cursor++;
21139 break;
21140 }
21141
21142 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
21143 uint32_t value = escape_unicode(parser, cursor, length);
21144
21145 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
21146 cursor += length;
21147 }
21148
21149 return cursor;
21150}
21151
21152static void
21153pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
21154 const uint8_t *end = source + length;
21155 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
21156
21157 for (;;) {
21158 if (++cursor >= end) {
21159 pm_buffer_append_byte(unescaped, '\\');
21160 return;
21161 }
21162
21163 switch (*cursor) {
21164 case 'x':
21165 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
21166 break;
21167 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
21168 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
21169 break;
21170 case 'u':
21171 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
21172 break;
21173 default:
21174 pm_buffer_append_byte(unescaped, '\\');
21175 break;
21176 }
21177
21178 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
21179 if (next_cursor == NULL) break;
21180
21181 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
21182 cursor = next_cursor;
21183 }
21184
21185 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
21186}
21187
21192static void
21193parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
21195
21196 pm_parser_t *parser = callback_data->parser;
21197 pm_call_node_t *call = callback_data->call;
21198 pm_constant_id_list_t *names = &callback_data->names;
21199
21200 const uint8_t *source = pm_string_source(capture);
21201 size_t length = pm_string_length(capture);
21202 pm_buffer_t unescaped = { 0 };
21203
21204 // First, we need to handle escapes within the name of the capture group.
21205 // This is because regular expressions have three different representations
21206 // in prism. The first is the plain source code. The second is the
21207 // representation that will be sent to the regular expression engine, which
21208 // is the value of the "unescaped" field. This is poorly named, because it
21209 // actually still contains escapes, just a subset of them that the regular
21210 // expression engine knows how to handle. The third representation is fully
21211 // unescaped, which is what we need.
21212 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
21213 if (PRISM_UNLIKELY(cursor != NULL)) {
21214 pm_named_capture_escape(parser, &unescaped, source, length, cursor);
21215 source = (const uint8_t *) pm_buffer_value(&unescaped);
21216 length = pm_buffer_length(&unescaped);
21217 }
21218
21219 pm_location_t location;
21220 pm_constant_id_t name;
21221
21222 // If the name of the capture group isn't a valid identifier, we do
21223 // not add it to the local table.
21224 if (!pm_slice_is_valid_local(parser, source, source + length)) {
21225 pm_buffer_free(&unescaped);
21226 return;
21227 }
21228
21229 if (callback_data->shared) {
21230 // If the unescaped string is a slice of the source, then we can
21231 // copy the names directly. The pointers will line up.
21232 location = (pm_location_t) { .start = source, .end = source + length };
21233 name = pm_parser_constant_id_location(parser, location.start, location.end);
21234 } else {
21235 // Otherwise, the name is a slice of the malloc-ed owned string,
21236 // in which case we need to copy it out into a new string.
21237 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
21238
21239 void *memory = xmalloc(length);
21240 if (memory == NULL) abort();
21241
21242 memcpy(memory, source, length);
21243 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
21244 }
21245
21246 // Add this name to the list of constants if it is valid, not duplicated,
21247 // and not a keyword.
21248 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
21249 pm_constant_id_list_append(names, name);
21250
21251 int depth;
21252 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
21253 // If the local is not already a local but it is a keyword, then we
21254 // do not want to add a capture for this.
21255 if (pm_local_is_keyword((const char *) source, length)) {
21256 pm_buffer_free(&unescaped);
21257 return;
21258 }
21259
21260 // If the identifier is not already a local, then we will add it to
21261 // the local table.
21262 pm_parser_local_add(parser, name, location.start, location.end, 0);
21263 }
21264
21265 // Here we lazily create the MatchWriteNode since we know we're
21266 // about to add a target.
21267 if (callback_data->match == NULL) {
21268 callback_data->match = pm_match_write_node_create(parser, call);
21269 }
21270
21271 // Next, create the local variable target and add it to the list of
21272 // targets for the match.
21273 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
21274 pm_node_list_append(&callback_data->match->targets, target);
21275 }
21276
21277 pm_buffer_free(&unescaped);
21278}
21279
21284static pm_node_t *
21285parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
21287 .parser = parser,
21288 .call = call,
21289 .names = { 0 },
21290 .shared = content->type == PM_STRING_SHARED
21291 };
21292
21294 .parser = parser,
21295 .start = call->receiver->location.start,
21296 .end = call->receiver->location.end,
21297 .shared = content->type == PM_STRING_SHARED
21298 };
21299
21300 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
21301 pm_constant_id_list_free(&callback_data.names);
21302
21303 if (callback_data.match != NULL) {
21304 return (pm_node_t *) callback_data.match;
21305 } else {
21306 return (pm_node_t *) call;
21307 }
21308}
21309
21310static inline pm_node_t *
21311parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
21312 pm_token_t token = parser->current;
21313
21314 switch (token.type) {
21315 case PM_TOKEN_EQUAL: {
21316 switch (PM_NODE_TYPE(node)) {
21317 case PM_CALL_NODE: {
21318 // If we have no arguments to the call node and we need this
21319 // to be a target then this is either a method call or a
21320 // local variable write. This _must_ happen before the value
21321 // is parsed because it could be referenced in the value.
21322 pm_call_node_t *call_node = (pm_call_node_t *) node;
21323 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21324 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
21325 }
21326 }
21328 case PM_CASE_WRITABLE: {
21329 // When we have `it = value`, we need to add `it` as a local
21330 // variable before parsing the value, in case the value
21331 // references the variable.
21332 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
21333 pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
21334 }
21335
21336 parser_lex(parser);
21337 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21338
21339 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21340 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21341 }
21342
21343 return parse_write(parser, node, &token, value);
21344 }
21345 case PM_SPLAT_NODE: {
21346 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21347 pm_multi_target_node_targets_append(parser, multi_target, node);
21348
21349 parser_lex(parser);
21350 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21351 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
21352 }
21353 case PM_SOURCE_ENCODING_NODE:
21354 case PM_FALSE_NODE:
21355 case PM_SOURCE_FILE_NODE:
21356 case PM_SOURCE_LINE_NODE:
21357 case PM_NIL_NODE:
21358 case PM_SELF_NODE:
21359 case PM_TRUE_NODE: {
21360 // In these special cases, we have specific error messages
21361 // and we will replace them with local variable writes.
21362 parser_lex(parser);
21363 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21364 return parse_unwriteable_write(parser, node, &token, value);
21365 }
21366 default:
21367 // In this case we have an = sign, but we don't know what
21368 // it's for. We need to treat it as an error. We'll mark it
21369 // as an error and skip past it.
21370 parser_lex(parser);
21371 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21372 return node;
21373 }
21374 }
21375 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
21376 switch (PM_NODE_TYPE(node)) {
21377 case PM_BACK_REFERENCE_READ_NODE:
21378 case PM_NUMBERED_REFERENCE_READ_NODE:
21379 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21381 case PM_GLOBAL_VARIABLE_READ_NODE: {
21382 parser_lex(parser);
21383
21384 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21385 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21386
21387 pm_node_destroy(parser, node);
21388 return result;
21389 }
21390 case PM_CLASS_VARIABLE_READ_NODE: {
21391 parser_lex(parser);
21392
21393 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21394 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21395
21396 pm_node_destroy(parser, node);
21397 return result;
21398 }
21399 case PM_CONSTANT_PATH_NODE: {
21400 parser_lex(parser);
21401
21402 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21403 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21404
21405 return parse_shareable_constant_write(parser, write);
21406 }
21407 case PM_CONSTANT_READ_NODE: {
21408 parser_lex(parser);
21409
21410 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21411 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21412
21413 pm_node_destroy(parser, node);
21414 return parse_shareable_constant_write(parser, write);
21415 }
21416 case PM_INSTANCE_VARIABLE_READ_NODE: {
21417 parser_lex(parser);
21418
21419 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21420 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21421
21422 pm_node_destroy(parser, node);
21423 return result;
21424 }
21425 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21426 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21427 parser_lex(parser);
21428
21429 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21430 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21431
21432 parse_target_implicit_parameter(parser, node);
21433 pm_node_destroy(parser, node);
21434 return result;
21435 }
21436 case PM_LOCAL_VARIABLE_READ_NODE: {
21437 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21438 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21439 parse_target_implicit_parameter(parser, node);
21440 }
21441
21443 parser_lex(parser);
21444
21445 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21446 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21447
21448 pm_node_destroy(parser, node);
21449 return result;
21450 }
21451 case PM_CALL_NODE: {
21452 pm_call_node_t *cast = (pm_call_node_t *) node;
21453
21454 // If we have a vcall (a method with no arguments and no
21455 // receiver that could have been a local variable) then we
21456 // will transform it into a local variable write.
21457 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21458 pm_location_t *message_loc = &cast->message_loc;
21459 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21460
21461 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21462 parser_lex(parser);
21463
21464 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21465 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21466
21467 pm_node_destroy(parser, (pm_node_t *) cast);
21468 return result;
21469 }
21470
21471 // Move past the token here so that we have already added
21472 // the local variable by this point.
21473 parser_lex(parser);
21474
21475 // If there is no call operator and the message is "[]" then
21476 // this is an aref expression, and we can transform it into
21477 // an aset expression.
21478 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21479 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21480 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21481 }
21482
21483 // If this node cannot be writable, then we have an error.
21484 if (pm_call_node_writable_p(parser, cast)) {
21485 parse_write_name(parser, &cast->name);
21486 } else {
21487 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21488 }
21489
21490 parse_call_operator_write(parser, cast, &token);
21491 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21492 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21493 }
21494 case PM_MULTI_WRITE_NODE: {
21495 parser_lex(parser);
21496 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21497 return node;
21498 }
21499 default:
21500 parser_lex(parser);
21501
21502 // In this case we have an &&= sign, but we don't know what it's for.
21503 // We need to treat it as an error. For now, we'll mark it as an error
21504 // and just skip right past it.
21505 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21506 return node;
21507 }
21508 }
21509 case PM_TOKEN_PIPE_PIPE_EQUAL: {
21510 switch (PM_NODE_TYPE(node)) {
21511 case PM_BACK_REFERENCE_READ_NODE:
21512 case PM_NUMBERED_REFERENCE_READ_NODE:
21513 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21515 case PM_GLOBAL_VARIABLE_READ_NODE: {
21516 parser_lex(parser);
21517
21518 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21519 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21520
21521 pm_node_destroy(parser, node);
21522 return result;
21523 }
21524 case PM_CLASS_VARIABLE_READ_NODE: {
21525 parser_lex(parser);
21526
21527 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21528 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21529
21530 pm_node_destroy(parser, node);
21531 return result;
21532 }
21533 case PM_CONSTANT_PATH_NODE: {
21534 parser_lex(parser);
21535
21536 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21537 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21538
21539 return parse_shareable_constant_write(parser, write);
21540 }
21541 case PM_CONSTANT_READ_NODE: {
21542 parser_lex(parser);
21543
21544 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21545 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21546
21547 pm_node_destroy(parser, node);
21548 return parse_shareable_constant_write(parser, write);
21549 }
21550 case PM_INSTANCE_VARIABLE_READ_NODE: {
21551 parser_lex(parser);
21552
21553 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21554 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21555
21556 pm_node_destroy(parser, node);
21557 return result;
21558 }
21559 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21560 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21561 parser_lex(parser);
21562
21563 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21564 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21565
21566 parse_target_implicit_parameter(parser, node);
21567 pm_node_destroy(parser, node);
21568 return result;
21569 }
21570 case PM_LOCAL_VARIABLE_READ_NODE: {
21571 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21572 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21573 parse_target_implicit_parameter(parser, node);
21574 }
21575
21577 parser_lex(parser);
21578
21579 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21580 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21581
21582 pm_node_destroy(parser, node);
21583 return result;
21584 }
21585 case PM_CALL_NODE: {
21586 pm_call_node_t *cast = (pm_call_node_t *) node;
21587
21588 // If we have a vcall (a method with no arguments and no
21589 // receiver that could have been a local variable) then we
21590 // will transform it into a local variable write.
21591 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21592 pm_location_t *message_loc = &cast->message_loc;
21593 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21594
21595 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21596 parser_lex(parser);
21597
21598 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21599 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21600
21601 pm_node_destroy(parser, (pm_node_t *) cast);
21602 return result;
21603 }
21604
21605 // Move past the token here so that we have already added
21606 // the local variable by this point.
21607 parser_lex(parser);
21608
21609 // If there is no call operator and the message is "[]" then
21610 // this is an aref expression, and we can transform it into
21611 // an aset expression.
21612 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21613 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21614 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21615 }
21616
21617 // If this node cannot be writable, then we have an error.
21618 if (pm_call_node_writable_p(parser, cast)) {
21619 parse_write_name(parser, &cast->name);
21620 } else {
21621 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21622 }
21623
21624 parse_call_operator_write(parser, cast, &token);
21625 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21626 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21627 }
21628 case PM_MULTI_WRITE_NODE: {
21629 parser_lex(parser);
21630 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21631 return node;
21632 }
21633 default:
21634 parser_lex(parser);
21635
21636 // In this case we have an ||= sign, but we don't know what it's for.
21637 // We need to treat it as an error. For now, we'll mark it as an error
21638 // and just skip right past it.
21639 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21640 return node;
21641 }
21642 }
21643 case PM_TOKEN_AMPERSAND_EQUAL:
21644 case PM_TOKEN_CARET_EQUAL:
21645 case PM_TOKEN_GREATER_GREATER_EQUAL:
21646 case PM_TOKEN_LESS_LESS_EQUAL:
21647 case PM_TOKEN_MINUS_EQUAL:
21648 case PM_TOKEN_PERCENT_EQUAL:
21649 case PM_TOKEN_PIPE_EQUAL:
21650 case PM_TOKEN_PLUS_EQUAL:
21651 case PM_TOKEN_SLASH_EQUAL:
21652 case PM_TOKEN_STAR_EQUAL:
21653 case PM_TOKEN_STAR_STAR_EQUAL: {
21654 switch (PM_NODE_TYPE(node)) {
21655 case PM_BACK_REFERENCE_READ_NODE:
21656 case PM_NUMBERED_REFERENCE_READ_NODE:
21657 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21659 case PM_GLOBAL_VARIABLE_READ_NODE: {
21660 parser_lex(parser);
21661
21662 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21663 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21664
21665 pm_node_destroy(parser, node);
21666 return result;
21667 }
21668 case PM_CLASS_VARIABLE_READ_NODE: {
21669 parser_lex(parser);
21670
21671 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21672 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21673
21674 pm_node_destroy(parser, node);
21675 return result;
21676 }
21677 case PM_CONSTANT_PATH_NODE: {
21678 parser_lex(parser);
21679
21680 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21681 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21682
21683 return parse_shareable_constant_write(parser, write);
21684 }
21685 case PM_CONSTANT_READ_NODE: {
21686 parser_lex(parser);
21687
21688 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21689 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21690
21691 pm_node_destroy(parser, node);
21692 return parse_shareable_constant_write(parser, write);
21693 }
21694 case PM_INSTANCE_VARIABLE_READ_NODE: {
21695 parser_lex(parser);
21696
21697 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21698 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21699
21700 pm_node_destroy(parser, node);
21701 return result;
21702 }
21703 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21704 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21705 parser_lex(parser);
21706
21707 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21708 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21709
21710 parse_target_implicit_parameter(parser, node);
21711 pm_node_destroy(parser, node);
21712 return result;
21713 }
21714 case PM_LOCAL_VARIABLE_READ_NODE: {
21715 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21716 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21717 parse_target_implicit_parameter(parser, node);
21718 }
21719
21721 parser_lex(parser);
21722
21723 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21724 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21725
21726 pm_node_destroy(parser, node);
21727 return result;
21728 }
21729 case PM_CALL_NODE: {
21730 parser_lex(parser);
21731 pm_call_node_t *cast = (pm_call_node_t *) node;
21732
21733 // If we have a vcall (a method with no arguments and no
21734 // receiver that could have been a local variable) then we
21735 // will transform it into a local variable write.
21736 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21737 pm_location_t *message_loc = &cast->message_loc;
21738 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21739
21740 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21741 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21742 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21743
21744 pm_node_destroy(parser, (pm_node_t *) cast);
21745 return result;
21746 }
21747
21748 // If there is no call operator and the message is "[]" then
21749 // this is an aref expression, and we can transform it into
21750 // an aset expression.
21751 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21752 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21753 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21754 }
21755
21756 // If this node cannot be writable, then we have an error.
21757 if (pm_call_node_writable_p(parser, cast)) {
21758 parse_write_name(parser, &cast->name);
21759 } else {
21760 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21761 }
21762
21763 parse_call_operator_write(parser, cast, &token);
21764 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21765 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21766 }
21767 case PM_MULTI_WRITE_NODE: {
21768 parser_lex(parser);
21769 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21770 return node;
21771 }
21772 default:
21773 parser_lex(parser);
21774
21775 // In this case we have an operator but we don't know what it's for.
21776 // We need to treat it as an error. For now, we'll mark it as an error
21777 // and just skip right past it.
21778 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21779 return node;
21780 }
21781 }
21782 case PM_TOKEN_AMPERSAND_AMPERSAND:
21783 case PM_TOKEN_KEYWORD_AND: {
21784 parser_lex(parser);
21785
21786 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21787 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21788 }
21789 case PM_TOKEN_KEYWORD_OR:
21790 case PM_TOKEN_PIPE_PIPE: {
21791 parser_lex(parser);
21792
21793 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21794 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21795 }
21796 case PM_TOKEN_EQUAL_TILDE: {
21797 // Note that we _must_ parse the value before adding the local
21798 // variables in order to properly mirror the behavior of Ruby. For
21799 // example,
21800 //
21801 // /(?<foo>bar)/ =~ foo
21802 //
21803 // In this case, `foo` should be a method call and not a local yet.
21804 parser_lex(parser);
21805 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21806
21807 // By default, we're going to create a call node and then return it.
21808 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21809 pm_node_t *result = (pm_node_t *) call;
21810
21811 // If the receiver of this =~ is a regular expression node, then we
21812 // need to introduce local variables for it based on its named
21813 // capture groups.
21814 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21815 // It's possible to have an interpolated regular expression node
21816 // that only contains strings. This is because it can be split
21817 // up by a heredoc. In this case we need to concat the unescaped
21818 // strings together and then parse them as a regular expression.
21820
21821 bool interpolated = false;
21822 size_t total_length = 0;
21823
21824 pm_node_t *part;
21825 PM_NODE_LIST_FOREACH(parts, index, part) {
21826 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21827 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21828 } else {
21829 interpolated = true;
21830 break;
21831 }
21832 }
21833
21834 if (!interpolated && total_length > 0) {
21835 void *memory = xmalloc(total_length);
21836 if (!memory) abort();
21837
21838 uint8_t *cursor = memory;
21839 PM_NODE_LIST_FOREACH(parts, index, part) {
21840 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21841 size_t length = pm_string_length(unescaped);
21842
21843 memcpy(cursor, pm_string_source(unescaped), length);
21844 cursor += length;
21845 }
21846
21847 pm_string_t owned;
21848 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21849
21850 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21851 pm_string_free(&owned);
21852 }
21853 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21854 // If we have a regular expression node, then we can just parse
21855 // the named captures directly off the unescaped string.
21856 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21857 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21858 }
21859
21860 return result;
21861 }
21862 case PM_TOKEN_UAMPERSAND:
21863 case PM_TOKEN_USTAR:
21864 case PM_TOKEN_USTAR_STAR:
21865 // The only times this will occur are when we are in an error state,
21866 // but we'll put them in here so that errors can propagate.
21867 case PM_TOKEN_BANG_EQUAL:
21868 case PM_TOKEN_BANG_TILDE:
21869 case PM_TOKEN_EQUAL_EQUAL:
21870 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21871 case PM_TOKEN_LESS_EQUAL_GREATER:
21872 case PM_TOKEN_CARET:
21873 case PM_TOKEN_PIPE:
21874 case PM_TOKEN_AMPERSAND:
21875 case PM_TOKEN_GREATER_GREATER:
21876 case PM_TOKEN_LESS_LESS:
21877 case PM_TOKEN_MINUS:
21878 case PM_TOKEN_PLUS:
21879 case PM_TOKEN_PERCENT:
21880 case PM_TOKEN_SLASH:
21881 case PM_TOKEN_STAR:
21882 case PM_TOKEN_STAR_STAR: {
21883 parser_lex(parser);
21884 pm_token_t operator = parser->previous;
21885 switch (PM_NODE_TYPE(node)) {
21886 case PM_RESCUE_MODIFIER_NODE: {
21888 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21889 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21890 }
21891 break;
21892 }
21893 case PM_AND_NODE: {
21894 pm_and_node_t *cast = (pm_and_node_t *) node;
21895 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21896 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21897 }
21898 break;
21899 }
21900 case PM_OR_NODE: {
21901 pm_or_node_t *cast = (pm_or_node_t *) node;
21902 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21903 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21904 }
21905 break;
21906 }
21907 default:
21908 break;
21909 }
21910
21911 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21912 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21913 }
21914 case PM_TOKEN_GREATER:
21915 case PM_TOKEN_GREATER_EQUAL:
21916 case PM_TOKEN_LESS:
21917 case PM_TOKEN_LESS_EQUAL: {
21918 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21919 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21920 }
21921
21922 parser_lex(parser);
21923 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21924 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21925 }
21926 case PM_TOKEN_AMPERSAND_DOT:
21927 case PM_TOKEN_DOT: {
21928 parser_lex(parser);
21929 pm_token_t operator = parser->previous;
21930 pm_arguments_t arguments = { 0 };
21931
21932 // This if statement handles the foo.() syntax.
21933 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21934 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21935 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21936 }
21937
21938 switch (PM_NODE_TYPE(node)) {
21939 case PM_RESCUE_MODIFIER_NODE: {
21941 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21942 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21943 }
21944 break;
21945 }
21946 case PM_AND_NODE: {
21947 pm_and_node_t *cast = (pm_and_node_t *) node;
21948 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21949 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21950 }
21951 break;
21952 }
21953 case PM_OR_NODE: {
21954 pm_or_node_t *cast = (pm_or_node_t *) node;
21955 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21956 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21957 }
21958 break;
21959 }
21960 default:
21961 break;
21962 }
21963
21964 pm_token_t message;
21965
21966 switch (parser->current.type) {
21967 case PM_CASE_OPERATOR:
21968 case PM_CASE_KEYWORD:
21969 case PM_TOKEN_CONSTANT:
21970 case PM_TOKEN_IDENTIFIER:
21971 case PM_TOKEN_METHOD_NAME: {
21972 parser_lex(parser);
21973 message = parser->previous;
21974 break;
21975 }
21976 default: {
21977 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21978 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21979 }
21980 }
21981
21982 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21983 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21984
21985 if (
21986 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21987 arguments.arguments == NULL &&
21988 arguments.opening_loc.start == NULL &&
21989 match1(parser, PM_TOKEN_COMMA)
21990 ) {
21991 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21992 } else {
21993 return (pm_node_t *) call;
21994 }
21995 }
21996 case PM_TOKEN_DOT_DOT:
21997 case PM_TOKEN_DOT_DOT_DOT: {
21998 parser_lex(parser);
21999
22000 pm_node_t *right = NULL;
22001 if (token_begins_expression_p(parser->current.type)) {
22002 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
22003 }
22004
22005 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
22006 }
22007 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
22008 pm_token_t keyword = parser->current;
22009 parser_lex(parser);
22010
22011 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
22012 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
22013 }
22014 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
22015 pm_token_t keyword = parser->current;
22016 parser_lex(parser);
22017
22018 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
22019 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
22020 }
22021 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
22022 parser_lex(parser);
22023 pm_statements_node_t *statements = pm_statements_node_create(parser);
22024 pm_statements_node_body_append(parser, statements, node, true);
22025
22026 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
22027 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
22028 }
22029 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
22030 parser_lex(parser);
22031 pm_statements_node_t *statements = pm_statements_node_create(parser);
22032 pm_statements_node_body_append(parser, statements, node, true);
22033
22034 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
22035 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
22036 }
22037 case PM_TOKEN_QUESTION_MARK: {
22038 context_push(parser, PM_CONTEXT_TERNARY);
22039 pm_node_list_t current_block_exits = { 0 };
22040 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22041
22042 pm_token_t qmark = parser->current;
22043 parser_lex(parser);
22044
22045 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
22046
22047 if (parser->recovering) {
22048 // If parsing the true expression of this ternary resulted in a syntax
22049 // error that we can recover from, then we're going to put missing nodes
22050 // and tokens into the remaining places. We want to be sure to do this
22051 // before the `expect` function call to make sure it doesn't
22052 // accidentally move past a ':' token that occurs after the syntax
22053 // error.
22054 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
22055 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
22056
22057 context_pop(parser);
22058 pop_block_exits(parser, previous_block_exits);
22059 pm_node_list_free(&current_block_exits);
22060
22061 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
22062 }
22063
22064 accept1(parser, PM_TOKEN_NEWLINE);
22065 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
22066
22067 pm_token_t colon = parser->previous;
22068 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
22069
22070 context_pop(parser);
22071 pop_block_exits(parser, previous_block_exits);
22072 pm_node_list_free(&current_block_exits);
22073
22074 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
22075 }
22076 case PM_TOKEN_COLON_COLON: {
22077 parser_lex(parser);
22078 pm_token_t delimiter = parser->previous;
22079
22080 switch (parser->current.type) {
22081 case PM_TOKEN_CONSTANT: {
22082 parser_lex(parser);
22083 pm_node_t *path;
22084
22085 if (
22086 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
22087 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
22088 ) {
22089 // If we have a constant immediately following a '::' operator, then
22090 // this can either be a constant path or a method call, depending on
22091 // what follows the constant.
22092 //
22093 // If we have parentheses, then this is a method call. That would
22094 // look like Foo::Bar().
22095 pm_token_t message = parser->previous;
22096 pm_arguments_t arguments = { 0 };
22097
22098 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
22099 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
22100 } else {
22101 // Otherwise, this is a constant path. That would look like Foo::Bar.
22102 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
22103 }
22104
22105 // If this is followed by a comma then it is a multiple assignment.
22106 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22107 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22108 }
22109
22110 return path;
22111 }
22112 case PM_CASE_OPERATOR:
22113 case PM_CASE_KEYWORD:
22114 case PM_TOKEN_IDENTIFIER:
22115 case PM_TOKEN_METHOD_NAME: {
22116 parser_lex(parser);
22117 pm_token_t message = parser->previous;
22118
22119 // If we have an identifier following a '::' operator, then it is for
22120 // sure a method call.
22121 pm_arguments_t arguments = { 0 };
22122 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
22123 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
22124
22125 // If this is followed by a comma then it is a multiple assignment.
22126 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22127 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22128 }
22129
22130 return (pm_node_t *) call;
22131 }
22132 case PM_TOKEN_PARENTHESIS_LEFT: {
22133 // If we have a parenthesis following a '::' operator, then it is the
22134 // method call shorthand. That would look like Foo::(bar).
22135 pm_arguments_t arguments = { 0 };
22136 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
22137
22138 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
22139 }
22140 default: {
22141 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
22142 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
22143 }
22144 }
22145 }
22146 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
22147 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
22148 parser_lex(parser);
22149 accept1(parser, PM_TOKEN_NEWLINE);
22150
22151 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
22152 context_pop(parser);
22153
22154 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
22155 }
22156 case PM_TOKEN_BRACKET_LEFT: {
22157 parser_lex(parser);
22158
22159 pm_arguments_t arguments = { 0 };
22160 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22161
22162 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
22163 pm_accepts_block_stack_push(parser, true);
22164 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
22165 pm_accepts_block_stack_pop(parser);
22166 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
22167 }
22168
22169 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22170
22171 // If we have a comma after the closing bracket then this is a multiple
22172 // assignment and we should parse the targets.
22173 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22174 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
22175 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22176 }
22177
22178 // If we're at the end of the arguments, we can now check if there is a
22179 // block node that starts with a {. If there is, then we can parse it and
22180 // add it to the arguments.
22181 pm_block_node_t *block = NULL;
22182 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
22183 block = parse_block(parser, (uint16_t) (depth + 1));
22184 pm_arguments_validate_block(parser, &arguments, block);
22185 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
22186 block = parse_block(parser, (uint16_t) (depth + 1));
22187 }
22188
22189 if (block != NULL) {
22190 if (arguments.block != NULL) {
22191 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
22192 if (arguments.arguments == NULL) {
22193 arguments.arguments = pm_arguments_node_create(parser);
22194 }
22195 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
22196 }
22197
22198 arguments.block = (pm_node_t *) block;
22199 }
22200
22201 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
22202 }
22203 case PM_TOKEN_KEYWORD_IN: {
22204 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22205 parser->pattern_matching_newlines = true;
22206
22207 pm_token_t operator = parser->current;
22208 parser->command_start = false;
22209 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22210 parser_lex(parser);
22211
22212 pm_constant_id_list_t captures = { 0 };
22213 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
22214
22215 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22216 pm_constant_id_list_free(&captures);
22217
22218 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
22219 }
22220 case PM_TOKEN_EQUAL_GREATER: {
22221 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22222 parser->pattern_matching_newlines = true;
22223
22224 pm_token_t operator = parser->current;
22225 parser->command_start = false;
22226 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22227 parser_lex(parser);
22228
22229 pm_constant_id_list_t captures = { 0 };
22230 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
22231
22232 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22233 pm_constant_id_list_free(&captures);
22234
22235 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
22236 }
22237 default:
22238 assert(false && "unreachable");
22239 return NULL;
22240 }
22241}
22242
22243#undef PM_PARSE_PATTERN_SINGLE
22244#undef PM_PARSE_PATTERN_TOP
22245#undef PM_PARSE_PATTERN_MULTI
22246
22251static inline bool
22252pm_call_node_command_p(const pm_call_node_t *node) {
22253 return (
22254 (node->opening_loc.start == NULL) &&
22255 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
22256 (node->arguments != NULL || node->block != NULL)
22257 );
22258}
22259
22268static pm_node_t *
22269parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
22270 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
22271 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
22272 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
22273 }
22274
22275 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
22276
22277 switch (PM_NODE_TYPE(node)) {
22278 case PM_MISSING_NODE:
22279 // If we found a syntax error, then the type of node returned by
22280 // parse_expression_prefix is going to be a missing node.
22281 return node;
22282 case PM_PRE_EXECUTION_NODE:
22283 case PM_POST_EXECUTION_NODE:
22284 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
22285 case PM_ALIAS_METHOD_NODE:
22286 case PM_MULTI_WRITE_NODE:
22287 case PM_UNDEF_NODE:
22288 // These expressions are statements, and cannot be followed by
22289 // operators (except modifiers).
22290 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22291 return node;
22292 }
22293 break;
22294 case PM_CALL_NODE:
22295 // If we have a call node, then we need to check if it looks like a
22296 // method call without parentheses that contains arguments. If it
22297 // does, then it has different rules for parsing infix operators,
22298 // namely that it only accepts composition (and/or) and modifiers
22299 // (if/unless/etc.).
22300 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
22301 return node;
22302 }
22303 break;
22304 case PM_SYMBOL_NODE:
22305 // If we have a symbol node that is being parsed as a label, then we
22306 // need to immediately return, because there should never be an
22307 // infix operator following this node.
22308 if (pm_symbol_node_label_p(node)) {
22309 return node;
22310 }
22311 break;
22312 default:
22313 break;
22314 }
22315
22316 // Otherwise we'll look and see if the next token can be parsed as an infix
22317 // operator. If it can, then we'll parse it using parse_expression_infix.
22318 pm_binding_powers_t current_binding_powers;
22319 pm_token_type_t current_token_type;
22320
22321 while (
22322 current_token_type = parser->current.type,
22323 current_binding_powers = pm_binding_powers[current_token_type],
22324 binding_power <= current_binding_powers.left &&
22325 current_binding_powers.binary
22326 ) {
22327 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
22328
22329 if (context_terminator(parser->current_context->context, &parser->current)) {
22330 // If this token terminates the current context, then we need to
22331 // stop parsing the expression, as it has become a statement.
22332 return node;
22333 }
22334
22335 switch (PM_NODE_TYPE(node)) {
22336 case PM_MULTI_WRITE_NODE:
22337 // Multi-write nodes are statements, and cannot be followed by
22338 // operators except modifiers.
22339 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22340 return node;
22341 }
22342 break;
22343 case PM_CLASS_VARIABLE_WRITE_NODE:
22344 case PM_CONSTANT_PATH_WRITE_NODE:
22345 case PM_CONSTANT_WRITE_NODE:
22346 case PM_GLOBAL_VARIABLE_WRITE_NODE:
22347 case PM_INSTANCE_VARIABLE_WRITE_NODE:
22348 case PM_LOCAL_VARIABLE_WRITE_NODE:
22349 // These expressions are statements, by virtue of the right-hand
22350 // side of their write being an implicit array.
22351 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22352 return node;
22353 }
22354 break;
22355 case PM_CALL_NODE:
22356 // These expressions are also statements, by virtue of the
22357 // right-hand side of the expression (i.e., the last argument to
22358 // the call node) being an implicit array.
22359 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22360 return node;
22361 }
22362 break;
22363 default:
22364 break;
22365 }
22366
22367 // If the operator is nonassoc and we should not be able to parse the
22368 // upcoming infix operator, break.
22369 if (current_binding_powers.nonassoc) {
22370 // If this is a non-assoc operator and we are about to parse the
22371 // exact same operator, then we need to add an error.
22372 if (match1(parser, current_token_type)) {
22373 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22374 break;
22375 }
22376
22377 // If this is an endless range, then we need to reject a couple of
22378 // additional operators because it violates the normal operator
22379 // precedence rules. Those patterns are:
22380 //
22381 // 1.. & 2
22382 // 1.. * 2
22383 //
22384 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
22385 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
22386 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22387 break;
22388 }
22389
22390 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22391 break;
22392 }
22393 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22394 break;
22395 }
22396 }
22397
22398 if (accepts_command_call) {
22399 // A command-style method call is only accepted on method chains.
22400 // Thus, we check whether the parsed node can continue method chains.
22401 // The method chain can continue if the parsed node is one of the following five kinds:
22402 // (1) index access: foo[1]
22403 // (2) attribute access: foo.bar
22404 // (3) method call with parenthesis: foo.bar(1)
22405 // (4) method call with a block: foo.bar do end
22406 // (5) constant path: foo::Bar
22407 switch (node->type) {
22408 case PM_CALL_NODE: {
22409 pm_call_node_t *cast = (pm_call_node_t *)node;
22410 if (
22411 // (1) foo[1]
22412 !(
22413 cast->call_operator_loc.start == NULL &&
22414 cast->message_loc.start != NULL &&
22415 cast->message_loc.start[0] == '[' &&
22416 cast->message_loc.end[-1] == ']'
22417 ) &&
22418 // (2) foo.bar
22419 !(
22420 cast->call_operator_loc.start != NULL &&
22421 cast->arguments == NULL &&
22422 cast->block == NULL &&
22423 cast->opening_loc.start == NULL
22424 ) &&
22425 // (3) foo.bar(1)
22426 !(
22427 cast->call_operator_loc.start != NULL &&
22428 cast->opening_loc.start != NULL
22429 ) &&
22430 // (4) foo.bar do end
22431 !(
22432 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22433 )
22434 ) {
22435 accepts_command_call = false;
22436 }
22437 break;
22438 }
22439 // (5) foo::Bar
22440 case PM_CONSTANT_PATH_NODE:
22441 break;
22442 default:
22443 accepts_command_call = false;
22444 break;
22445 }
22446 }
22447 }
22448
22449 return node;
22450}
22451
22456static pm_statements_node_t *
22457wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22458 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22459 if (statements == NULL) {
22460 statements = pm_statements_node_create(parser);
22461 }
22462
22463 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22464 pm_arguments_node_arguments_append(
22465 arguments,
22466 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22467 );
22468
22469 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22470 parser,
22471 arguments,
22472 pm_parser_constant_id_constant(parser, "print", 5)
22473 ), true);
22474 }
22475
22476 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22477 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22478 if (statements == NULL) {
22479 statements = pm_statements_node_create(parser);
22480 }
22481
22482 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22483 pm_arguments_node_arguments_append(
22484 arguments,
22485 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22486 );
22487
22488 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22489 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22490
22491 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22492 parser,
22493 pm_parser_constant_id_constant(parser, "$F", 2),
22494 (pm_node_t *) call
22495 );
22496
22497 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22498 }
22499
22500 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22501 pm_arguments_node_arguments_append(
22502 arguments,
22503 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22504 );
22505
22506 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22507 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22508 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22509 parser,
22510 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22511 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22512 (pm_node_t *) pm_true_node_synthesized_create(parser)
22513 ));
22514
22515 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22516 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22517 }
22518
22519 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22520 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22521 parser,
22522 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22523 statements
22524 ), true);
22525
22526 statements = wrapped_statements;
22527 }
22528
22529 return statements;
22530}
22531
22535static pm_node_t *
22536parse_program(pm_parser_t *parser) {
22537 // If the current scope is NULL, then we want to push a new top level scope.
22538 // The current scope could exist in the event that we are parsing an eval
22539 // and the user has passed into scopes that already exist.
22540 if (parser->current_scope == NULL) {
22541 pm_parser_scope_push(parser, true);
22542 }
22543
22544 pm_node_list_t current_block_exits = { 0 };
22545 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22546
22547 parser_lex(parser);
22548 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22549
22550 if (statements != NULL && !parser->parsing_eval) {
22551 // If we have statements, then the top-level statement should be
22552 // explicitly checked as well. We have to do this here because
22553 // everywhere else we check all but the last statement.
22554 assert(statements->body.size > 0);
22555 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22556 }
22557
22558 pm_constant_id_list_t locals;
22559 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22560 pm_parser_scope_pop(parser);
22561
22562 // At the top level, see if we need to wrap the statements in a program
22563 // node with a while loop based on the options.
22565 statements = wrap_statements(parser, statements);
22566 } else {
22567 flush_block_exits(parser, previous_block_exits);
22568 }
22569
22570 pm_node_list_free(&current_block_exits);
22571
22572 // If this is an empty file, then we're still going to parse all of the
22573 // statements in order to gather up all of the comments and such. Here we'll
22574 // correct the location information.
22575 if (statements == NULL) {
22576 statements = pm_statements_node_create(parser);
22577 pm_statements_node_location_set(statements, parser->start, parser->start);
22578 }
22579
22580 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22581}
22582
22583/******************************************************************************/
22584/* External functions */
22585/******************************************************************************/
22586
/**
 * Find the first occurrence of the NUL-terminated string little that lies
 * entirely within the first big_length bytes of big.
 *
 * big does not need to be NUL-terminated; only big_length bytes are ever
 * examined. Returns a pointer to the start of the match, or NULL if there is
 * no match. An empty little matches at the start of big, following the
 * convention of strstr.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    if (little_length == 0) return big;

    // A needle longer than the haystack cannot match. Checking this first also
    // keeps the subtraction below from wrapping around.
    if (little_length > big_length) return NULL;

    // Only start positions that leave room for the whole needle are tried, so
    // memcmp never reads past big + big_length.
    const char *last_start = big + (big_length - little_length);

    for (; big <= last_start; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22606
22607#ifdef _WIN32
22608#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22609#else
22615static void
22616pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22617 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22618 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22619 }
22620}
22621#endif
22622
22627static void
22628pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22629 const char *switches = pm_strnstr(engine, " -", length);
22630 if (switches == NULL) return;
22631
22632 pm_options_t next_options = *options;
22633 options->shebang_callback(
22634 &next_options,
22635 (const uint8_t *) (switches + 1),
22636 length - ((size_t) (switches - engine)) - 1,
22637 options->shebang_callback_data
22638 );
22639
22640 size_t encoding_length;
22641 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22642 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22643 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22644 }
22645
22646 parser->command_line = next_options.command_line;
22647 parser->frozen_string_literal = next_options.frozen_string_literal;
22648}
22649
22654pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22655 assert(source != NULL);
22656
22657 *parser = (pm_parser_t) {
22658 .node_id = 0,
22659 .lex_state = PM_LEX_STATE_BEG,
22660 .enclosure_nesting = 0,
22661 .lambda_enclosure_nesting = -1,
22662 .brace_nesting = 0,
22663 .do_loop_stack = 0,
22664 .accepts_block_stack = 0,
22665 .lex_modes = {
22666 .index = 0,
22667 .stack = {{ .mode = PM_LEX_DEFAULT }},
22668 .current = &parser->lex_modes.stack[0],
22669 },
22670 .start = source,
22671 .end = source + size,
22672 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22673 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22674 .next_start = NULL,
22675 .heredoc_end = NULL,
22676 .data_loc = { .start = NULL, .end = NULL },
22677 .comment_list = { 0 },
22678 .magic_comment_list = { 0 },
22679 .warning_list = { 0 },
22680 .error_list = { 0 },
22681 .current_scope = NULL,
22682 .current_context = NULL,
22683 .encoding = PM_ENCODING_UTF_8_ENTRY,
22684 .encoding_changed_callback = NULL,
22685 .encoding_comment_start = source,
22686 .lex_callback = NULL,
22687 .filepath = { 0 },
22688 .constant_pool = { 0 },
22689 .newline_list = { 0 },
22690 .integer_base = 0,
22691 .current_string = PM_STRING_EMPTY,
22692 .start_line = 1,
22693 .explicit_encoding = NULL,
22694 .command_line = 0,
22695 .parsing_eval = false,
22696 .partial_script = false,
22697 .command_start = true,
22698 .recovering = false,
22699 .encoding_locked = false,
22700 .encoding_changed = false,
22701 .pattern_matching_newlines = false,
22702 .in_keyword_arg = false,
22703 .current_block_exits = NULL,
22704 .semantic_token_seen = false,
22705 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22706 .current_regular_expression_ascii_only = false,
22707 .warn_mismatched_indentation = true
22708 };
22709
22710 // Initialize the constant pool. We're going to completely guess as to the
22711 // number of constants that we'll need based on the size of the input. The
22712 // ratio we chose here is actually less arbitrary than you might think.
22713 //
22714 // We took ~50K Ruby files and measured the size of the file versus the
22715 // number of constants that were found in those files. Then we found the
22716 // average and standard deviation of the ratios of constants/bytesize. Then
22717 // we added 1.34 standard deviations to the average to get a ratio that
22718 // would fit 75% of the files (for a two-tailed distribution). This works
22719 // because there was about a 0.77 correlation and the distribution was
22720 // roughly normal.
22721 //
22722 // This ratio will need to change if we add more constants to the constant
22723 // pool for another node type.
22724 uint32_t constant_size = ((uint32_t) size) / 95;
22725 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22726
22727 // Initialize the newline list. Similar to the constant pool, we're going to
22728 // guess at the number of newlines that we'll need based on the size of the
22729 // input.
22730 size_t newline_size = size / 22;
22731 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22732
22733 // If options were provided to this parse, establish them here.
22734 if (options != NULL) {
22735 // filepath option
22736 parser->filepath = options->filepath;
22737
22738 // line option
22739 parser->start_line = options->line;
22740
22741 // encoding option
22742 size_t encoding_length = pm_string_length(&options->encoding);
22743 if (encoding_length > 0) {
22744 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22745 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22746 }
22747
22748 // encoding_locked option
22749 parser->encoding_locked = options->encoding_locked;
22750
22751 // frozen_string_literal option
22753
22754 // command_line option
22755 parser->command_line = options->command_line;
22756
22757 // version option
22758 parser->version = options->version;
22759
22760 // partial_script
22761 parser->partial_script = options->partial_script;
22762
22763 // scopes option
22764 parser->parsing_eval = options->scopes_count > 0;
22765 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22766
22767 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22768 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22769 pm_parser_scope_push(parser, scope_index == 0);
22770
22771 // Scopes given from the outside are not allowed to have numbered
22772 // parameters.
22773 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22774
22775 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22776 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22777
22778 const uint8_t *source = pm_string_source(local);
22779 size_t length = pm_string_length(local);
22780
22781 void *allocated = xmalloc(length);
22782 if (allocated == NULL) continue;
22783
22784 memcpy(allocated, source, length);
22785 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22786 }
22787 }
22788 }
22789
22790 // Now that we have established the user-provided options, check if
22791 // a version was given and parse as the latest version otherwise.
22792 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22794 }
22795
22796 pm_accepts_block_stack_push(parser, true);
22797
22798 // Skip past the UTF-8 BOM if it exists.
22799 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22800 parser->current.end += 3;
22801 parser->encoding_comment_start += 3;
22802
22803 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22805 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22806 }
22807 }
22808
22809 // If the -x command line flag is set, or the first shebang of the file does
22810 // not include "ruby", then we'll search for a shebang that does include
22811 // "ruby" and start parsing from there.
22812 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22813
22814 // If the first two bytes of the source are a shebang, then we will do a bit
22815 // of extra processing.
22816 //
22817 // First, we'll indicate that the encoding comment is at the end of the
22818 // shebang. This means that when a shebang is present the encoding comment
22819 // can begin on the second line.
22820 //
22821 // Second, we will check if the shebang includes "ruby". If it does, then
22822 // we will start parsing from there. We will also potentially warn the
22823 // user if there is a carriage return at the end of the shebang. We will
22824 // also potentially call the shebang callback if this is the main script to
22825 // allow the caller to parse the shebang and find any command-line options.
22826 // If the shebang does not include "ruby" and this is the main script being
22827 // parsed, then we will start searching the file for a shebang that does
22828 // contain "ruby" as if -x were passed on the command line.
22829 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22830 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22831
22832 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22833 const char *engine;
22834
22835 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22836 if (newline != NULL) {
22837 parser->encoding_comment_start = newline + 1;
22838
22839 if (options == NULL || options->main_script) {
22840 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22841 }
22842 }
22843
22844 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22845 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22846 }
22847
22848 search_shebang = false;
22849 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22850 search_shebang = true;
22851 }
22852 }
22853
22854 // Here we're going to find the first shebang that includes "ruby" and start
22855 // parsing from there.
22856 if (search_shebang) {
22857 // If a shebang that includes "ruby" is not found, then we're going to add
22858 // a load error to the list of errors on the parser.
22859 bool found_shebang = false;
22860
22861 // This is going to point to the start of each line as we check it.
22862 // We'll maintain a moving window looking at each line as they come.
22863 const uint8_t *cursor = parser->start;
22864
22865 // The newline pointer points to the end of the current line that we're
22866 // considering. If it is NULL, then we're at the end of the file.
22867 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22868
22869 while (newline != NULL) {
22870 pm_newline_list_append(&parser->newline_list, newline);
22871
22872 cursor = newline + 1;
22873 newline = next_newline(cursor, parser->end - cursor);
22874
22875 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22876 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22877 const char *engine;
22878 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22879 found_shebang = true;
22880
22881 if (newline != NULL) {
22882 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22883 parser->encoding_comment_start = newline + 1;
22884 }
22885
22886 if (options != NULL && options->shebang_callback != NULL) {
22887 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22888 }
22889
22890 break;
22891 }
22892 }
22893 }
22894
22895 if (found_shebang) {
22896 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22897 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22898 } else {
22899 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22900 pm_newline_list_clear(&parser->newline_list);
22901 }
22902 }
22903
22904 // The encoding comment can start after any amount of inline whitespace, so
22905 // here we'll advance it to the first non-inline-whitespace character so
22906 // that it is ready for future comparisons.
22907 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22908}
22909
22918
22922static inline void
22923pm_comment_list_free(pm_list_t *list) {
22924 pm_list_node_t *node, *next;
22925
22926 for (node = list->head; node != NULL; node = next) {
22927 next = node->next;
22928
22929 pm_comment_t *comment = (pm_comment_t *) node;
22930 xfree(comment);
22931 }
22932}
22933
22937static inline void
22938pm_magic_comment_list_free(pm_list_t *list) {
22939 pm_list_node_t *node, *next;
22940
22941 for (node = list->head; node != NULL; node = next) {
22942 next = node->next;
22943
22946 }
22947}
22948
22954 pm_string_free(&parser->filepath);
22955 pm_diagnostic_list_free(&parser->error_list);
22956 pm_diagnostic_list_free(&parser->warning_list);
22957 pm_comment_list_free(&parser->comment_list);
22958 pm_magic_comment_list_free(&parser->magic_comment_list);
22959 pm_constant_pool_free(&parser->constant_pool);
22960 pm_newline_list_free(&parser->newline_list);
22961
22962 while (parser->current_scope != NULL) {
22963 // Normally, popping the scope doesn't free the locals since it is
22964 // assumed that ownership has transferred to the AST. However if we have
22965 // scopes while we're freeing the parser, it's likely they came from
22966 // eval scopes and we need to free them explicitly here.
22967 pm_parser_scope_pop(parser);
22968 }
22969
22970 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22971 lex_mode_pop(parser);
22972 }
22973}
22974
22980 return parse_program(parser);
22981}
22982
22988static bool
22989pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22990#define LINE_SIZE 4096
22991 char line[LINE_SIZE];
22992
22993 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22994 size_t length = LINE_SIZE;
22995 while (length > 0 && line[length - 1] == '\n') length--;
22996
22997 if (length == LINE_SIZE) {
22998 // If we read a line that is the maximum size and it doesn't end
22999 // with a newline, then we'll just append it to the buffer and
23000 // continue reading.
23001 length--;
23002 pm_buffer_append_string(buffer, line, length);
23003 continue;
23004 }
23005
23006 // Append the line to the buffer.
23007 length--;
23008 pm_buffer_append_string(buffer, line, length);
23009
23010 // Check if the line matches the __END__ marker. If it does, then stop
23011 // reading and return false. In most circumstances, this means we should
23012 // stop reading from the stream so that the DATA constant can pick it
23013 // up.
23014 switch (length) {
23015 case 7:
23016 if (strncmp(line, "__END__", 7) == 0) return false;
23017 break;
23018 case 8:
23019 if (strncmp(line, "__END__\n", 8) == 0) return false;
23020 break;
23021 case 9:
23022 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
23023 break;
23024 }
23025
23026 // All data should be read via gets. If the string returned by gets
23027 // _doesn't_ end with a newline, then we assume we hit EOF condition.
23028 if (stream_feof(stream)) {
23029 break;
23030 }
23031 }
23032
23033 return true;
23034#undef LINE_SIZE
23035}
23036
23046static bool
23047pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
23048 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
23049
23050 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
23051 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
23052 return true;
23053 }
23054 }
23055
23056 return false;
23057}
23058
23066pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
23067 pm_buffer_init(buffer);
23068
23069 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
23070
23071 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
23072 pm_node_t *node = pm_parse(parser);
23073
23074 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
23075 pm_node_destroy(parser, node);
23076 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
23077
23078 pm_parser_free(parser);
23079 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
23080 node = pm_parse(parser);
23081 }
23082
23083 return node;
23084}
23085
23090pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
23091 pm_options_t options = { 0 };
23092 pm_options_read(&options, data);
23093
23094 pm_parser_t parser;
23095 pm_parser_init(&parser, source, size, &options);
23096
23097 pm_node_t *node = pm_parse(&parser);
23098 pm_node_destroy(&parser, node);
23099
23100 bool result = parser.error_list.size == 0;
23101 pm_parser_free(&parser);
23102 pm_options_free(&options);
23103
23104 return result;
23105}
23106
23107#undef PM_CASE_KEYWORD
23108#undef PM_CASE_OPERATOR
23109#undef PM_CASE_WRITABLE
23110#undef PM_STRING_EMPTY
23111#undef PM_LOCATION_NODE_BASE_VALUE
23112#undef PM_LOCATION_NODE_VALUE
23113#undef PM_LOCATION_NULL_VALUE
23114#undef PM_LOCATION_TOKEN_VALUE
23115
23116// We optionally support serializing to a binary string. For systems that don't
23117// want or need this functionality, it can be turned off with the
23118// PRISM_EXCLUDE_SERIALIZATION define.
23119#ifndef PRISM_EXCLUDE_SERIALIZATION
23120
23121static inline void
23122pm_serialize_header(pm_buffer_t *buffer) {
23123 pm_buffer_append_string(buffer, "PRISM", 5);
23124 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
23125 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
23126 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
23127 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
23128}
23129
23134pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
23135 pm_serialize_header(buffer);
23136 pm_serialize_content(parser, node, buffer);
23137 pm_buffer_append_byte(buffer, '\0');
23138}
23139
23145pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23146 pm_options_t options = { 0 };
23147 pm_options_read(&options, data);
23148
23149 pm_parser_t parser;
23150 pm_parser_init(&parser, source, size, &options);
23151
23152 pm_node_t *node = pm_parse(&parser);
23153
23154 pm_serialize_header(buffer);
23155 pm_serialize_content(&parser, node, buffer);
23156 pm_buffer_append_byte(buffer, '\0');
23157
23158 pm_node_destroy(&parser, node);
23159 pm_parser_free(&parser);
23160 pm_options_free(&options);
23161}
23162
23168pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
23169 pm_parser_t parser;
23170 pm_options_t options = { 0 };
23171 pm_options_read(&options, data);
23172
23173 pm_buffer_t parser_buffer;
23174 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
23175 pm_serialize_header(buffer);
23176 pm_serialize_content(&parser, node, buffer);
23177 pm_buffer_append_byte(buffer, '\0');
23178
23179 pm_node_destroy(&parser, node);
23180 pm_buffer_free(&parser_buffer);
23181 pm_parser_free(&parser);
23182 pm_options_free(&options);
23183}
23184
23189pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23190 pm_options_t options = { 0 };
23191 pm_options_read(&options, data);
23192
23193 pm_parser_t parser;
23194 pm_parser_init(&parser, source, size, &options);
23195
23196 pm_node_t *node = pm_parse(&parser);
23197 pm_serialize_header(buffer);
23198 pm_serialize_encoding(parser.encoding, buffer);
23199 pm_buffer_append_varsint(buffer, parser.start_line);
23200 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
23201
23202 pm_node_destroy(&parser, node);
23203 pm_parser_free(&parser);
23204 pm_options_free(&options);
23205}
23206
23207#endif
23208
23209/******************************************************************************/
23210/* Slice queries for the Ruby API */
23211/******************************************************************************/
23212
/** The category of a slice of source, as determined by pm_slice_type. */
23214typedef enum {
/** Returned when the given encoding name could not be found. */
23216 PM_SLICE_TYPE_ERROR = -1,
23217
/** Returned when the slice is empty or is not a valid identifier. */
23219 PM_SLICE_TYPE_NONE,
23220
/** Returned when the slice is an identifier that does not start with an uppercase character. */
23222 PM_SLICE_TYPE_LOCAL,
23223
/** Returned when the slice is an identifier that starts with an uppercase character. */
23225 PM_SLICE_TYPE_CONSTANT,
23226
/** Returned when the slice is an identifier followed by a trailing !, ?, or =. */
23228 PM_SLICE_TYPE_METHOD_NAME
23229} pm_slice_type_t;
23230
23234pm_slice_type_t
23235pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
23236 // first, get the right encoding object
23237 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
23238 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
23239
23240 // check that there is at least one character
23241 if (length == 0) return PM_SLICE_TYPE_NONE;
23242
23243 size_t width;
23244 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
23245 // valid because alphabetical
23246 } else if (*source == '_') {
23247 // valid because underscore
23248 width = 1;
23249 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
23250 // valid because multibyte
23251 } else {
23252 // invalid because no match
23253 return PM_SLICE_TYPE_NONE;
23254 }
23255
23256 // determine the type of the slice based on the first character
23257 const uint8_t *end = source + length;
23258 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
23259
23260 // next, iterate through all of the bytes of the string to ensure that they
23261 // are all valid identifier characters
23262 source += width;
23263
23264 while (source < end) {
23265 if ((width = encoding->alnum_char(source, end - source)) != 0) {
23266 // valid because alphanumeric
23267 source += width;
23268 } else if (*source == '_') {
23269 // valid because underscore
23270 source++;
23271 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
23272 // valid because multibyte
23273 source += width;
23274 } else {
23275 // invalid because no match
23276 break;
23277 }
23278 }
23279
23280 // accept a ! or ? at the end of the slice as a method name
23281 if (*source == '!' || *source == '?' || *source == '=') {
23282 source++;
23283 result = PM_SLICE_TYPE_METHOD_NAME;
23284 }
23285
23286 // valid if we are at the end of the slice
23287 return source == end ? result : PM_SLICE_TYPE_NONE;
23288}
23289
23294pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
23295 switch (pm_slice_type(source, length, encoding_name)) {
23296 case PM_SLICE_TYPE_ERROR:
23297 return PM_STRING_QUERY_ERROR;
23298 case PM_SLICE_TYPE_NONE:
23299 case PM_SLICE_TYPE_CONSTANT:
23300 case PM_SLICE_TYPE_METHOD_NAME:
23301 return PM_STRING_QUERY_FALSE;
23302 case PM_SLICE_TYPE_LOCAL:
23303 return PM_STRING_QUERY_TRUE;
23304 }
23305
23306 assert(false && "unreachable");
23307 return PM_STRING_QUERY_FALSE;
23308}
23309
23314pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
23315 switch (pm_slice_type(source, length, encoding_name)) {
23316 case PM_SLICE_TYPE_ERROR:
23317 return PM_STRING_QUERY_ERROR;
23318 case PM_SLICE_TYPE_NONE:
23319 case PM_SLICE_TYPE_LOCAL:
23320 case PM_SLICE_TYPE_METHOD_NAME:
23321 return PM_STRING_QUERY_FALSE;
23322 case PM_SLICE_TYPE_CONSTANT:
23323 return PM_STRING_QUERY_TRUE;
23324 }
23325
23326 assert(false && "unreachable");
23327 return PM_STRING_QUERY_FALSE;
23328}
23329
23334pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
23335#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
23336#define C1(c) (*source == c)
23337#define C2(s) (memcmp(source, s, 2) == 0)
23338#define C3(s) (memcmp(source, s, 3) == 0)
23339
23340 switch (pm_slice_type(source, length, encoding_name)) {
23341 case PM_SLICE_TYPE_ERROR:
23342 return PM_STRING_QUERY_ERROR;
23343 case PM_SLICE_TYPE_NONE:
23344 break;
23345 case PM_SLICE_TYPE_LOCAL:
23346 // numbered parameters are not valid method names
23347 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
23348 case PM_SLICE_TYPE_CONSTANT:
23349 // all constants are valid method names
23350 case PM_SLICE_TYPE_METHOD_NAME:
23351 // all method names are valid method names
23352 return PM_STRING_QUERY_TRUE;
23353 }
23354
23355 switch (length) {
23356 case 1:
23357 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
23358 case 2:
23359 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
23360 case 3:
23361 return B(C3("===") || C3("<=>") || C3("[]="));
23362 default:
23363 return PM_STRING_QUERY_FALSE;
23364 }
23365
23366#undef B
23367#undef C1
23368#undef C2
23369#undef C3
23370}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:208
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:192
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:172
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:219
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:225
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:98
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:86
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_CRUBY_3_5
The vendored version of prism in CRuby 3.5.x.
Definition options.h:95
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statements
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statements
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:566
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:355
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:351
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:359
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:367
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22979
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22915
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22953
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:23066
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:22654
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:265
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:273
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:267
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:270
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2147
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:102
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2124
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:109
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2054
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:18129
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:18131
const uint8_t * start
The start of the regular expression.
Definition prism.c:18134
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:18145
const uint8_t * end
The end of the regular expression.
Definition prism.c:18137
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:21050
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:21061
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:21052
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:21058
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:21055
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:21068
AndNode.
Definition ast.h:1262
struct pm_node * left
AndNode::left.
Definition ast.h:1278
struct pm_node * right
AndNode::right.
Definition ast.h:1291
ArgumentsNode.
Definition ast.h:1323
pm_node_t base
The embedded base node.
Definition ast.h:1325
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1336
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
ArrayNode.
Definition ast.h:1354
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1364
ArrayPatternNode.
Definition ast.h:1415
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1434
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1474
pm_node_t base
The embedded base node.
Definition ast.h:1417
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1484
AssocNode.
Definition ast.h:1499
struct pm_node * value
AssocNode::value.
Definition ast.h:1531
struct pm_node * key
AssocNode::key.
Definition ast.h:1518
BeginNode.
Definition ast.h:1625
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1678
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1658
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1648
pm_node_t base
The embedded base node.
Definition ast.h:1627
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1668
This struct represents a set of binding powers used for a given token.
Definition prism.c:13038
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:13046
pm_binding_power_t left
The left binding power.
Definition prism.c:13040
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:13052
pm_binding_power_t right
The right binding power.
Definition prism.c:13043
BlockLocalVariableNode.
Definition ast.h:1744
BlockNode.
Definition ast.h:1772
BlockParameterNode.
Definition ast.h:1848
BlockParametersNode.
Definition ast.h:1902
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2129
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2190
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2210
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2148
pm_constant_id_t name
CallNode::name.
Definition ast.h:2171
pm_node_t base
The embedded base node.
Definition ast.h:2131
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2223
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2161
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2181
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2200
struct pm_node * block
CallNode::block.
Definition ast.h:2233
CaseMatchNode.
Definition ast.h:2568
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2591
CaseNode.
Definition ast.h:2638
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2661
ClassVariableReadNode.
Definition ast.h:2933
ClassVariableTargetNode.
Definition ast.h:2962
ClassVariableWriteNode.
Definition ast.h:2985
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:466
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition ast.h:3199
ConstantPathTargetNode.
Definition ast.h:3337
ConstantReadNode.
Definition ast.h:3432
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3461
ConstantWriteNode.
Definition ast.h:3484
This is a node in a linked list of contexts.
Definition parser.h:439
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:364
ElseNode.
Definition ast.h:3663
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3676
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3761
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3774
FindPatternNode.
Definition ast.h:3821
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3834
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3886
pm_node_t base
The embedded base node.
Definition ast.h:3823
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3899
FlipFlopNode.
Definition ast.h:3917
FloatNode.
Definition ast.h:3950
double value
FloatNode::value.
Definition ast.h:3960
pm_node_t base
The embedded base node.
Definition ast.h:3952
ForwardingParameterNode.
Definition ast.h:4086
GlobalVariableReadNode.
Definition ast.h:4246
GlobalVariableTargetNode.
Definition ast.h:4275
GlobalVariableWriteNode.
Definition ast.h:4298
HashNode.
Definition ast.h:4360
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4386
HashPatternNode.
Definition ast.h:4420
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4475
pm_node_t base
The embedded base node.
Definition ast.h:4422
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4488
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4436
All of the information that needs to be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4509
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4569
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4588
ImaginaryNode.
Definition ast.h:4615
InstanceVariableReadNode.
Definition ast.h:5105
InstanceVariableTargetNode.
Definition ast.h:5134
InstanceVariableWriteNode.
Definition ast.h:5157
IntegerNode.
Definition ast.h:5225
pm_integer_t value
IntegerNode::value.
Definition ast.h:5235
pm_node_t base
The embedded base node.
Definition ast.h:5227
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5263
InterpolatedRegularExpressionNode.
Definition ast.h:5309
InterpolatedStringNode.
Definition ast.h:5346
pm_node_t base
The embedded base node.
Definition ast.h:5348
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5354
InterpolatedSymbolNode.
Definition ast.h:5379
pm_node_t base
The embedded base node.
Definition ast.h:5381
InterpolatedXStringNode.
Definition ast.h:5412
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5420
pm_node_t base
The embedded base node.
Definition ast.h:5414
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5425
KeywordHashNode.
Definition ast.h:5484
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
enum pm_lex_mode::@95 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
union pm_lex_mode::@96 as
The data associated with this type of lex mode.
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
LocalVariableReadNode.
Definition ast.h:5726
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5757
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5744
LocalVariableTargetNode.
Definition ast.h:5775
LocalVariableWriteNode.
Definition ast.h:5803
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5830
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5817
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
MatchLastLineNode.
Definition ast.h:5895
MatchWriteNode.
Definition ast.h:6053
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6066
MissingNode.
Definition ast.h:6078
MultiTargetNode.
Definition ast.h:6149
pm_node_t base
The embedded base node.
Definition ast.h:6151
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6207
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6167
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6217
MultiWriteNode.
Definition ast.h:6232
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:557
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1068
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1073
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1079
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1091
OptionalParameterNode.
Definition ast.h:6505
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:104
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:153
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:115
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:169
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:176
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:130
int32_t line
The line within the file that the parse starts on.
Definition options.h:124
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:109
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:162
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:186
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:135
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:118
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:150
OrNode.
Definition ast.h:6543
struct pm_node * left
OrNode::left.
Definition ast.h:6559
struct pm_node * right
OrNode::right.
Definition ast.h:6572
ParametersNode.
Definition ast.h:6598
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6616
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6636
pm_node_t base
The embedded base node.
Definition ast.h:6600
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6631
ParenthesesNode.
Definition ast.h:6654
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6662
This struct represents the overall parser.
Definition parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:909
struct pm_parser::@101 lex_modes
A stack of lex modes.
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:646
RangeNode.
Definition ast.h:6890
struct pm_node * right
RangeNode::right.
Definition ast.h:6920
struct pm_node * left
RangeNode::left.
Definition ast.h:6906
RationalNode.
Definition ast.h:6948
pm_node_t base
The embedded base node.
Definition ast.h:6950
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6960
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:10384
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10389
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10386
RegularExpressionNode.
Definition ast.h:7015
pm_node_t base
The embedded base node.
Definition ast.h:7017
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:7038
RequiredParameterNode.
Definition ast.h:7089
RescueModifierNode.
Definition ast.h:7112
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7130
RescueNode.
Definition ast.h:7150
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7188
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7178
pm_node_t base
The embedded base node.
Definition ast.h:7152
This struct represents a node in a linked list of scopes.
Definition parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
SplatNode.
Definition ast.h:7450
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7463
StatementsNode.
Definition ast.h:7478
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7486
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7513
pm_node_t base
The embedded base node.
Definition ast.h:7515
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7536
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7531
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7521
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@102 type
The type of the string.
SymbolNode.
Definition ast.h:7605
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7618
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7628
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:10358
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10363
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10369
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
UndefNode.
Definition ast.h:7661
UnlessNode.
Definition ast.h:7692
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7742
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7752
WhenNode.
Definition ast.h:7828
XStringNode.
Definition ast.h:7919