Ruby 3.5.0dev (2025-09-18 revision 0bb6a8bea49fed8ccef0a70aca5f2ea05af94292)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
// Number of columns a tab counts for.
// NOTE(review): no use of this constant is visible in this part of the file.
#define PM_TAB_WHITESPACE_SIZE 8

// Smaller and larger of two values. Both macros mention each argument twice,
// so an argument with side effects may be evaluated more than once.
#define MIN(a, b) (((b) > (a)) ? (a) : (b))
#define MAX(a, b) (((b) < (a)) ? (a) : (b))
20
21/******************************************************************************/
22/* Lex mode manipulations */
23/******************************************************************************/
24
/**
 * Return the incrementor byte for a delimiter.
 *
 * The four bracket-like openers ( [ { < are their own incrementor. Every
 * other byte has none, which is reported as '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    const bool is_opener = (start == '(' || start == '[' || start == '{' || start == '<');
    return is_opener ? start : '\0';
}
41
/**
 * Return the terminator byte for a delimiter.
 *
 * The bracket-like openers ( [ { < are closed by their matching counterpart.
 * Any other byte terminates itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * The possible results of lex_state_ignored_p.
 */
typedef enum {
    /** Neither of the two conditions below holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** The lex state has one of BEG, CLASS, FNAME or DOT and is not LABELED. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** The lex state, ignoring LABEL, is exactly ARG | LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
// Lex state debug logging is off unless the build defines PM_DEBUG_LOGGING
// to a non-zero value.
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323
324#if PM_DEBUG_LOGGING
325PRISM_ATTRIBUTE_UNUSED static void
326debug_state(pm_parser_t *parser) {
327 fprintf(stderr, "STATE: ");
328 bool first = true;
329
330 if (parser->lex_state == PM_LEX_STATE_NONE) {
331 fprintf(stderr, "NONE\n");
332 return;
333 }
334
335#define CHECK_STATE(state) \
336 if (parser->lex_state & state) { \
337 if (!first) fprintf(stderr, "|"); \
338 fprintf(stderr, "%s", #state); \
339 first = false; \
340 }
341
342 CHECK_STATE(PM_LEX_STATE_BEG)
343 CHECK_STATE(PM_LEX_STATE_END)
344 CHECK_STATE(PM_LEX_STATE_ENDARG)
345 CHECK_STATE(PM_LEX_STATE_ENDFN)
346 CHECK_STATE(PM_LEX_STATE_ARG)
347 CHECK_STATE(PM_LEX_STATE_CMDARG)
348 CHECK_STATE(PM_LEX_STATE_MID)
349 CHECK_STATE(PM_LEX_STATE_FNAME)
350 CHECK_STATE(PM_LEX_STATE_DOT)
351 CHECK_STATE(PM_LEX_STATE_CLASS)
352 CHECK_STATE(PM_LEX_STATE_LABEL)
353 CHECK_STATE(PM_LEX_STATE_LABELED)
354 CHECK_STATE(PM_LEX_STATE_FITEM)
355
356#undef CHECK_STATE
357
358 fprintf(stderr, "\n");
359}
360
361static void
362debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364 debug_state(parser);
365 lex_state_set(parser, state);
366 fprintf(stderr, "Now: ");
367 debug_state(parser);
368 fprintf(stderr, "\n");
369}
370
371#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
372#endif
373
374/******************************************************************************/
375/* Command-line macro helpers */
376/******************************************************************************/
377
/** Non-zero when the given option bit is set in the parser's command_line field. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) \
    ((parser)->command_line & (option))

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_A bit is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_E bit is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_L bit is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_N bit is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_P bit is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_X bit is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
399/******************************************************************************/
400/* Diagnostic-related functions */
401/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append a diagnostic with the given id, covering [start, end), to the
 * parser's error list, forwarding the trailing arguments to
 * pm_diagnostic_list_append_format. The parser argument is parenthesized so
 * that any pointer-valued expression can be passed, not just a plain
 * identifier.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error spanning the location that `location` points to.
 * Trailing arguments are forwarded to PM_PARSER_ERR_FORMAT.
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (location)->start, \
        (location)->end, \
        diag_id, \
        __VA_ARGS__ \
    )
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/**
 * Append a formatted error spanning the node's location. Trailing arguments
 * are forwarded to PM_PARSER_ERR_FORMAT.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )

/**
 * Append a formatted error spanning the node's location, passing the node's
 * byte length (as an int) and a pointer to its first byte as the format
 * arguments.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT( \
        parser, \
        node, \
        diag_id, \
        (int) ((node)->location.end - (node)->location.start), \
        (const char *) (node)->location.start \
    )
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error spanning the given token. The token is passed as
 * a value, not as a pointer. Trailing arguments are forwarded to
 * PM_PARSER_ERR_FORMAT.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )

/**
 * Append a formatted error spanning the given token, passing the token's
 * byte length (as an int) and a pointer to its first byte as the format
 * arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a diagnostic with the given id, covering [start, end), to the
 * parser's warning list, forwarding the trailing arguments to
 * pm_diagnostic_list_append_format. The parser argument is parenthesized so
 * that any pointer-valued expression can be passed, not just a plain
 * identifier.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning spanning the given token. The token is passed
 * as a value, not as a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning spanning the given token, passing the token's
 * byte length (as an int) and a pointer to its first byte as the format
 * arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)

/**
 * Append a formatted warning spanning the node's location.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
558/******************************************************************************/
559/* Scope-related functions */
560/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/**
 * The possible results of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** A closed scope declares the parameter and no open scope inside it does. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /** A closed scope declares the parameter, and so does an open scope inside it. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** No closed scope declaring the parameter was reached. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
729/******************************************************************************/
730/* Local variable-related functions */
731/******************************************************************************/
732
// Capacity at which a locals set stops being scanned as a plain list and is
// probed as a hash table instead: capacities below this value use the list
// form, capacities at or above it use the hash form.
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
982/******************************************************************************/
983/* Node-related functions */
984/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
1043 case PM_MATCH_REQUIRED_NODE:
1044 return void_node != NULL ? void_node : node;
1045 case PM_MATCH_PREDICATE_NODE:
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
1148 node = cast->left;
1149 break;
1150 }
1151 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1153
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
1185 case PM_BACK_REFERENCE_READ_NODE:
1186 case PM_CLASS_VARIABLE_READ_NODE:
1187 case PM_GLOBAL_VARIABLE_READ_NODE:
1188 case PM_INSTANCE_VARIABLE_READ_NODE:
1189 case PM_LOCAL_VARIABLE_READ_NODE:
1190 case PM_NUMBERED_REFERENCE_READ_NODE:
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
1249 case PM_CONSTANT_PATH_NODE:
1250 type = "::";
1251 length = 2;
1252 break;
1253 case PM_CONSTANT_READ_NODE:
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
1268 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1269 case PM_INTERPOLATED_STRING_NODE:
1270 case PM_RATIONAL_NODE:
1271 case PM_REGULAR_EXPRESSION_NODE:
1272 case PM_SOURCE_ENCODING_NODE:
1273 case PM_SOURCE_FILE_NODE:
1274 case PM_SOURCE_LINE_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
1287 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The context in which a conditional predicate is being checked. It is passed
 * to pm_conditional_predicate and decides which wording, if any, is used when
 * warning about a literal in the predicate.
 */
typedef enum {
    /** Literal warnings are worded with "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** Literal warnings are worded with "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
1377 const pm_node_t *element = cast->elements.nodes[index];
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1392 case PM_REGULAR_EXPRESSION_NODE:
1393 case PM_SOURCE_ENCODING_NODE:
1394 case PM_SOURCE_FILE_NODE:
1395 case PM_SOURCE_LINE_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
1445
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
1477 case PM_REGULAR_EXPRESSION_NODE:
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1482 node->type = PM_MATCH_LAST_LINE_NODE;
1483
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
1489 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1494 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
1510 case PM_STRING_NODE:
1511 case PM_SOURCE_FILE_NODE:
1512 case PM_INTERPOLATED_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
1515 case PM_SYMBOL_NODE:
1516 case PM_INTERPOLATED_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
1519 case PM_SOURCE_LINE_NODE:
1520 case PM_SOURCE_ENCODING_NODE:
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
1526 case PM_CLASS_VARIABLE_WRITE_NODE:
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1529 case PM_CONSTANT_WRITE_NODE:
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1532 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1535 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1538 case PM_LOCAL_VARIABLE_WRITE_NODE:
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1541 case PM_MULTI_WRITE_NODE:
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
// An empty location anchored at the start of the parser's source.
#define PM_LOCATION_NULL_VALUE(parser) \
    ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })

// The location spanned by a token (given as a pointer).
#define PM_LOCATION_TOKEN_VALUE(token) \
    ((pm_location_t) { .start = (token)->start, .end = (token)->end })

// The location of a node, given as a pointer to something with a `location`
// member.
#define PM_LOCATION_NODE_VALUE(node) \
    ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })

// The location of a node, given as a pointer to something with a `base`
// member that holds the `location`.
#define PM_LOCATION_NODE_BASE_VALUE(node) \
    ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })

// The value used for an optional location that is absent: both ends NULL.
#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE \
    ((pm_location_t) { .start = NULL, .end = NULL })

// The location of a token, or the absent value if the token's type is
// PM_TOKEN_NOT_PROVIDED.
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) \
    ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1653 if (n <= 0) return 0;
1654
1655 if (parser->encoding_changed) {
1656 size_t width;
1657
1658 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1659 return width;
1660 } else if (*b == '_') {
1661 return 1;
1662 } else if (*b >= 0x80) {
1663 return parser->encoding->char_width(b, n);
1664 } else {
1665 return 0;
1666 }
1667 } else if (*b < 0x80) {
1668 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1669 } else {
1670 return pm_encoding_utf_8_char_width(b, n);
1671 }
1672}
1673
1678static inline size_t
1679char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1680 if (n <= 0) {
1681 return 0;
1682 } else if (*b < 0x80) {
1683 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1684 } else {
1685 return pm_encoding_utf_8_char_width(b, n);
1686 }
1687}
1688
1694static inline size_t
1695char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1696 if (n <= 0) {
1697 return 0;
1698 } else if (parser->encoding_changed) {
1699 size_t width;
1700
1701 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1702 return width;
1703 } else if (*b == '_') {
1704 return 1;
1705 } else if (*b >= 0x80) {
1706 return parser->encoding->char_width(b, n);
1707 } else {
1708 return 0;
1709 }
1710 } else {
1711 return char_is_identifier_utf8(b, n);
1712 }
1713}
1714
// The set of punctuation characters that are allowed as a global name (the
// character directly after a $) is stored as a bitmap: one 32-bit word per
// block of 32 character codes, starting at 0x20, with bit (c % 32) set for each
// member c. This makes the membership check a shift and a mask.
#define PM_GLOBAL_NAME_BIT(c, word) ((((c) / 32 - 1) == (word)) ? (1U << ((c) % 32)) : 0U)
#define PM_GLOBAL_NAME_WORD(word) ( \
    PM_GLOBAL_NAME_BIT('~', word)  | PM_GLOBAL_NAME_BIT('*', word) | \
    PM_GLOBAL_NAME_BIT('$', word)  | PM_GLOBAL_NAME_BIT('?', word) | \
    PM_GLOBAL_NAME_BIT('!', word)  | PM_GLOBAL_NAME_BIT('@', word) | \
    PM_GLOBAL_NAME_BIT('/', word)  | PM_GLOBAL_NAME_BIT('\\', word) | \
    PM_GLOBAL_NAME_BIT(';', word)  | PM_GLOBAL_NAME_BIT(',', word) | \
    PM_GLOBAL_NAME_BIT('.', word)  | PM_GLOBAL_NAME_BIT('=', word) | \
    PM_GLOBAL_NAME_BIT(':', word)  | PM_GLOBAL_NAME_BIT('<', word) | \
    PM_GLOBAL_NAME_BIT('>', word)  | PM_GLOBAL_NAME_BIT('\"', word) | \
    PM_GLOBAL_NAME_BIT('&', word)  | PM_GLOBAL_NAME_BIT('`', word) | \
    PM_GLOBAL_NAME_BIT('\'', word) | PM_GLOBAL_NAME_BIT('+', word) | \
    PM_GLOBAL_NAME_BIT('0', word))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PM_GLOBAL_NAME_WORD(0),
    PM_GLOBAL_NAME_WORD(1),
    PM_GLOBAL_NAME_WORD(2)
};

#undef PM_GLOBAL_NAME_BIT
#undef PM_GLOBAL_NAME_WORD

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes at or below 0x20 and above 0x7e
 * are never members.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    if (b <= 0x20 || b > 0x7e) return false;

    const unsigned int word = pm_global_name_punctuation_hash[(b - 0x20u) / 32];
    return ((word >> (b % 32)) & 1u) != 0;
}
1739
1740static inline bool
1741token_is_setter_name(pm_token_t *token) {
1742 return (
1743 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1744 ((token->type == PM_TOKEN_IDENTIFIER) &&
1745 (token->end - token->start >= 2) &&
1746 (token->end[-1] == '='))
1747 );
1748}
1749
/**
 * Return true if the first `length` bytes of `source` spell one of the
 * keywords listed below.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
    // The keywords of a given length are packed into a single string, each
    // one followed by a NUL. The string literal's own terminating NUL marks
    // the end of the list.
    const char *candidates;

    switch (length) {
        case 2:
            candidates = "do\0if\0in\0or\0";
            break;
        case 3:
            candidates = "and\0def\0end\0for\0nil\0not\0";
            break;
        case 4:
            candidates = "case\0else\0next\0redo\0self\0then\0true\0when\0";
            break;
        case 5:
            candidates = "alias\0begin\0break\0class\0elsif\0false\0retry\0super\0undef\0until\0while\0yield\0";
            break;
        case 6:
            candidates = "ensure\0module\0rescue\0return\0unless\0";
            break;
        case 8:
            candidates = "__LINE__\0__FILE__\0";
            break;
        case 12:
            candidates = "__ENCODING__\0";
            break;
        default:
            return false;
    }

    // Every candidate is exactly `length` bytes long, so stepping by
    // length + 1 moves from one candidate to the next.
    for (const char *keyword = candidates; *keyword != '\0'; keyword += length + 1) {
        if (memcmp(source, keyword, length) == 0) return true;
    }

    return false;
}
1820
1821/******************************************************************************/
1822/* Node flag handling functions */
1823/******************************************************************************/
1824
1828static inline void
1829pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1830 node->flags |= flag;
1831}
1832
1836static inline void
1837pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1838 node->flags &= (pm_node_flags_t) ~flag;
1839}
1840
1844static inline void
1845pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1846 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1847 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1848 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1849 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1850 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1851 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1852 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1853 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1854
1855 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1856}
1857
1858/******************************************************************************/
1859/* Node creation functions */
1860/******************************************************************************/
1861
1867#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1868
1872static inline pm_node_flags_t
1873pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1874 pm_node_flags_t flags = 0;
1875
1876 if (closing->type == PM_TOKEN_REGEXP_END) {
1877 pm_buffer_t unknown_flags = { 0 };
1878
1879 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1880 switch (*flag) {
1881 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1882 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1883 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1884 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1885
1886 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1887 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1888 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1889 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1890
1891 default: pm_buffer_append_byte(&unknown_flags, *flag);
1892 }
1893 }
1894
1895 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1896 if (unknown_flags_length != 0) {
1897 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1898 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1899 }
1900 pm_buffer_free(&unknown_flags);
1901 }
1902
1903 return flags;
1904}
1905
1906#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1907
1908static pm_statements_node_t *
1909pm_statements_node_create(pm_parser_t *parser);
1910
1911static void
1912pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1913
1914static size_t
1915pm_statements_node_body_length(pm_statements_node_t *node);
1916
1921static inline void *
1922pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1923 void *memory = xcalloc(1, size);
1924 if (memory == NULL) {
1925 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1926 abort();
1927 }
1928 return memory;
1929}
1930
// Allocate zeroed storage for one node of the given type through
// pm_node_alloc and yield it as a pointer to that type. The expansion is
// fully parenthesized so that it can be used inside a larger expression.
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

// Increment the parser's node_id counter and yield the new value. The parser
// argument is parenthesized so that any pointer expression can be passed.
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1933
1937static pm_missing_node_t *
1938pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1939 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1940
1941 *node = (pm_missing_node_t) {{
1942 .type = PM_MISSING_NODE,
1943 .node_id = PM_NODE_IDENTIFY(parser),
1944 .location = { .start = start, .end = end }
1945 }};
1946
1947 return node;
1948}
1949
1953static pm_alias_global_variable_node_t *
1954pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1955 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1956 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1957
1958 *node = (pm_alias_global_variable_node_t) {
1959 {
1960 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1961 .node_id = PM_NODE_IDENTIFY(parser),
1962 .location = {
1963 .start = keyword->start,
1964 .end = old_name->location.end
1965 },
1966 },
1967 .new_name = new_name,
1968 .old_name = old_name,
1969 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1970 };
1971
1972 return node;
1973}
1974
1978static pm_alias_method_node_t *
1979pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1980 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1981 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1982
1983 *node = (pm_alias_method_node_t) {
1984 {
1985 .type = PM_ALIAS_METHOD_NODE,
1986 .node_id = PM_NODE_IDENTIFY(parser),
1987 .location = {
1988 .start = keyword->start,
1989 .end = old_name->location.end
1990 },
1991 },
1992 .new_name = new_name,
1993 .old_name = old_name,
1994 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1995 };
1996
1997 return node;
1998}
1999
2003static pm_alternation_pattern_node_t *
2004pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2005 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2006
2007 *node = (pm_alternation_pattern_node_t) {
2008 {
2009 .type = PM_ALTERNATION_PATTERN_NODE,
2010 .node_id = PM_NODE_IDENTIFY(parser),
2011 .location = {
2012 .start = left->location.start,
2013 .end = right->location.end
2014 },
2015 },
2016 .left = left,
2017 .right = right,
2018 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2019 };
2020
2021 return node;
2022}
2023
2027static pm_and_node_t *
2028pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2029 pm_assert_value_expression(parser, left);
2030
2031 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2032
2033 *node = (pm_and_node_t) {
2034 {
2035 .type = PM_AND_NODE,
2036 .node_id = PM_NODE_IDENTIFY(parser),
2037 .location = {
2038 .start = left->location.start,
2039 .end = right->location.end
2040 },
2041 },
2042 .left = left,
2043 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2044 .right = right
2045 };
2046
2047 return node;
2048}
2049
2053static pm_arguments_node_t *
2054pm_arguments_node_create(pm_parser_t *parser) {
2055 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2056
2057 *node = (pm_arguments_node_t) {
2058 {
2059 .type = PM_ARGUMENTS_NODE,
2060 .node_id = PM_NODE_IDENTIFY(parser),
2061 .location = PM_LOCATION_NULL_VALUE(parser)
2062 },
2063 .arguments = { 0 }
2064 };
2065
2066 return node;
2067}
2068
2072static size_t
2073pm_arguments_node_size(pm_arguments_node_t *node) {
2074 return node->arguments.size;
2075}
2076
2080static void
2081pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2082 if (pm_arguments_node_size(node) == 0) {
2083 node->base.location.start = argument->location.start;
2084 }
2085
2086 node->base.location.end = argument->location.end;
2087 pm_node_list_append(&node->arguments, argument);
2088
2089 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2090 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2091 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2092 } else {
2093 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2094 }
2095 }
2096}
2097
2101static pm_array_node_t *
2102pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2103 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2104
2105 *node = (pm_array_node_t) {
2106 {
2107 .type = PM_ARRAY_NODE,
2108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2109 .node_id = PM_NODE_IDENTIFY(parser),
2110 .location = PM_LOCATION_TOKEN_VALUE(opening)
2111 },
2112 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2113 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2114 .elements = { 0 }
2115 };
2116
2117 return node;
2118}
2119
2123static inline void
2124pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2125 if (!node->elements.size && !node->opening_loc.start) {
2126 node->base.location.start = element->location.start;
2127 }
2128
2129 pm_node_list_append(&node->elements, element);
2130 node->base.location.end = element->location.end;
2131
2132 // If the element is not a static literal, then the array is not a static
2133 // literal. Turn that flag off.
2134 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2135 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2136 }
2137
2138 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2139 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2140 }
2141}
2142
2146static void
2147pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2148 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2149 node->base.location.end = closing->end;
2150 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2151}
2152
2157static pm_array_pattern_node_t *
2158pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2159 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2160
2161 *node = (pm_array_pattern_node_t) {
2162 {
2163 .type = PM_ARRAY_PATTERN_NODE,
2164 .node_id = PM_NODE_IDENTIFY(parser),
2165 .location = {
2166 .start = nodes->nodes[0]->location.start,
2167 .end = nodes->nodes[nodes->size - 1]->location.end
2168 },
2169 },
2170 .constant = NULL,
2171 .rest = NULL,
2172 .requireds = { 0 },
2173 .posts = { 0 },
2174 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2175 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2176 };
2177
2178 // For now we're going to just copy over each pointer manually. This could be
2179 // much more efficient, as we could instead resize the node list.
2180 bool found_rest = false;
2181 pm_node_t *child;
2182
2183 PM_NODE_LIST_FOREACH(nodes, index, child) {
2184 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2185 node->rest = child;
2186 found_rest = true;
2187 } else if (found_rest) {
2188 pm_node_list_append(&node->posts, child);
2189 } else {
2190 pm_node_list_append(&node->requireds, child);
2191 }
2192 }
2193
2194 return node;
2195}
2196
2200static pm_array_pattern_node_t *
2201pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2202 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2203
2204 *node = (pm_array_pattern_node_t) {
2205 {
2206 .type = PM_ARRAY_PATTERN_NODE,
2207 .node_id = PM_NODE_IDENTIFY(parser),
2208 .location = rest->location,
2209 },
2210 .constant = NULL,
2211 .rest = rest,
2212 .requireds = { 0 },
2213 .posts = { 0 },
2214 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2215 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 {
2231 .type = PM_ARRAY_PATTERN_NODE,
2232 .node_id = PM_NODE_IDENTIFY(parser),
2233 .location = {
2234 .start = constant->location.start,
2235 .end = closing->end
2236 },
2237 },
2238 .constant = constant,
2239 .rest = NULL,
2240 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2241 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2242 .requireds = { 0 },
2243 .posts = { 0 }
2244 };
2245
2246 return node;
2247}
2248
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2255 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2256
2257 *node = (pm_array_pattern_node_t) {
2258 {
2259 .type = PM_ARRAY_PATTERN_NODE,
2260 .node_id = PM_NODE_IDENTIFY(parser),
2261 .location = {
2262 .start = opening->start,
2263 .end = closing->end
2264 },
2265 },
2266 .constant = NULL,
2267 .rest = NULL,
2268 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2269 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2270 .requireds = { 0 },
2271 .posts = { 0 }
2272 };
2273
2274 return node;
2275}
2276
2277static inline void
2278pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2279 pm_node_list_append(&node->requireds, inner);
2280}
2281
2285static pm_assoc_node_t *
2286pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2287 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2288 const uint8_t *end;
2289
2290 if (value != NULL && value->location.end > key->location.end) {
2291 end = value->location.end;
2292 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2293 end = operator->end;
2294 } else {
2295 end = key->location.end;
2296 }
2297
2298 // Hash string keys will be frozen, so we can mark them as frozen here so
2299 // that the compiler picks them up and also when we check for static literal
2300 // on the keys it gets factored in.
2301 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2302 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2303 }
2304
2305 // If the key and value of this assoc node are both static literals, then
2306 // we can mark this node as a static literal.
2307 pm_node_flags_t flags = 0;
2308 if (
2309 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2310 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2311 ) {
2312 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2313 }
2314
2315 *node = (pm_assoc_node_t) {
2316 {
2317 .type = PM_ASSOC_NODE,
2318 .flags = flags,
2319 .node_id = PM_NODE_IDENTIFY(parser),
2320 .location = {
2321 .start = key->location.start,
2322 .end = end
2323 },
2324 },
2325 .key = key,
2326 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2327 .value = value
2328 };
2329
2330 return node;
2331}
2332
2336static pm_assoc_splat_node_t *
2337pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2338 assert(operator->type == PM_TOKEN_USTAR_STAR);
2339 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2340
2341 *node = (pm_assoc_splat_node_t) {
2342 {
2343 .type = PM_ASSOC_SPLAT_NODE,
2344 .node_id = PM_NODE_IDENTIFY(parser),
2345 .location = {
2346 .start = operator->start,
2347 .end = value == NULL ? operator->end : value->location.end
2348 },
2349 },
2350 .value = value,
2351 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2352 };
2353
2354 return node;
2355}
2356
2360static pm_back_reference_read_node_t *
2361pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2362 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2363 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2364
2365 *node = (pm_back_reference_read_node_t) {
2366 {
2367 .type = PM_BACK_REFERENCE_READ_NODE,
2368 .node_id = PM_NODE_IDENTIFY(parser),
2369 .location = PM_LOCATION_TOKEN_VALUE(name),
2370 },
2371 .name = pm_parser_constant_id_token(parser, name)
2372 };
2373
2374 return node;
2375}
2376
2380static pm_begin_node_t *
2381pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2382 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2383
2384 *node = (pm_begin_node_t) {
2385 {
2386 .type = PM_BEGIN_NODE,
2387 .node_id = PM_NODE_IDENTIFY(parser),
2388 .location = {
2389 .start = begin_keyword->start,
2390 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2391 },
2392 },
2393 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2394 .statements = statements,
2395 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2396 };
2397
2398 return node;
2399}
2400
2404static void
2405pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2406 // If the begin keyword doesn't exist, we set the start on the begin_node
2407 if (!node->begin_keyword_loc.start) {
2408 node->base.location.start = rescue_clause->base.location.start;
2409 }
2410 node->base.location.end = rescue_clause->base.location.end;
2411 node->rescue_clause = rescue_clause;
2412}
2413
2417static void
2418pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2419 node->base.location.end = else_clause->base.location.end;
2420 node->else_clause = else_clause;
2421}
2422
2426static void
2427pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2428 node->base.location.end = ensure_clause->base.location.end;
2429 node->ensure_clause = ensure_clause;
2430}
2431
2435static void
2436pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2437 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2438
2439 node->base.location.end = end_keyword->end;
2440 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2441}
2442
2446static pm_block_argument_node_t *
2447pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2448 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2449
2450 *node = (pm_block_argument_node_t) {
2451 {
2452 .type = PM_BLOCK_ARGUMENT_NODE,
2453 .node_id = PM_NODE_IDENTIFY(parser),
2454 .location = {
2455 .start = operator->start,
2456 .end = expression == NULL ? operator->end : expression->location.end
2457 },
2458 },
2459 .expression = expression,
2460 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2461 };
2462
2463 return node;
2464}
2465
2469static pm_block_node_t *
2470pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2471 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2472
2473 *node = (pm_block_node_t) {
2474 {
2475 .type = PM_BLOCK_NODE,
2476 .node_id = PM_NODE_IDENTIFY(parser),
2477 .location = { .start = opening->start, .end = closing->end },
2478 },
2479 .locals = *locals,
2480 .parameters = parameters,
2481 .body = body,
2482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2484 };
2485
2486 return node;
2487}
2488
2492static pm_block_parameter_node_t *
2493pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2494 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2495 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2496
2497 *node = (pm_block_parameter_node_t) {
2498 {
2499 .type = PM_BLOCK_PARAMETER_NODE,
2500 .node_id = PM_NODE_IDENTIFY(parser),
2501 .location = {
2502 .start = operator->start,
2503 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2504 },
2505 },
2506 .name = pm_parser_optional_constant_id_token(parser, name),
2507 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2508 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2509 };
2510
2511 return node;
2512}
2513
2517static pm_block_parameters_node_t *
2518pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2519 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2520
2521 const uint8_t *start;
2522 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2523 start = opening->start;
2524 } else if (parameters != NULL) {
2525 start = parameters->base.location.start;
2526 } else {
2527 start = NULL;
2528 }
2529
2530 const uint8_t *end;
2531 if (parameters != NULL) {
2532 end = parameters->base.location.end;
2533 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2534 end = opening->end;
2535 } else {
2536 end = NULL;
2537 }
2538
2539 *node = (pm_block_parameters_node_t) {
2540 {
2541 .type = PM_BLOCK_PARAMETERS_NODE,
2542 .node_id = PM_NODE_IDENTIFY(parser),
2543 .location = {
2544 .start = start,
2545 .end = end
2546 }
2547 },
2548 .parameters = parameters,
2549 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2550 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2551 .locals = { 0 }
2552 };
2553
2554 return node;
2555}
2556
2560static void
2561pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2562 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2563
2564 node->base.location.end = closing->end;
2565 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2566}
2567
2571static pm_block_local_variable_node_t *
2572pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2573 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2574
2575 *node = (pm_block_local_variable_node_t) {
2576 {
2577 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2578 .node_id = PM_NODE_IDENTIFY(parser),
2579 .location = PM_LOCATION_TOKEN_VALUE(name),
2580 },
2581 .name = pm_parser_constant_id_token(parser, name)
2582 };
2583
2584 return node;
2585}
2586
2590static void
2591pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2592 pm_node_list_append(&node->locals, (pm_node_t *) local);
2593
2594 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2595 node->base.location.end = local->base.location.end;
2596}
2597
2601static pm_break_node_t *
2602pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2603 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2604 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2605
2606 *node = (pm_break_node_t) {
2607 {
2608 .type = PM_BREAK_NODE,
2609 .node_id = PM_NODE_IDENTIFY(parser),
2610 .location = {
2611 .start = keyword->start,
2612 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2613 },
2614 },
2615 .arguments = arguments,
2616 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2617 };
2618
2619 return node;
2620}
2621
2622// There are certain flags that we want to use internally but don't want to
2623// expose because they are not relevant beyond parsing. Therefore we'll define
2624// them here and not define them in config.yml/a header file.
2625static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2626static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2627static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2628static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2629
2635static pm_call_node_t *
2636pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2637 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2638
2639 *node = (pm_call_node_t) {
2640 {
2641 .type = PM_CALL_NODE,
2642 .flags = flags,
2643 .node_id = PM_NODE_IDENTIFY(parser),
2644 .location = PM_LOCATION_NULL_VALUE(parser),
2645 },
2646 .receiver = NULL,
2647 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2648 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2649 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2650 .arguments = NULL,
2651 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2652 .block = NULL,
2653 .name = 0
2654 };
2655
2656 return node;
2657}
2658
2663static inline pm_node_flags_t
2664pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2665 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2666}
2667
2672static pm_call_node_t *
2673pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2674 pm_assert_value_expression(parser, receiver);
2675
2676 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2677 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2678 flags |= PM_CALL_NODE_FLAGS_INDEX;
2679 }
2680
2681 pm_call_node_t *node = pm_call_node_create(parser, flags);
2682
2683 node->base.location.start = receiver->location.start;
2684 node->base.location.end = pm_arguments_end(arguments);
2685
2686 node->receiver = receiver;
2687 node->message_loc.start = arguments->opening_loc.start;
2688 node->message_loc.end = arguments->closing_loc.end;
2689
2690 node->opening_loc = arguments->opening_loc;
2691 node->arguments = arguments->arguments;
2692 node->closing_loc = arguments->closing_loc;
2693 node->block = arguments->block;
2694
2695 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2696 return node;
2697}
2698
2702static pm_call_node_t *
2703pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2704 pm_assert_value_expression(parser, receiver);
2705 pm_assert_value_expression(parser, argument);
2706
2707 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2708
2709 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2710 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2711
2712 node->receiver = receiver;
2713 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2714
2715 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2716 pm_arguments_node_arguments_append(arguments, argument);
2717 node->arguments = arguments;
2718
2719 node->name = pm_parser_constant_id_token(parser, operator);
2720 return node;
2721}
2722
2726static pm_call_node_t *
2727pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2728 pm_assert_value_expression(parser, receiver);
2729
2730 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2731
2732 node->base.location.start = receiver->location.start;
2733 const uint8_t *end = pm_arguments_end(arguments);
2734 if (end == NULL) {
2735 end = message->end;
2736 }
2737 node->base.location.end = end;
2738
2739 node->receiver = receiver;
2740 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2741 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2742 node->opening_loc = arguments->opening_loc;
2743 node->arguments = arguments->arguments;
2744 node->closing_loc = arguments->closing_loc;
2745 node->block = arguments->block;
2746
2747 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2748 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2749 }
2750
2751 node->name = pm_parser_constant_id_token(parser, message);
2752 return node;
2753}
2754
2758static pm_call_node_t *
2759pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2760 pm_call_node_t *node = pm_call_node_create(parser, 0);
2761 node->base.location.start = parser->start;
2762 node->base.location.end = parser->end;
2763
2764 node->receiver = receiver;
2765 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2766 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2767 node->arguments = arguments;
2768
2769 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2770 return node;
2771}
2772
2777static pm_call_node_t *
2778pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2779 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2780
2781 node->base.location.start = message->start;
2782 node->base.location.end = pm_arguments_end(arguments);
2783
2784 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2785 node->opening_loc = arguments->opening_loc;
2786 node->arguments = arguments->arguments;
2787 node->closing_loc = arguments->closing_loc;
2788 node->block = arguments->block;
2789
2790 node->name = pm_parser_constant_id_token(parser, message);
2791 return node;
2792}
2793
2798static pm_call_node_t *
2799pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2800 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2801
2802 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2803 node->arguments = arguments;
2804
2805 node->name = name;
2806 return node;
2807}
2808
2812static pm_call_node_t *
2813pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2814 pm_assert_value_expression(parser, receiver);
2815 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2816
2817 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2818
2819 node->base.location.start = message->start;
2820 if (arguments->closing_loc.start != NULL) {
2821 node->base.location.end = arguments->closing_loc.end;
2822 } else {
2823 assert(receiver != NULL);
2824 node->base.location.end = receiver->location.end;
2825 }
2826
2827 node->receiver = receiver;
2828 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2829 node->opening_loc = arguments->opening_loc;
2830 node->arguments = arguments->arguments;
2831 node->closing_loc = arguments->closing_loc;
2832
2833 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2834 return node;
2835}
2836
2840static pm_call_node_t *
2841pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2842 pm_assert_value_expression(parser, receiver);
2843
2844 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2845
2846 node->base.location.start = receiver->location.start;
2847 node->base.location.end = pm_arguments_end(arguments);
2848
2849 node->receiver = receiver;
2850 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2851 node->opening_loc = arguments->opening_loc;
2852 node->arguments = arguments->arguments;
2853 node->closing_loc = arguments->closing_loc;
2854 node->block = arguments->block;
2855
2856 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2857 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2858 }
2859
2860 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2861 return node;
2862}
2863
2867static pm_call_node_t *
2868pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2869 pm_assert_value_expression(parser, receiver);
2870
2871 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2872
2873 node->base.location.start = operator->start;
2874 node->base.location.end = receiver->location.end;
2875
2876 node->receiver = receiver;
2877 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2878
2879 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2880 return node;
2881}
2882
2887static pm_call_node_t *
2888pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2889 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2890
2891 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2892 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2893
2894 node->name = pm_parser_constant_id_token(parser, message);
2895 return node;
2896}
2897
2902static inline bool
2903pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2904 return (
2905 (node->message_loc.start != NULL) &&
2906 (node->message_loc.end[-1] != '!') &&
2907 (node->message_loc.end[-1] != '?') &&
2908 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2909 (node->opening_loc.start == NULL) &&
2910 (node->arguments == NULL) &&
2911 (node->block == NULL)
2912 );
2913}
2914
2918static void
2919pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2920 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2921
2922 if (write_constant->length > 0) {
2923 size_t length = write_constant->length - 1;
2924
2925 void *memory = xmalloc(length);
2926 memcpy(memory, write_constant->start, length);
2927
2928 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2929 } else {
2930 // We can get here if the message was missing because of a syntax error.
2931 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2932 }
2933}
2934
2938static pm_call_and_write_node_t *
2939pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2940 assert(target->block == NULL);
2941 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2942 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2943
2944 *node = (pm_call_and_write_node_t) {
2945 {
2946 .type = PM_CALL_AND_WRITE_NODE,
2947 .flags = target->base.flags,
2948 .node_id = PM_NODE_IDENTIFY(parser),
2949 .location = {
2950 .start = target->base.location.start,
2951 .end = value->location.end
2952 }
2953 },
2954 .receiver = target->receiver,
2955 .call_operator_loc = target->call_operator_loc,
2956 .message_loc = target->message_loc,
2957 .read_name = 0,
2958 .write_name = target->name,
2959 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2960 .value = value
2961 };
2962
2963 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2964
2965 // Here we're going to free the target, since it is no longer necessary.
2966 // However, we don't want to call `pm_node_destroy` because we want to keep
2967 // around all of its children since we just reused them.
2968 xfree(target);
2969
2970 return node;
2971}
2972
2977static void
2978pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2979 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2980 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2981 pm_node_t *node;
2982 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2983 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2984 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2985 break;
2986 }
2987 }
2988 }
2989
2990 if (block != NULL) {
2991 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2992 }
2993 }
2994}
2995
2999static pm_index_and_write_node_t *
3000pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3001 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3002 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
3003
3004 pm_index_arguments_check(parser, target->arguments, target->block);
3005
3006 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3007 *node = (pm_index_and_write_node_t) {
3008 {
3009 .type = PM_INDEX_AND_WRITE_NODE,
3010 .flags = target->base.flags,
3011 .node_id = PM_NODE_IDENTIFY(parser),
3012 .location = {
3013 .start = target->base.location.start,
3014 .end = value->location.end
3015 }
3016 },
3017 .receiver = target->receiver,
3018 .call_operator_loc = target->call_operator_loc,
3019 .opening_loc = target->opening_loc,
3020 .arguments = target->arguments,
3021 .closing_loc = target->closing_loc,
3022 .block = (pm_block_argument_node_t *) target->block,
3023 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3024 .value = value
3025 };
3026
3027 // Here we're going to free the target, since it is no longer necessary.
3028 // However, we don't want to call `pm_node_destroy` because we want to keep
3029 // around all of its children since we just reused them.
3030 xfree(target);
3031
3032 return node;
3033}
3034
3038static pm_call_operator_write_node_t *
3039pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3040 assert(target->block == NULL);
3041 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3042
3043 *node = (pm_call_operator_write_node_t) {
3044 {
3045 .type = PM_CALL_OPERATOR_WRITE_NODE,
3046 .flags = target->base.flags,
3047 .node_id = PM_NODE_IDENTIFY(parser),
3048 .location = {
3049 .start = target->base.location.start,
3050 .end = value->location.end
3051 }
3052 },
3053 .receiver = target->receiver,
3054 .call_operator_loc = target->call_operator_loc,
3055 .message_loc = target->message_loc,
3056 .read_name = 0,
3057 .write_name = target->name,
3058 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3059 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3060 .value = value
3061 };
3062
3063 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3064
3065 // Here we're going to free the target, since it is no longer necessary.
3066 // However, we don't want to call `pm_node_destroy` because we want to keep
3067 // around all of its children since we just reused them.
3068 xfree(target);
3069
3070 return node;
3071}
3072
3076static pm_index_operator_write_node_t *
3077pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3078 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3079
3080 pm_index_arguments_check(parser, target->arguments, target->block);
3081
3082 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3083 *node = (pm_index_operator_write_node_t) {
3084 {
3085 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3086 .flags = target->base.flags,
3087 .node_id = PM_NODE_IDENTIFY(parser),
3088 .location = {
3089 .start = target->base.location.start,
3090 .end = value->location.end
3091 }
3092 },
3093 .receiver = target->receiver,
3094 .call_operator_loc = target->call_operator_loc,
3095 .opening_loc = target->opening_loc,
3096 .arguments = target->arguments,
3097 .closing_loc = target->closing_loc,
3098 .block = (pm_block_argument_node_t *) target->block,
3099 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3100 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3101 .value = value
3102 };
3103
3104 // Here we're going to free the target, since it is no longer necessary.
3105 // However, we don't want to call `pm_node_destroy` because we want to keep
3106 // around all of its children since we just reused them.
3107 xfree(target);
3108
3109 return node;
3110}
3111
3115static pm_call_or_write_node_t *
3116pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3117 assert(target->block == NULL);
3118 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3119 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3120
3121 *node = (pm_call_or_write_node_t) {
3122 {
3123 .type = PM_CALL_OR_WRITE_NODE,
3124 .flags = target->base.flags,
3125 .node_id = PM_NODE_IDENTIFY(parser),
3126 .location = {
3127 .start = target->base.location.start,
3128 .end = value->location.end
3129 }
3130 },
3131 .receiver = target->receiver,
3132 .call_operator_loc = target->call_operator_loc,
3133 .message_loc = target->message_loc,
3134 .read_name = 0,
3135 .write_name = target->name,
3136 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3137 .value = value
3138 };
3139
3140 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3141
3142 // Here we're going to free the target, since it is no longer necessary.
3143 // However, we don't want to call `pm_node_destroy` because we want to keep
3144 // around all of its children since we just reused them.
3145 xfree(target);
3146
3147 return node;
3148}
3149
3153static pm_index_or_write_node_t *
3154pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3155 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3156 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3157
3158 pm_index_arguments_check(parser, target->arguments, target->block);
3159
3160 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3161 *node = (pm_index_or_write_node_t) {
3162 {
3163 .type = PM_INDEX_OR_WRITE_NODE,
3164 .flags = target->base.flags,
3165 .node_id = PM_NODE_IDENTIFY(parser),
3166 .location = {
3167 .start = target->base.location.start,
3168 .end = value->location.end
3169 }
3170 },
3171 .receiver = target->receiver,
3172 .call_operator_loc = target->call_operator_loc,
3173 .opening_loc = target->opening_loc,
3174 .arguments = target->arguments,
3175 .closing_loc = target->closing_loc,
3176 .block = (pm_block_argument_node_t *) target->block,
3177 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3178 .value = value
3179 };
3180
3181 // Here we're going to free the target, since it is no longer necessary.
3182 // However, we don't want to call `pm_node_destroy` because we want to keep
3183 // around all of its children since we just reused them.
3184 xfree(target);
3185
3186 return node;
3187}
3188
3193static pm_call_target_node_t *
3194pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3195 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3196
3197 *node = (pm_call_target_node_t) {
3198 {
3199 .type = PM_CALL_TARGET_NODE,
3200 .flags = target->base.flags,
3201 .node_id = PM_NODE_IDENTIFY(parser),
3202 .location = target->base.location
3203 },
3204 .receiver = target->receiver,
3205 .call_operator_loc = target->call_operator_loc,
3206 .name = target->name,
3207 .message_loc = target->message_loc
3208 };
3209
3210 // Here we're going to free the target, since it is no longer necessary.
3211 // However, we don't want to call `pm_node_destroy` because we want to keep
3212 // around all of its children since we just reused them.
3213 xfree(target);
3214
3215 return node;
3216}
3217
3222static pm_index_target_node_t *
3223pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3224 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3225 pm_node_flags_t flags = target->base.flags;
3226
3227 pm_index_arguments_check(parser, target->arguments, target->block);
3228
3229 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3230 *node = (pm_index_target_node_t) {
3231 {
3232 .type = PM_INDEX_TARGET_NODE,
3233 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3234 .node_id = PM_NODE_IDENTIFY(parser),
3235 .location = target->base.location
3236 },
3237 .receiver = target->receiver,
3238 .opening_loc = target->opening_loc,
3239 .arguments = target->arguments,
3240 .closing_loc = target->closing_loc,
3241 .block = (pm_block_argument_node_t *) target->block,
3242 };
3243
3244 // Here we're going to free the target, since it is no longer necessary.
3245 // However, we don't want to call `pm_node_destroy` because we want to keep
3246 // around all of its children since we just reused them.
3247 xfree(target);
3248
3249 return node;
3250}
3251
3255static pm_capture_pattern_node_t *
3256pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3257 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3258
3259 *node = (pm_capture_pattern_node_t) {
3260 {
3261 .type = PM_CAPTURE_PATTERN_NODE,
3262 .node_id = PM_NODE_IDENTIFY(parser),
3263 .location = {
3264 .start = value->location.start,
3265 .end = target->base.location.end
3266 },
3267 },
3268 .value = value,
3269 .target = target,
3270 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3271 };
3272
3273 return node;
3274}
3275
3279static pm_case_node_t *
3280pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3281 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3282
3283 *node = (pm_case_node_t) {
3284 {
3285 .type = PM_CASE_NODE,
3286 .node_id = PM_NODE_IDENTIFY(parser),
3287 .location = {
3288 .start = case_keyword->start,
3289 .end = end_keyword->end
3290 },
3291 },
3292 .predicate = predicate,
3293 .else_clause = NULL,
3294 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3295 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3296 .conditions = { 0 }
3297 };
3298
3299 return node;
3300}
3301
3305static void
3306pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3307 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3308
3309 pm_node_list_append(&node->conditions, condition);
3310 node->base.location.end = condition->location.end;
3311}
3312
3316static void
3317pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3318 node->else_clause = else_clause;
3319 node->base.location.end = else_clause->base.location.end;
3320}
3321
3325static void
3326pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3327 node->base.location.end = end_keyword->end;
3328 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3329}
3330
3334static pm_case_match_node_t *
3335pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3336 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3337
3338 *node = (pm_case_match_node_t) {
3339 {
3340 .type = PM_CASE_MATCH_NODE,
3341 .node_id = PM_NODE_IDENTIFY(parser),
3342 .location = {
3343 .start = case_keyword->start,
3344 .end = end_keyword->end
3345 },
3346 },
3347 .predicate = predicate,
3348 .else_clause = NULL,
3349 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3350 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3351 .conditions = { 0 }
3352 };
3353
3354 return node;
3355}
3356
3360static void
3361pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3362 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3363
3364 pm_node_list_append(&node->conditions, condition);
3365 node->base.location.end = condition->location.end;
3366}
3367
3371static void
3372pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3373 node->else_clause = else_clause;
3374 node->base.location.end = else_clause->base.location.end;
3375}
3376
3380static void
3381pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3382 node->base.location.end = end_keyword->end;
3383 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3384}
3385
3389static pm_class_node_t *
3390pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3391 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3392
3393 *node = (pm_class_node_t) {
3394 {
3395 .type = PM_CLASS_NODE,
3396 .node_id = PM_NODE_IDENTIFY(parser),
3397 .location = { .start = class_keyword->start, .end = end_keyword->end },
3398 },
3399 .locals = *locals,
3400 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3401 .constant_path = constant_path,
3402 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3403 .superclass = superclass,
3404 .body = body,
3405 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3406 .name = pm_parser_constant_id_token(parser, name)
3407 };
3408
3409 return node;
3410}
3411
3415static pm_class_variable_and_write_node_t *
3416pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3417 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3418 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3419
3420 *node = (pm_class_variable_and_write_node_t) {
3421 {
3422 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3423 .node_id = PM_NODE_IDENTIFY(parser),
3424 .location = {
3425 .start = target->base.location.start,
3426 .end = value->location.end
3427 }
3428 },
3429 .name = target->name,
3430 .name_loc = target->base.location,
3431 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3432 .value = value
3433 };
3434
3435 return node;
3436}
3437
3441static pm_class_variable_operator_write_node_t *
3442pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3443 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3444
3445 *node = (pm_class_variable_operator_write_node_t) {
3446 {
3447 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3448 .node_id = PM_NODE_IDENTIFY(parser),
3449 .location = {
3450 .start = target->base.location.start,
3451 .end = value->location.end
3452 }
3453 },
3454 .name = target->name,
3455 .name_loc = target->base.location,
3456 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3457 .value = value,
3458 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3459 };
3460
3461 return node;
3462}
3463
3467static pm_class_variable_or_write_node_t *
3468pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3469 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3470 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3471
3472 *node = (pm_class_variable_or_write_node_t) {
3473 {
3474 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3475 .node_id = PM_NODE_IDENTIFY(parser),
3476 .location = {
3477 .start = target->base.location.start,
3478 .end = value->location.end
3479 }
3480 },
3481 .name = target->name,
3482 .name_loc = target->base.location,
3483 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3484 .value = value
3485 };
3486
3487 return node;
3488}
3489
3493static pm_class_variable_read_node_t *
3494pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3495 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3496 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3497
3498 *node = (pm_class_variable_read_node_t) {
3499 {
3500 .type = PM_CLASS_VARIABLE_READ_NODE,
3501 .node_id = PM_NODE_IDENTIFY(parser),
3502 .location = PM_LOCATION_TOKEN_VALUE(token)
3503 },
3504 .name = pm_parser_constant_id_token(parser, token)
3505 };
3506
3507 return node;
3508}
3509
3516static inline pm_node_flags_t
3517pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3518 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3519 return flags;
3520 }
3521 return 0;
3522}
3523
3527static pm_class_variable_write_node_t *
3528pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3529 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3530
3531 *node = (pm_class_variable_write_node_t) {
3532 {
3533 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3534 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3535 .node_id = PM_NODE_IDENTIFY(parser),
3536 .location = {
3537 .start = read_node->base.location.start,
3538 .end = value->location.end
3539 },
3540 },
3541 .name = read_node->name,
3542 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3543 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3544 .value = value
3545 };
3546
3547 return node;
3548}
3549
3553static pm_constant_path_and_write_node_t *
3554pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3555 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3556 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3557
3558 *node = (pm_constant_path_and_write_node_t) {
3559 {
3560 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3561 .node_id = PM_NODE_IDENTIFY(parser),
3562 .location = {
3563 .start = target->base.location.start,
3564 .end = value->location.end
3565 }
3566 },
3567 .target = target,
3568 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3569 .value = value
3570 };
3571
3572 return node;
3573}
3574
3578static pm_constant_path_operator_write_node_t *
3579pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3580 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3581
3582 *node = (pm_constant_path_operator_write_node_t) {
3583 {
3584 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3585 .node_id = PM_NODE_IDENTIFY(parser),
3586 .location = {
3587 .start = target->base.location.start,
3588 .end = value->location.end
3589 }
3590 },
3591 .target = target,
3592 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3593 .value = value,
3594 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3595 };
3596
3597 return node;
3598}
3599
3603static pm_constant_path_or_write_node_t *
3604pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3605 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3606 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3607
3608 *node = (pm_constant_path_or_write_node_t) {
3609 {
3610 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3611 .node_id = PM_NODE_IDENTIFY(parser),
3612 .location = {
3613 .start = target->base.location.start,
3614 .end = value->location.end
3615 }
3616 },
3617 .target = target,
3618 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3619 .value = value
3620 };
3621
3622 return node;
3623}
3624
3628static pm_constant_path_node_t *
3629pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3630 pm_assert_value_expression(parser, parent);
3631 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3632
3633 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3634 if (name_token->type == PM_TOKEN_CONSTANT) {
3635 name = pm_parser_constant_id_token(parser, name_token);
3636 }
3637
3638 *node = (pm_constant_path_node_t) {
3639 {
3640 .type = PM_CONSTANT_PATH_NODE,
3641 .node_id = PM_NODE_IDENTIFY(parser),
3642 .location = {
3643 .start = parent == NULL ? delimiter->start : parent->location.start,
3644 .end = name_token->end
3645 },
3646 },
3647 .parent = parent,
3648 .name = name,
3649 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3650 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3651 };
3652
3653 return node;
3654}
3655
3659static pm_constant_path_write_node_t *
3660pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3661 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3662
3663 *node = (pm_constant_path_write_node_t) {
3664 {
3665 .type = PM_CONSTANT_PATH_WRITE_NODE,
3666 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3667 .node_id = PM_NODE_IDENTIFY(parser),
3668 .location = {
3669 .start = target->base.location.start,
3670 .end = value->location.end
3671 },
3672 },
3673 .target = target,
3674 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3675 .value = value
3676 };
3677
3678 return node;
3679}
3680
3684static pm_constant_and_write_node_t *
3685pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3686 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3687 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3688
3689 *node = (pm_constant_and_write_node_t) {
3690 {
3691 .type = PM_CONSTANT_AND_WRITE_NODE,
3692 .node_id = PM_NODE_IDENTIFY(parser),
3693 .location = {
3694 .start = target->base.location.start,
3695 .end = value->location.end
3696 }
3697 },
3698 .name = target->name,
3699 .name_loc = target->base.location,
3700 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3701 .value = value
3702 };
3703
3704 return node;
3705}
3706
3710static pm_constant_operator_write_node_t *
3711pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3712 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3713
3714 *node = (pm_constant_operator_write_node_t) {
3715 {
3716 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3717 .node_id = PM_NODE_IDENTIFY(parser),
3718 .location = {
3719 .start = target->base.location.start,
3720 .end = value->location.end
3721 }
3722 },
3723 .name = target->name,
3724 .name_loc = target->base.location,
3725 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3726 .value = value,
3727 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3728 };
3729
3730 return node;
3731}
3732
3736static pm_constant_or_write_node_t *
3737pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3738 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3739 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3740
3741 *node = (pm_constant_or_write_node_t) {
3742 {
3743 .type = PM_CONSTANT_OR_WRITE_NODE,
3744 .node_id = PM_NODE_IDENTIFY(parser),
3745 .location = {
3746 .start = target->base.location.start,
3747 .end = value->location.end
3748 }
3749 },
3750 .name = target->name,
3751 .name_loc = target->base.location,
3752 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3753 .value = value
3754 };
3755
3756 return node;
3757}
3758
3762static pm_constant_read_node_t *
3763pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3764 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3765 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3766
3767 *node = (pm_constant_read_node_t) {
3768 {
3769 .type = PM_CONSTANT_READ_NODE,
3770 .node_id = PM_NODE_IDENTIFY(parser),
3771 .location = PM_LOCATION_TOKEN_VALUE(name)
3772 },
3773 .name = pm_parser_constant_id_token(parser, name)
3774 };
3775
3776 return node;
3777}
3778
3782static pm_constant_write_node_t *
3783pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3784 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3785
3786 *node = (pm_constant_write_node_t) {
3787 {
3788 .type = PM_CONSTANT_WRITE_NODE,
3789 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3790 .node_id = PM_NODE_IDENTIFY(parser),
3791 .location = {
3792 .start = target->base.location.start,
3793 .end = value->location.end
3794 }
3795 },
3796 .name = target->name,
3797 .name_loc = target->base.location,
3798 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3799 .value = value
3800 };
3801
3802 return node;
3803}
3804
3808static void
3809pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3810 switch (PM_NODE_TYPE(node)) {
3811 case PM_BEGIN_NODE: {
3812 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3813 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3814 break;
3815 }
3816 case PM_PARENTHESES_NODE: {
3817 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3818 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3819 break;
3820 }
3821 case PM_STATEMENTS_NODE: {
3822 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3823 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3824 break;
3825 }
3826 case PM_ARRAY_NODE:
3827 case PM_FLOAT_NODE:
3828 case PM_IMAGINARY_NODE:
3829 case PM_INTEGER_NODE:
3830 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3831 case PM_INTERPOLATED_STRING_NODE:
3832 case PM_INTERPOLATED_SYMBOL_NODE:
3833 case PM_INTERPOLATED_X_STRING_NODE:
3834 case PM_RATIONAL_NODE:
3835 case PM_REGULAR_EXPRESSION_NODE:
3836 case PM_SOURCE_ENCODING_NODE:
3837 case PM_SOURCE_FILE_NODE:
3838 case PM_SOURCE_LINE_NODE:
3839 case PM_STRING_NODE:
3840 case PM_SYMBOL_NODE:
3841 case PM_X_STRING_NODE:
3842 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3843 break;
3844 default:
3845 break;
3846 }
3847}
3848
3852static pm_def_node_t *
3853pm_def_node_create(
3854 pm_parser_t *parser,
3855 pm_constant_id_t name,
3856 const pm_token_t *name_loc,
3857 pm_node_t *receiver,
3858 pm_parameters_node_t *parameters,
3859 pm_node_t *body,
3860 pm_constant_id_list_t *locals,
3861 const pm_token_t *def_keyword,
3862 const pm_token_t *operator,
3863 const pm_token_t *lparen,
3864 const pm_token_t *rparen,
3865 const pm_token_t *equal,
3866 const pm_token_t *end_keyword
3867) {
3868 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3869 const uint8_t *end;
3870
3871 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3872 end = body->location.end;
3873 } else {
3874 end = end_keyword->end;
3875 }
3876
3877 if (receiver != NULL) {
3878 pm_def_node_receiver_check(parser, receiver);
3879 }
3880
3881 *node = (pm_def_node_t) {
3882 {
3883 .type = PM_DEF_NODE,
3884 .node_id = PM_NODE_IDENTIFY(parser),
3885 .location = { .start = def_keyword->start, .end = end },
3886 },
3887 .name = name,
3888 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3889 .receiver = receiver,
3890 .parameters = parameters,
3891 .body = body,
3892 .locals = *locals,
3893 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3894 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3895 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3896 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3897 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3898 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3899 };
3900
3901 return node;
3902}
3903
3907static pm_defined_node_t *
3908pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3909 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3910
3911 *node = (pm_defined_node_t) {
3912 {
3913 .type = PM_DEFINED_NODE,
3914 .node_id = PM_NODE_IDENTIFY(parser),
3915 .location = {
3916 .start = keyword_loc->start,
3917 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3918 },
3919 },
3920 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3921 .value = value,
3922 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3923 .keyword_loc = *keyword_loc
3924 };
3925
3926 return node;
3927}
3928
3932static pm_else_node_t *
3933pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3934 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3935 const uint8_t *end = NULL;
3936 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3937 end = statements->base.location.end;
3938 } else {
3939 end = end_keyword->end;
3940 }
3941
3942 *node = (pm_else_node_t) {
3943 {
3944 .type = PM_ELSE_NODE,
3945 .node_id = PM_NODE_IDENTIFY(parser),
3946 .location = {
3947 .start = else_keyword->start,
3948 .end = end,
3949 },
3950 },
3951 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3952 .statements = statements,
3953 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3954 };
3955
3956 return node;
3957}
3958
3962static pm_embedded_statements_node_t *
3963pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3964 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3965
3966 *node = (pm_embedded_statements_node_t) {
3967 {
3968 .type = PM_EMBEDDED_STATEMENTS_NODE,
3969 .node_id = PM_NODE_IDENTIFY(parser),
3970 .location = {
3971 .start = opening->start,
3972 .end = closing->end
3973 }
3974 },
3975 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3976 .statements = statements,
3977 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3978 };
3979
3980 return node;
3981}
3982
3986static pm_embedded_variable_node_t *
3987pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3988 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3989
3990 *node = (pm_embedded_variable_node_t) {
3991 {
3992 .type = PM_EMBEDDED_VARIABLE_NODE,
3993 .node_id = PM_NODE_IDENTIFY(parser),
3994 .location = {
3995 .start = operator->start,
3996 .end = variable->location.end
3997 }
3998 },
3999 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4000 .variable = variable
4001 };
4002
4003 return node;
4004}
4005
4009static pm_ensure_node_t *
4010pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4011 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4012
4013 *node = (pm_ensure_node_t) {
4014 {
4015 .type = PM_ENSURE_NODE,
4016 .node_id = PM_NODE_IDENTIFY(parser),
4017 .location = {
4018 .start = ensure_keyword->start,
4019 .end = end_keyword->end
4020 },
4021 },
4022 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4023 .statements = statements,
4024 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4025 };
4026
4027 return node;
4028}
4029
4033static pm_false_node_t *
4034pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4035 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4036 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4037
4038 *node = (pm_false_node_t) {{
4039 .type = PM_FALSE_NODE,
4040 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4041 .node_id = PM_NODE_IDENTIFY(parser),
4042 .location = PM_LOCATION_TOKEN_VALUE(token)
4043 }};
4044
4045 return node;
4046}
4047
4052static pm_find_pattern_node_t *
4053pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4054 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4055
4056 pm_node_t *left = nodes->nodes[0];
4057 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4058 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4059
4060 pm_node_t *right;
4061
4062 if (nodes->size == 1) {
4063 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4064 } else {
4065 right = nodes->nodes[nodes->size - 1];
4066 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4067 }
4068
4069#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4070 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4071 // The resulting AST will anyway be ignored, but this file still needs to compile.
4072 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4073#else
4074 pm_node_t *right_splat_node = right;
4075#endif
4076 *node = (pm_find_pattern_node_t) {
4077 {
4078 .type = PM_FIND_PATTERN_NODE,
4079 .node_id = PM_NODE_IDENTIFY(parser),
4080 .location = {
4081 .start = left->location.start,
4082 .end = right->location.end,
4083 },
4084 },
4085 .constant = NULL,
4086 .left = left_splat_node,
4087 .right = right_splat_node,
4088 .requireds = { 0 },
4089 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4090 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4091 };
4092
4093 // For now we're going to just copy over each pointer manually. This could be
4094 // much more efficient, as we could instead resize the node list to only point
4095 // to 1...-1.
4096 for (size_t index = 1; index < nodes->size - 1; index++) {
4097 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4098 }
4099
4100 return node;
4101}
4102
4107static double
4108pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4109 ptrdiff_t diff = token->end - token->start;
4110 if (diff <= 0) return 0.0;
4111
4112 // First, get a buffer of the content.
4113 size_t length = (size_t) diff;
4114 char *buffer = xmalloc(sizeof(char) * (length + 1));
4115 memcpy((void *) buffer, token->start, length);
4116
4117 // Next, determine if we need to replace the decimal point because of
4118 // locale-specific options, and then normalize them if we have to.
4119 char decimal_point = *localeconv()->decimal_point;
4120 if (decimal_point != '.') {
4121 for (size_t index = 0; index < length; index++) {
4122 if (buffer[index] == '.') buffer[index] = decimal_point;
4123 }
4124 }
4125
4126 // Next, handle underscores by removing them from the buffer.
4127 for (size_t index = 0; index < length; index++) {
4128 if (buffer[index] == '_') {
4129 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4130 length--;
4131 }
4132 }
4133
4134 // Null-terminate the buffer so that strtod cannot read off the end.
4135 buffer[length] = '\0';
4136
4137 // Now, call strtod to parse the value. Note that CRuby has their own
4138 // version of strtod which avoids locales. We're okay using the locale-aware
4139 // version because we've already validated through the parser that the token
4140 // is in a valid format.
4141 errno = 0;
4142 char *eptr;
4143 double value = strtod(buffer, &eptr);
4144
4145 // This should never happen, because we've already checked that the token
4146 // is in a valid format. However it's good to be safe.
4147 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4148 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4149 xfree((void *) buffer);
4150 return 0.0;
4151 }
4152
4153 // If errno is set, then it should only be ERANGE. At this point we need to
4154 // check if it's infinity (it should be).
4155 if (errno == ERANGE && PRISM_ISINF(value)) {
4156 int warn_width;
4157 const char *ellipsis;
4158
4159 if (length > 20) {
4160 warn_width = 20;
4161 ellipsis = "...";
4162 } else {
4163 warn_width = (int) length;
4164 ellipsis = "";
4165 }
4166
4167 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4168 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4169 }
4170
4171 // Finally we can free the buffer and return the value.
4172 xfree((void *) buffer);
4173 return value;
4174}
4175
4179static pm_float_node_t *
4180pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4181 assert(token->type == PM_TOKEN_FLOAT);
4182 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4183
4184 *node = (pm_float_node_t) {
4185 {
4186 .type = PM_FLOAT_NODE,
4187 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4188 .node_id = PM_NODE_IDENTIFY(parser),
4189 .location = PM_LOCATION_TOKEN_VALUE(token)
4190 },
4191 .value = pm_double_parse(parser, token)
4192 };
4193
4194 return node;
4195}
4196
4200static pm_imaginary_node_t *
4201pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4202 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4203
4204 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4205 *node = (pm_imaginary_node_t) {
4206 {
4207 .type = PM_IMAGINARY_NODE,
4208 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4209 .node_id = PM_NODE_IDENTIFY(parser),
4210 .location = PM_LOCATION_TOKEN_VALUE(token)
4211 },
4212 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4213 .type = PM_TOKEN_FLOAT,
4214 .start = token->start,
4215 .end = token->end - 1
4216 }))
4217 };
4218
4219 return node;
4220}
4221
4225static pm_rational_node_t *
4226pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4227 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4228
4229 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4230 *node = (pm_rational_node_t) {
4231 {
4232 .type = PM_RATIONAL_NODE,
4233 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4234 .node_id = PM_NODE_IDENTIFY(parser),
4235 .location = PM_LOCATION_TOKEN_VALUE(token)
4236 },
4237 .numerator = { 0 },
4238 .denominator = { 0 }
4239 };
4240
4241 const uint8_t *start = token->start;
4242 const uint8_t *end = token->end - 1; // r
4243
4244 while (start < end && *start == '0') start++; // 0.1 -> .1
4245 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4246
4247 size_t length = (size_t) (end - start);
4248 if (length == 1) {
4249 node->denominator.value = 1;
4250 return node;
4251 }
4252
4253 const uint8_t *point = memchr(start, '.', length);
4254 assert(point && "should have a decimal point");
4255
4256 uint8_t *digits = xmalloc(length);
4257 if (digits == NULL) {
4258 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4259 abort();
4260 }
4261
4262 memcpy(digits, start, (unsigned long) (point - start));
4263 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4264 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4265
4266 digits[0] = '1';
4267 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4268 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4269 xfree(digits);
4270
4271 pm_integers_reduce(&node->numerator, &node->denominator);
4272 return node;
4273}
4274
4279static pm_imaginary_node_t *
4280pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4281 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4282
4283 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4284 *node = (pm_imaginary_node_t) {
4285 {
4286 .type = PM_IMAGINARY_NODE,
4287 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4288 .node_id = PM_NODE_IDENTIFY(parser),
4289 .location = PM_LOCATION_TOKEN_VALUE(token)
4290 },
4291 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4292 .type = PM_TOKEN_FLOAT_RATIONAL,
4293 .start = token->start,
4294 .end = token->end - 1
4295 }))
4296 };
4297
4298 return node;
4299}
4300
4304static pm_for_node_t *
4305pm_for_node_create(
4306 pm_parser_t *parser,
4307 pm_node_t *index,
4308 pm_node_t *collection,
4309 pm_statements_node_t *statements,
4310 const pm_token_t *for_keyword,
4311 const pm_token_t *in_keyword,
4312 const pm_token_t *do_keyword,
4313 const pm_token_t *end_keyword
4314) {
4315 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4316
4317 *node = (pm_for_node_t) {
4318 {
4319 .type = PM_FOR_NODE,
4320 .node_id = PM_NODE_IDENTIFY(parser),
4321 .location = {
4322 .start = for_keyword->start,
4323 .end = end_keyword->end
4324 },
4325 },
4326 .index = index,
4327 .collection = collection,
4328 .statements = statements,
4329 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4330 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4331 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4332 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4333 };
4334
4335 return node;
4336}
4337
4341static pm_forwarding_arguments_node_t *
4342pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4343 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4344 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4345
4346 *node = (pm_forwarding_arguments_node_t) {{
4347 .type = PM_FORWARDING_ARGUMENTS_NODE,
4348 .node_id = PM_NODE_IDENTIFY(parser),
4349 .location = PM_LOCATION_TOKEN_VALUE(token)
4350 }};
4351
4352 return node;
4353}
4354
4358static pm_forwarding_parameter_node_t *
4359pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4360 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4361 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4362
4363 *node = (pm_forwarding_parameter_node_t) {{
4364 .type = PM_FORWARDING_PARAMETER_NODE,
4365 .node_id = PM_NODE_IDENTIFY(parser),
4366 .location = PM_LOCATION_TOKEN_VALUE(token)
4367 }};
4368
4369 return node;
4370}
4371
4375static pm_forwarding_super_node_t *
4376pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4377 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4378 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4379 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4380
4381 pm_block_node_t *block = NULL;
4382 if (arguments->block != NULL) {
4383 block = (pm_block_node_t *) arguments->block;
4384 }
4385
4386 *node = (pm_forwarding_super_node_t) {
4387 {
4388 .type = PM_FORWARDING_SUPER_NODE,
4389 .node_id = PM_NODE_IDENTIFY(parser),
4390 .location = {
4391 .start = token->start,
4392 .end = block != NULL ? block->base.location.end : token->end
4393 },
4394 },
4395 .block = block
4396 };
4397
4398 return node;
4399}
4400
4405static pm_hash_pattern_node_t *
4406pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4407 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4408
4409 *node = (pm_hash_pattern_node_t) {
4410 {
4411 .type = PM_HASH_PATTERN_NODE,
4412 .node_id = PM_NODE_IDENTIFY(parser),
4413 .location = {
4414 .start = opening->start,
4415 .end = closing->end
4416 },
4417 },
4418 .constant = NULL,
4419 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4420 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4421 .elements = { 0 },
4422 .rest = NULL
4423 };
4424
4425 return node;
4426}
4427
4431static pm_hash_pattern_node_t *
4432pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4433 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4434
4435 const uint8_t *start;
4436 const uint8_t *end;
4437
4438 if (elements->size > 0) {
4439 if (rest) {
4440 start = elements->nodes[0]->location.start;
4441 end = rest->location.end;
4442 } else {
4443 start = elements->nodes[0]->location.start;
4444 end = elements->nodes[elements->size - 1]->location.end;
4445 }
4446 } else {
4447 assert(rest != NULL);
4448 start = rest->location.start;
4449 end = rest->location.end;
4450 }
4451
4452 *node = (pm_hash_pattern_node_t) {
4453 {
4454 .type = PM_HASH_PATTERN_NODE,
4455 .node_id = PM_NODE_IDENTIFY(parser),
4456 .location = {
4457 .start = start,
4458 .end = end
4459 },
4460 },
4461 .constant = NULL,
4462 .elements = { 0 },
4463 .rest = rest,
4464 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4465 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4466 };
4467
4468 pm_node_t *element;
4469 PM_NODE_LIST_FOREACH(elements, index, element) {
4470 pm_node_list_append(&node->elements, element);
4471 }
4472
4473 return node;
4474}
4475
4479static pm_constant_id_t
4480pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4481 switch (PM_NODE_TYPE(target)) {
4482 case PM_GLOBAL_VARIABLE_READ_NODE:
4483 return ((pm_global_variable_read_node_t *) target)->name;
4484 case PM_BACK_REFERENCE_READ_NODE:
4485 return ((pm_back_reference_read_node_t *) target)->name;
4486 case PM_NUMBERED_REFERENCE_READ_NODE:
4487 // This will only ever happen in the event of a syntax error, but we
4488 // still need to provide something for the node.
4489 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4490 default:
4491 assert(false && "unreachable");
4492 return (pm_constant_id_t) -1;
4493 }
4494}
4495
4499static pm_global_variable_and_write_node_t *
4500pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4501 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4502 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4503
4504 *node = (pm_global_variable_and_write_node_t) {
4505 {
4506 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4507 .node_id = PM_NODE_IDENTIFY(parser),
4508 .location = {
4509 .start = target->location.start,
4510 .end = value->location.end
4511 }
4512 },
4513 .name = pm_global_variable_write_name(parser, target),
4514 .name_loc = target->location,
4515 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4516 .value = value
4517 };
4518
4519 return node;
4520}
4521
4525static pm_global_variable_operator_write_node_t *
4526pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4527 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4528
4529 *node = (pm_global_variable_operator_write_node_t) {
4530 {
4531 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4532 .node_id = PM_NODE_IDENTIFY(parser),
4533 .location = {
4534 .start = target->location.start,
4535 .end = value->location.end
4536 }
4537 },
4538 .name = pm_global_variable_write_name(parser, target),
4539 .name_loc = target->location,
4540 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4541 .value = value,
4542 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4543 };
4544
4545 return node;
4546}
4547
4551static pm_global_variable_or_write_node_t *
4552pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4553 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4554 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4555
4556 *node = (pm_global_variable_or_write_node_t) {
4557 {
4558 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4559 .node_id = PM_NODE_IDENTIFY(parser),
4560 .location = {
4561 .start = target->location.start,
4562 .end = value->location.end
4563 }
4564 },
4565 .name = pm_global_variable_write_name(parser, target),
4566 .name_loc = target->location,
4567 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4568 .value = value
4569 };
4570
4571 return node;
4572}
4573
4577static pm_global_variable_read_node_t *
4578pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4579 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4580
4581 *node = (pm_global_variable_read_node_t) {
4582 {
4583 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4584 .node_id = PM_NODE_IDENTIFY(parser),
4585 .location = PM_LOCATION_TOKEN_VALUE(name),
4586 },
4587 .name = pm_parser_constant_id_token(parser, name)
4588 };
4589
4590 return node;
4591}
4592
4596static pm_global_variable_read_node_t *
4597pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4598 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4599
4600 *node = (pm_global_variable_read_node_t) {
4601 {
4602 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4603 .node_id = PM_NODE_IDENTIFY(parser),
4604 .location = PM_LOCATION_NULL_VALUE(parser)
4605 },
4606 .name = name
4607 };
4608
4609 return node;
4610}
4611
4615static pm_global_variable_write_node_t *
4616pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4617 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4618
4619 *node = (pm_global_variable_write_node_t) {
4620 {
4621 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4622 .node_id = PM_NODE_IDENTIFY(parser),
4623 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4624 .location = {
4625 .start = target->location.start,
4626 .end = value->location.end
4627 },
4628 },
4629 .name = pm_global_variable_write_name(parser, target),
4630 .name_loc = PM_LOCATION_NODE_VALUE(target),
4631 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4632 .value = value
4633 };
4634
4635 return node;
4636}
4637
4641static pm_global_variable_write_node_t *
4642pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4643 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4644
4645 *node = (pm_global_variable_write_node_t) {
4646 {
4647 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4648 .node_id = PM_NODE_IDENTIFY(parser),
4649 .location = PM_LOCATION_NULL_VALUE(parser)
4650 },
4651 .name = name,
4652 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4653 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4654 .value = value
4655 };
4656
4657 return node;
4658}
4659
4663static pm_hash_node_t *
4664pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4665 assert(opening != NULL);
4666 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4667
4668 *node = (pm_hash_node_t) {
4669 {
4670 .type = PM_HASH_NODE,
4671 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4672 .node_id = PM_NODE_IDENTIFY(parser),
4673 .location = PM_LOCATION_TOKEN_VALUE(opening)
4674 },
4675 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4676 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4677 .elements = { 0 }
4678 };
4679
4680 return node;
4681}
4682
4686static inline void
4687pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4688 pm_node_list_append(&hash->elements, element);
4689
4690 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4691 if (static_literal) {
4692 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4693 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4694 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4695 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4696 }
4697
4698 if (!static_literal) {
4699 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4700 }
4701}
4702
4703static inline void
4704pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4705 hash->base.location.end = token->end;
4706 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4707}
4708
4712static pm_if_node_t *
4713pm_if_node_create(pm_parser_t *parser,
4714 const pm_token_t *if_keyword,
4715 pm_node_t *predicate,
4716 const pm_token_t *then_keyword,
4717 pm_statements_node_t *statements,
4718 pm_node_t *subsequent,
4719 const pm_token_t *end_keyword
4720) {
4721 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4722 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4723
4724 const uint8_t *end;
4725 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4726 end = end_keyword->end;
4727 } else if (subsequent != NULL) {
4728 end = subsequent->location.end;
4729 } else if (pm_statements_node_body_length(statements) != 0) {
4730 end = statements->base.location.end;
4731 } else {
4732 end = predicate->location.end;
4733 }
4734
4735 *node = (pm_if_node_t) {
4736 {
4737 .type = PM_IF_NODE,
4738 .flags = PM_NODE_FLAG_NEWLINE,
4739 .node_id = PM_NODE_IDENTIFY(parser),
4740 .location = {
4741 .start = if_keyword->start,
4742 .end = end
4743 },
4744 },
4745 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4746 .predicate = predicate,
4747 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4748 .statements = statements,
4749 .subsequent = subsequent,
4750 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4751 };
4752
4753 return node;
4754}
4755
4759static pm_if_node_t *
4760pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4761 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4762 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4763
4764 pm_statements_node_t *statements = pm_statements_node_create(parser);
4765 pm_statements_node_body_append(parser, statements, statement, true);
4766
4767 *node = (pm_if_node_t) {
4768 {
4769 .type = PM_IF_NODE,
4770 .flags = PM_NODE_FLAG_NEWLINE,
4771 .node_id = PM_NODE_IDENTIFY(parser),
4772 .location = {
4773 .start = statement->location.start,
4774 .end = predicate->location.end
4775 },
4776 },
4777 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4778 .predicate = predicate,
4779 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4780 .statements = statements,
4781 .subsequent = NULL,
4782 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4783 };
4784
4785 return node;
4786}
4787
4791static pm_if_node_t *
4792pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4793 pm_assert_value_expression(parser, predicate);
4794 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4795
4796 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4797 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4798
4799 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4800 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4801
4802 pm_token_t end_keyword = not_provided(parser);
4803 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4804
4805 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4806
4807 *node = (pm_if_node_t) {
4808 {
4809 .type = PM_IF_NODE,
4810 .flags = PM_NODE_FLAG_NEWLINE,
4811 .node_id = PM_NODE_IDENTIFY(parser),
4812 .location = {
4813 .start = predicate->location.start,
4814 .end = false_expression->location.end,
4815 },
4816 },
4817 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4818 .predicate = predicate,
4819 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4820 .statements = if_statements,
4821 .subsequent = (pm_node_t *) else_node,
4822 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4823 };
4824
4825 return node;
4826
4827}
4828
4829static inline void
4830pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4831 node->base.location.end = keyword->end;
4832 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4833}
4834
4835static inline void
4836pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4837 node->base.location.end = keyword->end;
4838 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4839}
4840
4844static pm_implicit_node_t *
4845pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4846 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4847
4848 *node = (pm_implicit_node_t) {
4849 {
4850 .type = PM_IMPLICIT_NODE,
4851 .node_id = PM_NODE_IDENTIFY(parser),
4852 .location = value->location
4853 },
4854 .value = value
4855 };
4856
4857 return node;
4858}
4859
4863static pm_implicit_rest_node_t *
4864pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4865 assert(token->type == PM_TOKEN_COMMA);
4866
4867 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4868
4869 *node = (pm_implicit_rest_node_t) {
4870 {
4871 .type = PM_IMPLICIT_REST_NODE,
4872 .node_id = PM_NODE_IDENTIFY(parser),
4873 .location = PM_LOCATION_TOKEN_VALUE(token)
4874 }
4875 };
4876
4877 return node;
4878}
4879
4883static pm_integer_node_t *
4884pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4885 assert(token->type == PM_TOKEN_INTEGER);
4886 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4887
4888 *node = (pm_integer_node_t) {
4889 {
4890 .type = PM_INTEGER_NODE,
4891 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4892 .node_id = PM_NODE_IDENTIFY(parser),
4893 .location = PM_LOCATION_TOKEN_VALUE(token)
4894 },
4895 .value = { 0 }
4896 };
4897
4898 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4899 switch (base) {
4900 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4901 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4902 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4903 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4904 default: assert(false && "unreachable"); break;
4905 }
4906
4907 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4908 return node;
4909}
4910
4915static pm_imaginary_node_t *
4916pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4917 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4918
4919 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4920 *node = (pm_imaginary_node_t) {
4921 {
4922 .type = PM_IMAGINARY_NODE,
4923 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4924 .node_id = PM_NODE_IDENTIFY(parser),
4925 .location = PM_LOCATION_TOKEN_VALUE(token)
4926 },
4927 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4928 .type = PM_TOKEN_INTEGER,
4929 .start = token->start,
4930 .end = token->end - 1
4931 }))
4932 };
4933
4934 return node;
4935}
4936
4941static pm_rational_node_t *
4942pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4943 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4944
4945 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4946 *node = (pm_rational_node_t) {
4947 {
4948 .type = PM_RATIONAL_NODE,
4949 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4950 .node_id = PM_NODE_IDENTIFY(parser),
4951 .location = PM_LOCATION_TOKEN_VALUE(token)
4952 },
4953 .numerator = { 0 },
4954 .denominator = { .value = 1, 0 }
4955 };
4956
4957 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4958 switch (base) {
4959 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4960 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4961 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4962 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4963 default: assert(false && "unreachable"); break;
4964 }
4965
4966 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4967
4968 return node;
4969}
4970
4975static pm_imaginary_node_t *
4976pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4977 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4978
4979 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4980 *node = (pm_imaginary_node_t) {
4981 {
4982 .type = PM_IMAGINARY_NODE,
4983 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4984 .node_id = PM_NODE_IDENTIFY(parser),
4985 .location = PM_LOCATION_TOKEN_VALUE(token)
4986 },
4987 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4988 .type = PM_TOKEN_INTEGER_RATIONAL,
4989 .start = token->start,
4990 .end = token->end - 1
4991 }))
4992 };
4993
4994 return node;
4995}
4996
5000static pm_in_node_t *
5001pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
5002 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
5003
5004 const uint8_t *end;
5005 if (statements != NULL) {
5006 end = statements->base.location.end;
5007 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
5008 end = then_keyword->end;
5009 } else {
5010 end = pattern->location.end;
5011 }
5012
5013 *node = (pm_in_node_t) {
5014 {
5015 .type = PM_IN_NODE,
5016 .node_id = PM_NODE_IDENTIFY(parser),
5017 .location = {
5018 .start = in_keyword->start,
5019 .end = end
5020 },
5021 },
5022 .pattern = pattern,
5023 .statements = statements,
5024 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5025 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5026 };
5027
5028 return node;
5029}
5030
5034static pm_instance_variable_and_write_node_t *
5035pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5036 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5037 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5038
5039 *node = (pm_instance_variable_and_write_node_t) {
5040 {
5041 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5042 .node_id = PM_NODE_IDENTIFY(parser),
5043 .location = {
5044 .start = target->base.location.start,
5045 .end = value->location.end
5046 }
5047 },
5048 .name = target->name,
5049 .name_loc = target->base.location,
5050 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5051 .value = value
5052 };
5053
5054 return node;
5055}
5056
5060static pm_instance_variable_operator_write_node_t *
5061pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5062 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5063
5064 *node = (pm_instance_variable_operator_write_node_t) {
5065 {
5066 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5067 .node_id = PM_NODE_IDENTIFY(parser),
5068 .location = {
5069 .start = target->base.location.start,
5070 .end = value->location.end
5071 }
5072 },
5073 .name = target->name,
5074 .name_loc = target->base.location,
5075 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5076 .value = value,
5077 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5078 };
5079
5080 return node;
5081}
5082
5086static pm_instance_variable_or_write_node_t *
5087pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5088 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5089 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5090
5091 *node = (pm_instance_variable_or_write_node_t) {
5092 {
5093 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5094 .node_id = PM_NODE_IDENTIFY(parser),
5095 .location = {
5096 .start = target->base.location.start,
5097 .end = value->location.end
5098 }
5099 },
5100 .name = target->name,
5101 .name_loc = target->base.location,
5102 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5103 .value = value
5104 };
5105
5106 return node;
5107}
5108
5112static pm_instance_variable_read_node_t *
5113pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5114 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5115 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5116
5117 *node = (pm_instance_variable_read_node_t) {
5118 {
5119 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5120 .node_id = PM_NODE_IDENTIFY(parser),
5121 .location = PM_LOCATION_TOKEN_VALUE(token)
5122 },
5123 .name = pm_parser_constant_id_token(parser, token)
5124 };
5125
5126 return node;
5127}
5128
5133static pm_instance_variable_write_node_t *
5134pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5135 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5136 *node = (pm_instance_variable_write_node_t) {
5137 {
5138 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5139 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5140 .node_id = PM_NODE_IDENTIFY(parser),
5141 .location = {
5142 .start = read_node->base.location.start,
5143 .end = value->location.end
5144 }
5145 },
5146 .name = read_node->name,
5147 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5148 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5149 .value = value
5150 };
5151
5152 return node;
5153}
5154
5160static void
5161pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5162 switch (PM_NODE_TYPE(part)) {
5163 case PM_STRING_NODE:
5164 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5165 break;
5166 case PM_EMBEDDED_STATEMENTS_NODE: {
5167 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5168 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5169
5170 if (embedded == NULL) {
5171 // If there are no statements or more than one statement, then
5172 // we lose the static literal flag.
5173 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5174 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5175 // If the embedded statement is a string, then we can keep the
5176 // static literal flag and mark the string as frozen.
5177 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5178 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5179 // If the embedded statement is an interpolated string and it's
5180 // a static literal, then we can keep the static literal flag.
5181 } else {
5182 // Otherwise we lose the static literal flag.
5183 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5184 }
5185
5186 break;
5187 }
5188 case PM_EMBEDDED_VARIABLE_NODE:
5189 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5190 break;
5191 default:
5192 assert(false && "unexpected node type");
5193 break;
5194 }
5195
5196 pm_node_list_append(parts, part);
5197}
5198
5202static pm_interpolated_regular_expression_node_t *
5203pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5204 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5205
5206 *node = (pm_interpolated_regular_expression_node_t) {
5207 {
5208 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5209 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5210 .node_id = PM_NODE_IDENTIFY(parser),
5211 .location = {
5212 .start = opening->start,
5213 .end = NULL,
5214 },
5215 },
5216 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5217 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5218 .parts = { 0 }
5219 };
5220
5221 return node;
5222}
5223
5224static inline void
5225pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5226 if (node->base.location.start > part->location.start) {
5227 node->base.location.start = part->location.start;
5228 }
5229 if (node->base.location.end < part->location.end) {
5230 node->base.location.end = part->location.end;
5231 }
5232
5233 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5234}
5235
5236static inline void
5237pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5238 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5239 node->base.location.end = closing->end;
5240 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5241}
5242
5266static inline void
5267pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5268#define CLEAR_FLAGS(node) \
5269 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5270
5271#define MUTABLE_FLAGS(node) \
5272 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5273
5274 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5275 node->base.location.start = part->location.start;
5276 }
5277
5278 node->base.location.end = MAX(node->base.location.end, part->location.end);
5279
5280 switch (PM_NODE_TYPE(part)) {
5281 case PM_STRING_NODE:
5282 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5283 break;
5284 case PM_INTERPOLATED_STRING_NODE:
5285 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5286 // If the string that we're concatenating is a static literal,
5287 // then we can keep the static literal flag for this string.
5288 } else {
5289 // Otherwise, we lose the static literal flag here and we should
5290 // also clear the mutability flags.
5291 CLEAR_FLAGS(node);
5292 }
5293 break;
5294 case PM_EMBEDDED_STATEMENTS_NODE: {
5295 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5296 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5297
5298 if (embedded == NULL) {
5299 // If we're embedding multiple statements or no statements, then
5300 // the string is not longer a static literal.
5301 CLEAR_FLAGS(node);
5302 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5303 // If the embedded statement is a string, then we can make that
5304 // string as frozen and static literal, and not touch the static
5305 // literal status of this string.
5306 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5307
5308 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5309 MUTABLE_FLAGS(node);
5310 }
5311 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5312 // If the embedded statement is an interpolated string, but that
5313 // string is marked as static literal, then we can keep our
5314 // static literal status for this string.
5315 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5316 MUTABLE_FLAGS(node);
5317 }
5318 } else {
5319 // In all other cases, we lose the static literal flag here and
5320 // become mutable.
5321 CLEAR_FLAGS(node);
5322 }
5323
5324 break;
5325 }
5326 case PM_EMBEDDED_VARIABLE_NODE:
5327 // Embedded variables clear static literal, which means we also
5328 // should clear the mutability flags.
5329 CLEAR_FLAGS(node);
5330 break;
5331 case PM_X_STRING_NODE:
5332 case PM_INTERPOLATED_X_STRING_NODE:
5333 // If this is an x string, then this is a syntax error. But we want
5334 // to handle it here so that we don't fail the assertion.
5335 CLEAR_FLAGS(node);
5336 break;
5337 default:
5338 assert(false && "unexpected node type");
5339 break;
5340 }
5341
5342 pm_node_list_append(&node->parts, part);
5343
5344#undef CLEAR_FLAGS
5345#undef MUTABLE_FLAGS
5346}
5347
5351static pm_interpolated_string_node_t *
5352pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5353 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5354 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5355
5356 switch (parser->frozen_string_literal) {
5357 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5358 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5359 break;
5360 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5361 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5362 break;
5363 }
5364
5365 *node = (pm_interpolated_string_node_t) {
5366 {
5367 .type = PM_INTERPOLATED_STRING_NODE,
5368 .flags = flags,
5369 .node_id = PM_NODE_IDENTIFY(parser),
5370 .location = {
5371 .start = opening->start,
5372 .end = closing->end,
5373 },
5374 },
5375 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5376 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5377 .parts = { 0 }
5378 };
5379
5380 if (parts != NULL) {
5381 pm_node_t *part;
5382 PM_NODE_LIST_FOREACH(parts, index, part) {
5383 pm_interpolated_string_node_append(node, part);
5384 }
5385 }
5386
5387 return node;
5388}
5389
5393static void
5394pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5395 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5396 node->base.location.end = closing->end;
5397}
5398
5399static void
5400pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5401 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5402 node->base.location.start = part->location.start;
5403 }
5404
5405 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5406 node->base.location.end = MAX(node->base.location.end, part->location.end);
5407}
5408
5409static void
5410pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5411 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5412 node->base.location.end = closing->end;
5413}
5414
5418static pm_interpolated_symbol_node_t *
5419pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5420 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5421
5422 *node = (pm_interpolated_symbol_node_t) {
5423 {
5424 .type = PM_INTERPOLATED_SYMBOL_NODE,
5425 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5426 .node_id = PM_NODE_IDENTIFY(parser),
5427 .location = {
5428 .start = opening->start,
5429 .end = closing->end,
5430 },
5431 },
5432 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5433 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5434 .parts = { 0 }
5435 };
5436
5437 if (parts != NULL) {
5438 pm_node_t *part;
5439 PM_NODE_LIST_FOREACH(parts, index, part) {
5440 pm_interpolated_symbol_node_append(node, part);
5441 }
5442 }
5443
5444 return node;
5445}
5446
5450static pm_interpolated_x_string_node_t *
5451pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5452 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5453
5454 *node = (pm_interpolated_x_string_node_t) {
5455 {
5456 .type = PM_INTERPOLATED_X_STRING_NODE,
5457 .node_id = PM_NODE_IDENTIFY(parser),
5458 .location = {
5459 .start = opening->start,
5460 .end = closing->end
5461 },
5462 },
5463 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5464 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5465 .parts = { 0 }
5466 };
5467
5468 return node;
5469}
5470
5471static inline void
5472pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5473 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5474 node->base.location.end = part->location.end;
5475}
5476
5477static inline void
5478pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5479 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5480 node->base.location.end = closing->end;
5481}
5482
5486static pm_it_local_variable_read_node_t *
5487pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5488 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5489
5490 *node = (pm_it_local_variable_read_node_t) {
5491 {
5492 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5493 .node_id = PM_NODE_IDENTIFY(parser),
5494 .location = PM_LOCATION_TOKEN_VALUE(name)
5495 }
5496 };
5497
5498 return node;
5499}
5500
5504static pm_it_parameters_node_t *
5505pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5506 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5507
5508 *node = (pm_it_parameters_node_t) {
5509 {
5510 .type = PM_IT_PARAMETERS_NODE,
5511 .node_id = PM_NODE_IDENTIFY(parser),
5512 .location = {
5513 .start = opening->start,
5514 .end = closing->end
5515 }
5516 }
5517 };
5518
5519 return node;
5520}
5521
5525static pm_keyword_hash_node_t *
5526pm_keyword_hash_node_create(pm_parser_t *parser) {
5527 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5528
5529 *node = (pm_keyword_hash_node_t) {
5530 .base = {
5531 .type = PM_KEYWORD_HASH_NODE,
5532 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5533 .node_id = PM_NODE_IDENTIFY(parser),
5534 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5535 },
5536 .elements = { 0 }
5537 };
5538
5539 return node;
5540}
5541
5545static void
5546pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5547 // If the element being added is not an AssocNode or does not have a symbol
5548 // key, then we want to turn the SYMBOL_KEYS flag off.
5549 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5550 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5551 }
5552
5553 pm_node_list_append(&hash->elements, element);
5554 if (hash->base.location.start == NULL) {
5555 hash->base.location.start = element->location.start;
5556 }
5557 hash->base.location.end = element->location.end;
5558}
5559
5563static pm_required_keyword_parameter_node_t *
5564pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5565 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5566
5567 *node = (pm_required_keyword_parameter_node_t) {
5568 {
5569 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5570 .node_id = PM_NODE_IDENTIFY(parser),
5571 .location = {
5572 .start = name->start,
5573 .end = name->end
5574 },
5575 },
5576 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5577 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5578 };
5579
5580 return node;
5581}
5582
5586static pm_optional_keyword_parameter_node_t *
5587pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5588 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5589
5590 *node = (pm_optional_keyword_parameter_node_t) {
5591 {
5592 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5593 .node_id = PM_NODE_IDENTIFY(parser),
5594 .location = {
5595 .start = name->start,
5596 .end = value->location.end
5597 },
5598 },
5599 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5600 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5601 .value = value
5602 };
5603
5604 return node;
5605}
5606
5610static pm_keyword_rest_parameter_node_t *
5611pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5612 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5613
5614 *node = (pm_keyword_rest_parameter_node_t) {
5615 {
5616 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5617 .node_id = PM_NODE_IDENTIFY(parser),
5618 .location = {
5619 .start = operator->start,
5620 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5621 },
5622 },
5623 .name = pm_parser_optional_constant_id_token(parser, name),
5624 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5625 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5626 };
5627
5628 return node;
5629}
5630
5634static pm_lambda_node_t *
5635pm_lambda_node_create(
5636 pm_parser_t *parser,
5637 pm_constant_id_list_t *locals,
5638 const pm_token_t *operator,
5639 const pm_token_t *opening,
5640 const pm_token_t *closing,
5641 pm_node_t *parameters,
5642 pm_node_t *body
5643) {
5644 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5645
5646 *node = (pm_lambda_node_t) {
5647 {
5648 .type = PM_LAMBDA_NODE,
5649 .node_id = PM_NODE_IDENTIFY(parser),
5650 .location = {
5651 .start = operator->start,
5652 .end = closing->end
5653 },
5654 },
5655 .locals = *locals,
5656 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5657 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5658 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5659 .parameters = parameters,
5660 .body = body
5661 };
5662
5663 return node;
5664}
5665
5669static pm_local_variable_and_write_node_t *
5670pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5671 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5672 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5673 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5674
5675 *node = (pm_local_variable_and_write_node_t) {
5676 {
5677 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5678 .node_id = PM_NODE_IDENTIFY(parser),
5679 .location = {
5680 .start = target->location.start,
5681 .end = value->location.end
5682 }
5683 },
5684 .name_loc = target->location,
5685 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5686 .value = value,
5687 .name = name,
5688 .depth = depth
5689 };
5690
5691 return node;
5692}
5693
5697static pm_local_variable_operator_write_node_t *
5698pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5699 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5700
5701 *node = (pm_local_variable_operator_write_node_t) {
5702 {
5703 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5704 .node_id = PM_NODE_IDENTIFY(parser),
5705 .location = {
5706 .start = target->location.start,
5707 .end = value->location.end
5708 }
5709 },
5710 .name_loc = target->location,
5711 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5712 .value = value,
5713 .name = name,
5714 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5715 .depth = depth
5716 };
5717
5718 return node;
5719}
5720
5724static pm_local_variable_or_write_node_t *
5725pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5726 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5727 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5728 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5729
5730 *node = (pm_local_variable_or_write_node_t) {
5731 {
5732 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5733 .node_id = PM_NODE_IDENTIFY(parser),
5734 .location = {
5735 .start = target->location.start,
5736 .end = value->location.end
5737 }
5738 },
5739 .name_loc = target->location,
5740 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5741 .value = value,
5742 .name = name,
5743 .depth = depth
5744 };
5745
5746 return node;
5747}
5748
5752static pm_local_variable_read_node_t *
5753pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5754 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5755
5756 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5757
5758 *node = (pm_local_variable_read_node_t) {
5759 {
5760 .type = PM_LOCAL_VARIABLE_READ_NODE,
5761 .node_id = PM_NODE_IDENTIFY(parser),
5762 .location = PM_LOCATION_TOKEN_VALUE(name)
5763 },
5764 .name = name_id,
5765 .depth = depth
5766 };
5767
5768 return node;
5769}
5770
5774static pm_local_variable_read_node_t *
5775pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5776 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5777 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5778}
5779
5784static pm_local_variable_read_node_t *
5785pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5786 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5787 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5788}
5789
5793static pm_local_variable_write_node_t *
5794pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5795 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5796
5797 *node = (pm_local_variable_write_node_t) {
5798 {
5799 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5800 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5801 .node_id = PM_NODE_IDENTIFY(parser),
5802 .location = {
5803 .start = name_loc->start,
5804 .end = value->location.end
5805 }
5806 },
5807 .name = name,
5808 .depth = depth,
5809 .value = value,
5810 .name_loc = *name_loc,
5811 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5812 };
5813
5814 return node;
5815}
5816
/**
 * Returns true if the byte range [start, end) is exactly the two bytes `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
5824
/**
 * Returns true if the byte range [start, end) is exactly two bytes long, begins
 * with an underscore, and its second byte is a decimal digit other than `0`.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
5833
5838static inline void
5839pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5840 if (pm_token_is_numbered_parameter(start, end)) {
5841 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5842 }
5843}
5844
5849static pm_local_variable_target_node_t *
5850pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5851 pm_refute_numbered_parameter(parser, location->start, location->end);
5852 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5853
5854 *node = (pm_local_variable_target_node_t) {
5855 {
5856 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5857 .node_id = PM_NODE_IDENTIFY(parser),
5858 .location = *location
5859 },
5860 .name = name,
5861 .depth = depth
5862 };
5863
5864 return node;
5865}
5866
5870static pm_match_predicate_node_t *
5871pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5872 pm_assert_value_expression(parser, value);
5873
5874 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5875
5876 *node = (pm_match_predicate_node_t) {
5877 {
5878 .type = PM_MATCH_PREDICATE_NODE,
5879 .node_id = PM_NODE_IDENTIFY(parser),
5880 .location = {
5881 .start = value->location.start,
5882 .end = pattern->location.end
5883 }
5884 },
5885 .value = value,
5886 .pattern = pattern,
5887 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5888 };
5889
5890 return node;
5891}
5892
5896static pm_match_required_node_t *
5897pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5898 pm_assert_value_expression(parser, value);
5899
5900 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5901
5902 *node = (pm_match_required_node_t) {
5903 {
5904 .type = PM_MATCH_REQUIRED_NODE,
5905 .node_id = PM_NODE_IDENTIFY(parser),
5906 .location = {
5907 .start = value->location.start,
5908 .end = pattern->location.end
5909 }
5910 },
5911 .value = value,
5912 .pattern = pattern,
5913 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5914 };
5915
5916 return node;
5917}
5918
5922static pm_match_write_node_t *
5923pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5924 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5925
5926 *node = (pm_match_write_node_t) {
5927 {
5928 .type = PM_MATCH_WRITE_NODE,
5929 .node_id = PM_NODE_IDENTIFY(parser),
5930 .location = call->base.location
5931 },
5932 .call = call,
5933 .targets = { 0 }
5934 };
5935
5936 return node;
5937}
5938
5942static pm_module_node_t *
5943pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5944 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5945
5946 *node = (pm_module_node_t) {
5947 {
5948 .type = PM_MODULE_NODE,
5949 .node_id = PM_NODE_IDENTIFY(parser),
5950 .location = {
5951 .start = module_keyword->start,
5952 .end = end_keyword->end
5953 }
5954 },
5955 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5956 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5957 .constant_path = constant_path,
5958 .body = body,
5959 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5960 .name = pm_parser_constant_id_token(parser, name)
5961 };
5962
5963 return node;
5964}
5965
5969static pm_multi_target_node_t *
5970pm_multi_target_node_create(pm_parser_t *parser) {
5971 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5972
5973 *node = (pm_multi_target_node_t) {
5974 {
5975 .type = PM_MULTI_TARGET_NODE,
5976 .node_id = PM_NODE_IDENTIFY(parser),
5977 .location = { .start = NULL, .end = NULL }
5978 },
5979 .lefts = { 0 },
5980 .rest = NULL,
5981 .rights = { 0 },
5982 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5983 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5984 };
5985
5986 return node;
5987}
5988
5992static void
5993pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5994 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5995 if (node->rest == NULL) {
5996 node->rest = target;
5997 } else {
5998 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5999 pm_node_list_append(&node->rights, target);
6000 }
6001 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
6002 if (node->rest == NULL) {
6003 node->rest = target;
6004 } else {
6005 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
6006 pm_node_list_append(&node->rights, target);
6007 }
6008 } else if (node->rest == NULL) {
6009 pm_node_list_append(&node->lefts, target);
6010 } else {
6011 pm_node_list_append(&node->rights, target);
6012 }
6013
6014 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
6015 node->base.location.start = target->location.start;
6016 }
6017
6018 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6019 node->base.location.end = target->location.end;
6020 }
6021}
6022
6026static void
6027pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6028 node->base.location.start = lparen->start;
6029 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6030}
6031
6035static void
6036pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6037 node->base.location.end = rparen->end;
6038 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6039}
6040
6044static pm_multi_write_node_t *
6045pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6046 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6047
6048 *node = (pm_multi_write_node_t) {
6049 {
6050 .type = PM_MULTI_WRITE_NODE,
6051 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6052 .node_id = PM_NODE_IDENTIFY(parser),
6053 .location = {
6054 .start = target->base.location.start,
6055 .end = value->location.end
6056 }
6057 },
6058 .lefts = target->lefts,
6059 .rest = target->rest,
6060 .rights = target->rights,
6061 .lparen_loc = target->lparen_loc,
6062 .rparen_loc = target->rparen_loc,
6063 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6064 .value = value
6065 };
6066
6067 // Explicitly do not call pm_node_destroy here because we want to keep
6068 // around all of the information within the MultiWriteNode node.
6069 xfree(target);
6070
6071 return node;
6072}
6073
6077static pm_next_node_t *
6078pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6079 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6080 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6081
6082 *node = (pm_next_node_t) {
6083 {
6084 .type = PM_NEXT_NODE,
6085 .node_id = PM_NODE_IDENTIFY(parser),
6086 .location = {
6087 .start = keyword->start,
6088 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6089 }
6090 },
6091 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6092 .arguments = arguments
6093 };
6094
6095 return node;
6096}
6097
6101static pm_nil_node_t *
6102pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6103 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6104 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6105
6106 *node = (pm_nil_node_t) {{
6107 .type = PM_NIL_NODE,
6108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6109 .node_id = PM_NODE_IDENTIFY(parser),
6110 .location = PM_LOCATION_TOKEN_VALUE(token)
6111 }};
6112
6113 return node;
6114}
6115
6119static pm_no_keywords_parameter_node_t *
6120pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6121 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6122 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6123 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6124
6125 *node = (pm_no_keywords_parameter_node_t) {
6126 {
6127 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6128 .node_id = PM_NODE_IDENTIFY(parser),
6129 .location = {
6130 .start = operator->start,
6131 .end = keyword->end
6132 }
6133 },
6134 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6135 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6136 };
6137
6138 return node;
6139}
6140
6144static pm_numbered_parameters_node_t *
6145pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6146 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6147
6148 *node = (pm_numbered_parameters_node_t) {
6149 {
6150 .type = PM_NUMBERED_PARAMETERS_NODE,
6151 .node_id = PM_NODE_IDENTIFY(parser),
6152 .location = *location
6153 },
6154 .maximum = maximum
6155 };
6156
6157 return node;
6158}
6159
6164#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6165
6172static uint32_t
6173pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6174 const uint8_t *start = token->start + 1;
6175 const uint8_t *end = token->end;
6176
6177 ptrdiff_t diff = end - start;
6178 assert(diff > 0);
6179#if PTRDIFF_MAX > SIZE_MAX
6180 assert(diff < (ptrdiff_t) SIZE_MAX);
6181#endif
6182 size_t length = (size_t) diff;
6183
6184 char *digits = xcalloc(length + 1, sizeof(char));
6185 memcpy(digits, start, length);
6186 digits[length] = '\0';
6187
6188 char *endptr;
6189 errno = 0;
6190 unsigned long value = strtoul(digits, &endptr, 10);
6191
6192 if ((digits == endptr) || (*endptr != '\0')) {
6193 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6194 value = 0;
6195 }
6196
6197 xfree(digits);
6198
6199 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6200 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6201 value = 0;
6202 }
6203
6204 return (uint32_t) value;
6205}
6206
6207#undef NTH_REF_MAX
6208
6212static pm_numbered_reference_read_node_t *
6213pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6214 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6215 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6216
6217 *node = (pm_numbered_reference_read_node_t) {
6218 {
6219 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6220 .node_id = PM_NODE_IDENTIFY(parser),
6221 .location = PM_LOCATION_TOKEN_VALUE(name),
6222 },
6223 .number = pm_numbered_reference_read_node_number(parser, name)
6224 };
6225
6226 return node;
6227}
6228
6232static pm_optional_parameter_node_t *
6233pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6234 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6235
6236 *node = (pm_optional_parameter_node_t) {
6237 {
6238 .type = PM_OPTIONAL_PARAMETER_NODE,
6239 .node_id = PM_NODE_IDENTIFY(parser),
6240 .location = {
6241 .start = name->start,
6242 .end = value->location.end
6243 }
6244 },
6245 .name = pm_parser_constant_id_token(parser, name),
6246 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6247 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6248 .value = value
6249 };
6250
6251 return node;
6252}
6253
6257static pm_or_node_t *
6258pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6259 pm_assert_value_expression(parser, left);
6260
6261 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6262
6263 *node = (pm_or_node_t) {
6264 {
6265 .type = PM_OR_NODE,
6266 .node_id = PM_NODE_IDENTIFY(parser),
6267 .location = {
6268 .start = left->location.start,
6269 .end = right->location.end
6270 }
6271 },
6272 .left = left,
6273 .right = right,
6274 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6275 };
6276
6277 return node;
6278}
6279
6283static pm_parameters_node_t *
6284pm_parameters_node_create(pm_parser_t *parser) {
6285 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6286
6287 *node = (pm_parameters_node_t) {
6288 {
6289 .type = PM_PARAMETERS_NODE,
6290 .node_id = PM_NODE_IDENTIFY(parser),
6291 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6292 },
6293 .rest = NULL,
6294 .keyword_rest = NULL,
6295 .block = NULL,
6296 .requireds = { 0 },
6297 .optionals = { 0 },
6298 .posts = { 0 },
6299 .keywords = { 0 }
6300 };
6301
6302 return node;
6303}
6304
6308static void
6309pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6310 if (params->base.location.start == NULL) {
6311 params->base.location.start = param->location.start;
6312 } else {
6313 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6314 }
6315
6316 if (params->base.location.end == NULL) {
6317 params->base.location.end = param->location.end;
6318 } else {
6319 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6320 }
6321}
6322
6326static void
6327pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6328 pm_parameters_node_location_set(params, param);
6329 pm_node_list_append(&params->requireds, param);
6330}
6331
6335static void
6336pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6337 pm_parameters_node_location_set(params, (pm_node_t *) param);
6338 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6339}
6340
6344static void
6345pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6346 pm_parameters_node_location_set(params, param);
6347 pm_node_list_append(&params->posts, param);
6348}
6349
6353static void
6354pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6355 pm_parameters_node_location_set(params, param);
6356 params->rest = param;
6357}
6358
6362static void
6363pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6364 pm_parameters_node_location_set(params, param);
6365 pm_node_list_append(&params->keywords, param);
6366}
6367
6371static void
6372pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6373 assert(params->keyword_rest == NULL);
6374 pm_parameters_node_location_set(params, param);
6375 params->keyword_rest = param;
6376}
6377
6381static void
6382pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6383 assert(params->block == NULL);
6384 pm_parameters_node_location_set(params, (pm_node_t *) param);
6385 params->block = param;
6386}
6387
6391static pm_program_node_t *
6392pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6393 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6394
6395 *node = (pm_program_node_t) {
6396 {
6397 .type = PM_PROGRAM_NODE,
6398 .node_id = PM_NODE_IDENTIFY(parser),
6399 .location = {
6400 .start = statements == NULL ? parser->start : statements->base.location.start,
6401 .end = statements == NULL ? parser->end : statements->base.location.end
6402 }
6403 },
6404 .locals = *locals,
6405 .statements = statements
6406 };
6407
6408 return node;
6409}
6410
6414static pm_parentheses_node_t *
6415pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6416 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6417
6418 *node = (pm_parentheses_node_t) {
6419 {
6420 .type = PM_PARENTHESES_NODE,
6421 .flags = flags,
6422 .node_id = PM_NODE_IDENTIFY(parser),
6423 .location = {
6424 .start = opening->start,
6425 .end = closing->end
6426 }
6427 },
6428 .body = body,
6429 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6430 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6431 };
6432
6433 return node;
6434}
6435
6439static pm_pinned_expression_node_t *
6440pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6441 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6442
6443 *node = (pm_pinned_expression_node_t) {
6444 {
6445 .type = PM_PINNED_EXPRESSION_NODE,
6446 .node_id = PM_NODE_IDENTIFY(parser),
6447 .location = {
6448 .start = operator->start,
6449 .end = rparen->end
6450 }
6451 },
6452 .expression = expression,
6453 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6454 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6455 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6456 };
6457
6458 return node;
6459}
6460
6464static pm_pinned_variable_node_t *
6465pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6466 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6467
6468 *node = (pm_pinned_variable_node_t) {
6469 {
6470 .type = PM_PINNED_VARIABLE_NODE,
6471 .node_id = PM_NODE_IDENTIFY(parser),
6472 .location = {
6473 .start = operator->start,
6474 .end = variable->location.end
6475 }
6476 },
6477 .variable = variable,
6478 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6479 };
6480
6481 return node;
6482}
6483
6487static pm_post_execution_node_t *
6488pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6489 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6490
6491 *node = (pm_post_execution_node_t) {
6492 {
6493 .type = PM_POST_EXECUTION_NODE,
6494 .node_id = PM_NODE_IDENTIFY(parser),
6495 .location = {
6496 .start = keyword->start,
6497 .end = closing->end
6498 }
6499 },
6500 .statements = statements,
6501 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6502 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6503 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6504 };
6505
6506 return node;
6507}
6508
6512static pm_pre_execution_node_t *
6513pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6514 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6515
6516 *node = (pm_pre_execution_node_t) {
6517 {
6518 .type = PM_PRE_EXECUTION_NODE,
6519 .node_id = PM_NODE_IDENTIFY(parser),
6520 .location = {
6521 .start = keyword->start,
6522 .end = closing->end
6523 }
6524 },
6525 .statements = statements,
6526 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6527 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6528 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6529 };
6530
6531 return node;
6532}
6533
6537static pm_range_node_t *
6538pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6539 pm_assert_value_expression(parser, left);
6540 pm_assert_value_expression(parser, right);
6541
6542 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6543 pm_node_flags_t flags = 0;
6544
6545 // Indicate that this node is an exclusive range if the operator is `...`.
6546 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6547 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6548 }
6549
6550 // Indicate that this node is a static literal (i.e., can be compiled with
6551 // a putobject in CRuby) if the left and right are implicit nil, explicit
6552 // nil, or integers.
6553 if (
6554 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6555 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6556 ) {
6557 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6558 }
6559
6560 *node = (pm_range_node_t) {
6561 {
6562 .type = PM_RANGE_NODE,
6563 .flags = flags,
6564 .node_id = PM_NODE_IDENTIFY(parser),
6565 .location = {
6566 .start = (left == NULL ? operator->start : left->location.start),
6567 .end = (right == NULL ? operator->end : right->location.end)
6568 }
6569 },
6570 .left = left,
6571 .right = right,
6572 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6573 };
6574
6575 return node;
6576}
6577
6581static pm_redo_node_t *
6582pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6583 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6584 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6585
6586 *node = (pm_redo_node_t) {{
6587 .type = PM_REDO_NODE,
6588 .node_id = PM_NODE_IDENTIFY(parser),
6589 .location = PM_LOCATION_TOKEN_VALUE(token)
6590 }};
6591
6592 return node;
6593}
6594
6599static pm_regular_expression_node_t *
6600pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6601 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6602
6603 *node = (pm_regular_expression_node_t) {
6604 {
6605 .type = PM_REGULAR_EXPRESSION_NODE,
6606 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6607 .node_id = PM_NODE_IDENTIFY(parser),
6608 .location = {
6609 .start = MIN(opening->start, closing->start),
6610 .end = MAX(opening->end, closing->end)
6611 }
6612 },
6613 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6614 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6615 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6616 .unescaped = *unescaped
6617 };
6618
6619 return node;
6620}
6621
6625static inline pm_regular_expression_node_t *
6626pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6627 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6628}
6629
6633static pm_required_parameter_node_t *
6634pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6635 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6636
6637 *node = (pm_required_parameter_node_t) {
6638 {
6639 .type = PM_REQUIRED_PARAMETER_NODE,
6640 .node_id = PM_NODE_IDENTIFY(parser),
6641 .location = PM_LOCATION_TOKEN_VALUE(token)
6642 },
6643 .name = pm_parser_constant_id_token(parser, token)
6644 };
6645
6646 return node;
6647}
6648
6652static pm_rescue_modifier_node_t *
6653pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6654 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6655
6656 *node = (pm_rescue_modifier_node_t) {
6657 {
6658 .type = PM_RESCUE_MODIFIER_NODE,
6659 .node_id = PM_NODE_IDENTIFY(parser),
6660 .location = {
6661 .start = expression->location.start,
6662 .end = rescue_expression->location.end
6663 }
6664 },
6665 .expression = expression,
6666 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6667 .rescue_expression = rescue_expression
6668 };
6669
6670 return node;
6671}
6672
6676static pm_rescue_node_t *
6677pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6678 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6679
6680 *node = (pm_rescue_node_t) {
6681 {
6682 .type = PM_RESCUE_NODE,
6683 .node_id = PM_NODE_IDENTIFY(parser),
6684 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6685 },
6686 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6687 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6688 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6689 .reference = NULL,
6690 .statements = NULL,
6691 .subsequent = NULL,
6692 .exceptions = { 0 }
6693 };
6694
6695 return node;
6696}
6697
6698static inline void
6699pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6700 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6701}
6702
6706static void
6707pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6708 node->reference = reference;
6709 node->base.location.end = reference->location.end;
6710}
6711
6715static void
6716pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6717 node->statements = statements;
6718 if (pm_statements_node_body_length(statements) > 0) {
6719 node->base.location.end = statements->base.location.end;
6720 }
6721}
6722
6726static void
6727pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6728 node->subsequent = subsequent;
6729 node->base.location.end = subsequent->base.location.end;
6730}
6731
6735static void
6736pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6737 pm_node_list_append(&node->exceptions, exception);
6738 node->base.location.end = exception->location.end;
6739}
6740
6744static pm_rest_parameter_node_t *
6745pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6746 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6747
6748 *node = (pm_rest_parameter_node_t) {
6749 {
6750 .type = PM_REST_PARAMETER_NODE,
6751 .node_id = PM_NODE_IDENTIFY(parser),
6752 .location = {
6753 .start = operator->start,
6754 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6755 }
6756 },
6757 .name = pm_parser_optional_constant_id_token(parser, name),
6758 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6759 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6760 };
6761
6762 return node;
6763}
6764
6768static pm_retry_node_t *
6769pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6770 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6771 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6772
6773 *node = (pm_retry_node_t) {{
6774 .type = PM_RETRY_NODE,
6775 .node_id = PM_NODE_IDENTIFY(parser),
6776 .location = PM_LOCATION_TOKEN_VALUE(token)
6777 }};
6778
6779 return node;
6780}
6781
6785static pm_return_node_t *
6786pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6787 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6788
6789 *node = (pm_return_node_t) {
6790 {
6791 .type = PM_RETURN_NODE,
6792 .node_id = PM_NODE_IDENTIFY(parser),
6793 .location = {
6794 .start = keyword->start,
6795 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6796 }
6797 },
6798 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6799 .arguments = arguments
6800 };
6801
6802 return node;
6803}
6804
6808static pm_self_node_t *
6809pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6810 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6811 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6812
6813 *node = (pm_self_node_t) {{
6814 .type = PM_SELF_NODE,
6815 .node_id = PM_NODE_IDENTIFY(parser),
6816 .location = PM_LOCATION_TOKEN_VALUE(token)
6817 }};
6818
6819 return node;
6820}
6821
6825static pm_shareable_constant_node_t *
6826pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6827 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6828
6829 *node = (pm_shareable_constant_node_t) {
6830 {
6831 .type = PM_SHAREABLE_CONSTANT_NODE,
6832 .flags = (pm_node_flags_t) value,
6833 .node_id = PM_NODE_IDENTIFY(parser),
6834 .location = PM_LOCATION_NODE_VALUE(write)
6835 },
6836 .write = write
6837 };
6838
6839 return node;
6840}
6841
6845static pm_singleton_class_node_t *
6846pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6847 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6848
6849 *node = (pm_singleton_class_node_t) {
6850 {
6851 .type = PM_SINGLETON_CLASS_NODE,
6852 .node_id = PM_NODE_IDENTIFY(parser),
6853 .location = {
6854 .start = class_keyword->start,
6855 .end = end_keyword->end
6856 }
6857 },
6858 .locals = *locals,
6859 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6860 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6861 .expression = expression,
6862 .body = body,
6863 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6864 };
6865
6866 return node;
6867}
6868
6872static pm_source_encoding_node_t *
6873pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6874 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6875 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6876
6877 *node = (pm_source_encoding_node_t) {{
6878 .type = PM_SOURCE_ENCODING_NODE,
6879 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6880 .node_id = PM_NODE_IDENTIFY(parser),
6881 .location = PM_LOCATION_TOKEN_VALUE(token)
6882 }};
6883
6884 return node;
6885}
6886
6890static pm_source_file_node_t*
6891pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6892 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6893 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6894
6895 pm_node_flags_t flags = 0;
6896
6897 switch (parser->frozen_string_literal) {
6898 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6899 flags |= PM_STRING_FLAGS_MUTABLE;
6900 break;
6901 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6902 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6903 break;
6904 }
6905
6906 *node = (pm_source_file_node_t) {
6907 {
6908 .type = PM_SOURCE_FILE_NODE,
6909 .flags = flags,
6910 .node_id = PM_NODE_IDENTIFY(parser),
6911 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6912 },
6913 .filepath = parser->filepath
6914 };
6915
6916 return node;
6917}
6918
6922static pm_source_line_node_t *
6923pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6924 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6925 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6926
6927 *node = (pm_source_line_node_t) {{
6928 .type = PM_SOURCE_LINE_NODE,
6929 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6930 .node_id = PM_NODE_IDENTIFY(parser),
6931 .location = PM_LOCATION_TOKEN_VALUE(token)
6932 }};
6933
6934 return node;
6935}
6936
6940static pm_splat_node_t *
6941pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6942 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6943
6944 *node = (pm_splat_node_t) {
6945 {
6946 .type = PM_SPLAT_NODE,
6947 .node_id = PM_NODE_IDENTIFY(parser),
6948 .location = {
6949 .start = operator->start,
6950 .end = (expression == NULL ? operator->end : expression->location.end)
6951 }
6952 },
6953 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6954 .expression = expression
6955 };
6956
6957 return node;
6958}
6959
6963static pm_statements_node_t *
6964pm_statements_node_create(pm_parser_t *parser) {
6965 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6966
6967 *node = (pm_statements_node_t) {
6968 {
6969 .type = PM_STATEMENTS_NODE,
6970 .node_id = PM_NODE_IDENTIFY(parser),
6971 .location = PM_LOCATION_NULL_VALUE(parser)
6972 },
6973 .body = { 0 }
6974 };
6975
6976 return node;
6977}
6978
6982static size_t
6983pm_statements_node_body_length(pm_statements_node_t *node) {
6984 return node && node->body.size;
6985}
6986
6990static void
6991pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6992 node->base.location = (pm_location_t) { .start = start, .end = end };
6993}
6994
6999static inline void
7000pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
7001 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
7002 node->base.location.start = statement->location.start;
7003 }
7004
7005 if (statement->location.end > node->base.location.end) {
7006 node->base.location.end = statement->location.end;
7007 }
7008}
7009
7013static void
7014pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
7015 pm_statements_node_body_update(node, statement);
7016
7017 if (node->body.size > 0) {
7018 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
7019
7020 switch (PM_NODE_TYPE(previous)) {
7021 case PM_BREAK_NODE:
7022 case PM_NEXT_NODE:
7023 case PM_REDO_NODE:
7024 case PM_RETRY_NODE:
7025 case PM_RETURN_NODE:
7026 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7027 break;
7028 default:
7029 break;
7030 }
7031 }
7032
7033 pm_node_list_append(&node->body, statement);
7034 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7035}
7036
7040static void
7041pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7042 pm_statements_node_body_update(node, statement);
7043 pm_node_list_prepend(&node->body, statement);
7044 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7045}
7046
7050static inline pm_string_node_t *
7051pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7052 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7053 pm_node_flags_t flags = 0;
7054
7055 switch (parser->frozen_string_literal) {
7056 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7057 flags = PM_STRING_FLAGS_MUTABLE;
7058 break;
7059 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7060 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7061 break;
7062 }
7063
7064 *node = (pm_string_node_t) {
7065 {
7066 .type = PM_STRING_NODE,
7067 .flags = flags,
7068 .node_id = PM_NODE_IDENTIFY(parser),
7069 .location = {
7070 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7071 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7072 }
7073 },
7074 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7075 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7076 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7077 .unescaped = *string
7078 };
7079
7080 return node;
7081}
7082
7086static pm_string_node_t *
7087pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7088 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7089}
7090
7095static pm_string_node_t *
7096pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7097 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7098 parser->current_string = PM_STRING_EMPTY;
7099 return node;
7100}
7101
7105static pm_super_node_t *
7106pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7107 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7108 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7109
7110 const uint8_t *end = pm_arguments_end(arguments);
7111 if (end == NULL) {
7112 assert(false && "unreachable");
7113 }
7114
7115 *node = (pm_super_node_t) {
7116 {
7117 .type = PM_SUPER_NODE,
7118 .node_id = PM_NODE_IDENTIFY(parser),
7119 .location = {
7120 .start = keyword->start,
7121 .end = end,
7122 }
7123 },
7124 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7125 .lparen_loc = arguments->opening_loc,
7126 .arguments = arguments->arguments,
7127 .rparen_loc = arguments->closing_loc,
7128 .block = arguments->block
7129 };
7130
7131 return node;
7132}
7133
7138static bool
7139pm_ascii_only_p(const pm_string_t *contents) {
7140 const size_t length = pm_string_length(contents);
7141 const uint8_t *source = pm_string_source(contents);
7142
7143 for (size_t index = 0; index < length; index++) {
7144 if (source[index] & 0x80) return false;
7145 }
7146
7147 return true;
7148}
7149
7153static void
7154parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7155 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7156 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7157
7158 if (width == 0) {
7159 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7160 break;
7161 }
7162
7163 cursor += width;
7164 }
7165}
7166
7171static void
7172parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7173 const pm_encoding_t *encoding = parser->encoding;
7174
7175 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7176 size_t width = encoding->char_width(cursor, end - cursor);
7177
7178 if (width == 0) {
7179 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7180 break;
7181 }
7182
7183 cursor += width;
7184 }
7185}
7186
7196static inline pm_node_flags_t
7197parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7198 if (parser->explicit_encoding != NULL) {
7199 // A Symbol may optionally have its encoding explicitly set. This will
7200 // happen if an escape sequence results in a non-ASCII code point.
7201 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7202 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7203 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7204 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7205 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7206 } else if (validate) {
7207 parse_symbol_encoding_validate_other(parser, location, contents);
7208 }
7209 } else if (pm_ascii_only_p(contents)) {
7210 // Ruby stipulates that all source files must use an ASCII-compatible
7211 // encoding. Thus, all symbols appearing in source are eligible for
7212 // "downgrading" to US-ASCII.
7213 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7214 } else if (validate) {
7215 parse_symbol_encoding_validate_other(parser, location, contents);
7216 }
7217
7218 return 0;
7219}
7220
7221static pm_node_flags_t
7222parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7223 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7224 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7225 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7226 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7227
7228 // There's special validation logic used if a string does not contain any character escape sequences.
7229 if (parser->explicit_encoding == NULL) {
7230 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7231 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7232 // the US-ASCII encoding.
7233 if (ascii_only) {
7234 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7235 }
7236
7237 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7238 if (!ascii_only) {
7239 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7240 }
7241 } else if (parser->encoding != modifier_encoding) {
7242 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7243
7244 if (modifier == 'n' && !ascii_only) {
7245 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7246 }
7247 }
7248
7249 return flags;
7250 }
7251
7252 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7253 bool mixed_encoding = false;
7254
7255 if (mixed_encoding) {
7256 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7257 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7258 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7259 bool valid_string_in_modifier_encoding = true;
7260
7261 if (!valid_string_in_modifier_encoding) {
7262 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7263 }
7264 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7265 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7266 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7267 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7268 }
7269 }
7270
7271 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7272 return flags;
7273}
7274
7281static pm_node_flags_t
7282parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7283 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7284 bool valid_unicode_range = true;
7285 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7286 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7287 return flags;
7288 }
7289
7290 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7291 // to multi-byte characters are allowed.
7292 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7293 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7294 // following error message appearing twice. We do the same for compatibility.
7295 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7296 }
7297
7306 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7307 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7308 }
7309
7310 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7311 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7312 }
7313
7314 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7315 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7316 }
7317
7318 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7319 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7320 }
7321
7322 // At this point no encoding modifiers will be present on the regular expression as they would have already
7323 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7324 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7325 if (ascii_only) {
7326 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7327 }
7328
7329 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7330 // or by specifying a modifier.
7331 //
7332 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7333 if (parser->explicit_encoding != NULL) {
7334 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7335 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7336 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7337 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7338 }
7339 }
7340
7341 return 0;
7342}
7343
7348static pm_symbol_node_t *
7349pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7350 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7351
7352 *node = (pm_symbol_node_t) {
7353 {
7354 .type = PM_SYMBOL_NODE,
7355 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7356 .node_id = PM_NODE_IDENTIFY(parser),
7357 .location = {
7358 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7359 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7360 }
7361 },
7362 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7363 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7364 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7365 .unescaped = *unescaped
7366 };
7367
7368 return node;
7369}
7370
7374static inline pm_symbol_node_t *
7375pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7376 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7377}
7378
7382static pm_symbol_node_t *
7383pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7384 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7385 parser->current_string = PM_STRING_EMPTY;
7386 return node;
7387}
7388
7392static pm_symbol_node_t *
7393pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7394 pm_symbol_node_t *node;
7395
7396 switch (token->type) {
7397 case PM_TOKEN_LABEL: {
7398 pm_token_t opening = not_provided(parser);
7399 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7400
7401 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7402 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7403
7404 assert((label.end - label.start) >= 0);
7405 pm_string_shared_init(&node->unescaped, label.start, label.end);
7406 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7407
7408 break;
7409 }
7410 case PM_TOKEN_MISSING: {
7411 pm_token_t opening = not_provided(parser);
7412 pm_token_t closing = not_provided(parser);
7413
7414 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7415 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7416 break;
7417 }
7418 default:
7419 assert(false && "unreachable");
7420 node = NULL;
7421 break;
7422 }
7423
7424 return node;
7425}
7426
7430static pm_symbol_node_t *
7431pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7432 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7433
7434 *node = (pm_symbol_node_t) {
7435 {
7436 .type = PM_SYMBOL_NODE,
7437 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7438 .node_id = PM_NODE_IDENTIFY(parser),
7439 .location = PM_LOCATION_NULL_VALUE(parser)
7440 },
7441 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7442 .unescaped = { 0 }
7443 };
7444
7445 pm_string_constant_init(&node->unescaped, content, strlen(content));
7446 return node;
7447}
7448
7452static bool
7453pm_symbol_node_label_p(pm_node_t *node) {
7454 const uint8_t *end = NULL;
7455
7456 switch (PM_NODE_TYPE(node)) {
7457 case PM_SYMBOL_NODE:
7458 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7459 break;
7460 case PM_INTERPOLATED_SYMBOL_NODE:
7461 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7462 break;
7463 default:
7464 return false;
7465 }
7466
7467 return (end != NULL) && (end[-1] == ':');
7468}
7469
7473static pm_symbol_node_t *
7474pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7475 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7476
7477 *new_node = (pm_symbol_node_t) {
7478 {
7479 .type = PM_SYMBOL_NODE,
7480 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7481 .node_id = PM_NODE_IDENTIFY(parser),
7482 .location = {
7483 .start = opening->start,
7484 .end = closing->end
7485 }
7486 },
7487 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7488 .value_loc = node->content_loc,
7489 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7490 .unescaped = node->unescaped
7491 };
7492
7493 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7494 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7495
7496 // We are explicitly _not_ using pm_node_destroy here because we don't want
7497 // to trash the unescaped string. We could instead copy the string if we
7498 // know that it is owned, but we're taking the fast path for now.
7499 xfree(node);
7500
7501 return new_node;
7502}
7503
7507static pm_string_node_t *
7508pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7509 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7510 pm_node_flags_t flags = 0;
7511
7512 switch (parser->frozen_string_literal) {
7513 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7514 flags = PM_STRING_FLAGS_MUTABLE;
7515 break;
7516 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7517 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7518 break;
7519 }
7520
7521 *new_node = (pm_string_node_t) {
7522 {
7523 .type = PM_STRING_NODE,
7524 .flags = flags,
7525 .node_id = PM_NODE_IDENTIFY(parser),
7526 .location = node->base.location
7527 },
7528 .opening_loc = node->opening_loc,
7529 .content_loc = node->value_loc,
7530 .closing_loc = node->closing_loc,
7531 .unescaped = node->unescaped
7532 };
7533
7534 // We are explicitly _not_ using pm_node_destroy here because we don't want
7535 // to trash the unescaped string. We could instead copy the string if we
7536 // know that it is owned, but we're taking the fast path for now.
7537 xfree(node);
7538
7539 return new_node;
7540}
7541
7545static pm_true_node_t *
7546pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7547 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7548 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7549
7550 *node = (pm_true_node_t) {{
7551 .type = PM_TRUE_NODE,
7552 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7553 .node_id = PM_NODE_IDENTIFY(parser),
7554 .location = PM_LOCATION_TOKEN_VALUE(token)
7555 }};
7556
7557 return node;
7558}
7559
7563static pm_true_node_t *
7564pm_true_node_synthesized_create(pm_parser_t *parser) {
7565 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7566
7567 *node = (pm_true_node_t) {{
7568 .type = PM_TRUE_NODE,
7569 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7570 .node_id = PM_NODE_IDENTIFY(parser),
7571 .location = { .start = parser->start, .end = parser->end }
7572 }};
7573
7574 return node;
7575}
7576
7580static pm_undef_node_t *
7581pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7582 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7583 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7584
7585 *node = (pm_undef_node_t) {
7586 {
7587 .type = PM_UNDEF_NODE,
7588 .node_id = PM_NODE_IDENTIFY(parser),
7589 .location = PM_LOCATION_TOKEN_VALUE(token),
7590 },
7591 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7592 .names = { 0 }
7593 };
7594
7595 return node;
7596}
7597
7601static void
7602pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7603 node->base.location.end = name->location.end;
7604 pm_node_list_append(&node->names, name);
7605}
7606
7610static pm_unless_node_t *
7611pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7612 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7613 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7614
7615 const uint8_t *end;
7616 if (statements != NULL) {
7617 end = statements->base.location.end;
7618 } else {
7619 end = predicate->location.end;
7620 }
7621
7622 *node = (pm_unless_node_t) {
7623 {
7624 .type = PM_UNLESS_NODE,
7625 .flags = PM_NODE_FLAG_NEWLINE,
7626 .node_id = PM_NODE_IDENTIFY(parser),
7627 .location = {
7628 .start = keyword->start,
7629 .end = end
7630 },
7631 },
7632 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7633 .predicate = predicate,
7634 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7635 .statements = statements,
7636 .else_clause = NULL,
7637 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7638 };
7639
7640 return node;
7641}
7642
7646static pm_unless_node_t *
7647pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7648 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7649 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7650
7651 pm_statements_node_t *statements = pm_statements_node_create(parser);
7652 pm_statements_node_body_append(parser, statements, statement, true);
7653
7654 *node = (pm_unless_node_t) {
7655 {
7656 .type = PM_UNLESS_NODE,
7657 .flags = PM_NODE_FLAG_NEWLINE,
7658 .node_id = PM_NODE_IDENTIFY(parser),
7659 .location = {
7660 .start = statement->location.start,
7661 .end = predicate->location.end
7662 },
7663 },
7664 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7665 .predicate = predicate,
7666 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7667 .statements = statements,
7668 .else_clause = NULL,
7669 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7670 };
7671
7672 return node;
7673}
7674
7675static inline void
7676pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7677 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7678 node->base.location.end = end_keyword->end;
7679}
7680
7686static void
7687pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7688 assert(parser->current_block_exits != NULL);
7689
7690 // All of the block exits that we want to remove should be within the
7691 // statements, and since we are modifying the statements, we shouldn't have
7692 // to check the end location.
7693 const uint8_t *start = statements->base.location.start;
7694
7695 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7696 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7697 if (block_exit->location.start < start) break;
7698
7699 // Implicitly remove from the list by lowering the size.
7700 parser->current_block_exits->size--;
7701 }
7702}
7703
7707static pm_until_node_t *
7708pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7709 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7710 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7711
7712 *node = (pm_until_node_t) {
7713 {
7714 .type = PM_UNTIL_NODE,
7715 .flags = flags,
7716 .node_id = PM_NODE_IDENTIFY(parser),
7717 .location = {
7718 .start = keyword->start,
7719 .end = closing->end,
7720 },
7721 },
7722 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7723 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7724 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7725 .predicate = predicate,
7726 .statements = statements
7727 };
7728
7729 return node;
7730}
7731
7735static pm_until_node_t *
7736pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7737 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7738 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7739 pm_loop_modifier_block_exits(parser, statements);
7740
7741 *node = (pm_until_node_t) {
7742 {
7743 .type = PM_UNTIL_NODE,
7744 .flags = flags,
7745 .node_id = PM_NODE_IDENTIFY(parser),
7746 .location = {
7747 .start = statements->base.location.start,
7748 .end = predicate->location.end,
7749 },
7750 },
7751 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7752 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7753 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7754 .predicate = predicate,
7755 .statements = statements
7756 };
7757
7758 return node;
7759}
7760
7764static pm_when_node_t *
7765pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7766 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7767
7768 *node = (pm_when_node_t) {
7769 {
7770 .type = PM_WHEN_NODE,
7771 .node_id = PM_NODE_IDENTIFY(parser),
7772 .location = {
7773 .start = keyword->start,
7774 .end = NULL
7775 }
7776 },
7777 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7778 .statements = NULL,
7779 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7780 .conditions = { 0 }
7781 };
7782
7783 return node;
7784}
7785
7789static void
7790pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7791 node->base.location.end = condition->location.end;
7792 pm_node_list_append(&node->conditions, condition);
7793}
7794
7798static inline void
7799pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7800 node->base.location.end = then_keyword->end;
7801 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7802}
7803
7807static void
7808pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7809 if (statements->base.location.end > node->base.location.end) {
7810 node->base.location.end = statements->base.location.end;
7811 }
7812
7813 node->statements = statements;
7814}
7815
7819static pm_while_node_t *
7820pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7821 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7822 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7823
7824 *node = (pm_while_node_t) {
7825 {
7826 .type = PM_WHILE_NODE,
7827 .flags = flags,
7828 .node_id = PM_NODE_IDENTIFY(parser),
7829 .location = {
7830 .start = keyword->start,
7831 .end = closing->end
7832 },
7833 },
7834 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7835 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7836 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7837 .predicate = predicate,
7838 .statements = statements
7839 };
7840
7841 return node;
7842}
7843
7847static pm_while_node_t *
7848pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7849 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7850 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7851 pm_loop_modifier_block_exits(parser, statements);
7852
7853 *node = (pm_while_node_t) {
7854 {
7855 .type = PM_WHILE_NODE,
7856 .flags = flags,
7857 .node_id = PM_NODE_IDENTIFY(parser),
7858 .location = {
7859 .start = statements->base.location.start,
7860 .end = predicate->location.end
7861 },
7862 },
7863 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7864 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7865 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7866 .predicate = predicate,
7867 .statements = statements
7868 };
7869
7870 return node;
7871}
7872
7876static pm_while_node_t *
7877pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7878 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7879
7880 *node = (pm_while_node_t) {
7881 {
7882 .type = PM_WHILE_NODE,
7883 .node_id = PM_NODE_IDENTIFY(parser),
7884 .location = PM_LOCATION_NULL_VALUE(parser)
7885 },
7886 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7887 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7888 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7889 .predicate = predicate,
7890 .statements = statements
7891 };
7892
7893 return node;
7894}
7895
7900static pm_x_string_node_t *
7901pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7902 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7903
7904 *node = (pm_x_string_node_t) {
7905 {
7906 .type = PM_X_STRING_NODE,
7907 .flags = PM_STRING_FLAGS_FROZEN,
7908 .node_id = PM_NODE_IDENTIFY(parser),
7909 .location = {
7910 .start = opening->start,
7911 .end = closing->end
7912 },
7913 },
7914 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7915 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7916 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7917 .unescaped = *unescaped
7918 };
7919
7920 return node;
7921}
7922
7926static inline pm_x_string_node_t *
7927pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7928 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7929}
7930
7934static pm_yield_node_t *
7935pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7936 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7937
7938 const uint8_t *end;
7939 if (rparen_loc->start != NULL) {
7940 end = rparen_loc->end;
7941 } else if (arguments != NULL) {
7942 end = arguments->base.location.end;
7943 } else if (lparen_loc->start != NULL) {
7944 end = lparen_loc->end;
7945 } else {
7946 end = keyword->end;
7947 }
7948
7949 *node = (pm_yield_node_t) {
7950 {
7951 .type = PM_YIELD_NODE,
7952 .node_id = PM_NODE_IDENTIFY(parser),
7953 .location = {
7954 .start = keyword->start,
7955 .end = end
7956 },
7957 },
7958 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7959 .lparen_loc = *lparen_loc,
7960 .arguments = arguments,
7961 .rparen_loc = *rparen_loc
7962 };
7963
7964 return node;
7965}
7966
7967#undef PM_NODE_ALLOC
7968#undef PM_NODE_IDENTIFY
7969
7974static int
7975pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7976 pm_scope_t *scope = parser->current_scope;
7977 int depth = 0;
7978
7979 while (scope != NULL) {
7980 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7981 if (scope->closed) break;
7982
7983 scope = scope->previous;
7984 depth++;
7985 }
7986
7987 return -1;
7988}
7989
7995static inline int
7996pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7997 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7998}
7999
8003static inline void
8004pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8005 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
8006}
8007
8011static pm_constant_id_t
8012pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8013 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
8014 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
8015 return constant_id;
8016}
8017
8021static inline pm_constant_id_t
8022pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8023 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8024}
8025
8029static pm_constant_id_t
8030pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8031 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8032 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8033 return constant_id;
8034}
8035
8039static pm_constant_id_t
8040pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8041 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8042 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8043 return constant_id;
8044}
8045
8053static bool
8054pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8055 // We want to check whether the parameter name is a numbered parameter or
8056 // not.
8057 pm_refute_numbered_parameter(parser, name->start, name->end);
8058
8059 // Otherwise we'll fetch the constant id for the parameter name and check
8060 // whether it's already in the current scope.
8061 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8062
8063 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8064 // Add an error if the parameter doesn't start with _ and has been seen before
8065 if ((name->start < name->end) && (*name->start != '_')) {
8066 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8067 }
8068 return true;
8069 }
8070 return false;
8071}
8072
8076static void
8077pm_parser_scope_pop(pm_parser_t *parser) {
8078 pm_scope_t *scope = parser->current_scope;
8079 parser->current_scope = scope->previous;
8080 pm_locals_free(&scope->locals);
8081 pm_node_list_free(&scope->implicit_parameters);
8082 xfree(scope);
8083}
8084
8085/******************************************************************************/
8086/* Stack helpers */
8087/******************************************************************************/
8088
8092static inline void
8093pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8094 *stack = (*stack << 1) | (value & 1);
8095}
8096
8100static inline void
8101pm_state_stack_pop(pm_state_stack_t *stack) {
8102 *stack >>= 1;
8103}
8104
8108static inline bool
8109pm_state_stack_p(const pm_state_stack_t *stack) {
8110 return *stack & 1;
8111}
8112
8113static inline void
8114pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8115 // Use the negation of the value to prevent stack overflow.
8116 pm_state_stack_push(&parser->accepts_block_stack, !value);
8117}
8118
8119static inline void
8120pm_accepts_block_stack_pop(pm_parser_t *parser) {
8121 pm_state_stack_pop(&parser->accepts_block_stack);
8122}
8123
8124static inline bool
8125pm_accepts_block_stack_p(pm_parser_t *parser) {
8126 return !pm_state_stack_p(&parser->accepts_block_stack);
8127}
8128
8129static inline void
8130pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8131 pm_state_stack_push(&parser->do_loop_stack, value);
8132}
8133
8134static inline void
8135pm_do_loop_stack_pop(pm_parser_t *parser) {
8136 pm_state_stack_pop(&parser->do_loop_stack);
8137}
8138
8139static inline bool
8140pm_do_loop_stack_p(pm_parser_t *parser) {
8141 return pm_state_stack_p(&parser->do_loop_stack);
8142}
8143
8144/******************************************************************************/
8145/* Lexer check helpers */
8146/******************************************************************************/
8147
8152static inline uint8_t
8153peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8154 if (cursor < parser->end) {
8155 return *cursor;
8156 } else {
8157 return '\0';
8158 }
8159}
8160
8166static inline uint8_t
8167peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8168 return peek_at(parser, parser->current.end + offset);
8169}
8170
8175static inline uint8_t
8176peek(const pm_parser_t *parser) {
8177 return peek_at(parser, parser->current.end);
8178}
8179
8184static inline bool
8185match(pm_parser_t *parser, uint8_t value) {
8186 if (peek(parser) == value) {
8187 parser->current.end++;
8188 return true;
8189 }
8190 return false;
8191}
8192
8197static inline size_t
8198match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8199 if (peek_at(parser, cursor) == '\n') {
8200 return 1;
8201 }
8202 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8203 return 2;
8204 }
8205 return 0;
8206}
8207
8213static inline size_t
8214match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8215 return match_eol_at(parser, parser->current.end + offset);
8216}
8217
8223static inline size_t
8224match_eol(pm_parser_t *parser) {
8225 return match_eol_at(parser, parser->current.end);
8226}
8227
/**
 * Return a pointer to the first "\n" within the `length` bytes starting at
 * `cursor`, or NULL when there is none. `length` must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // A plain byte search is sufficient: none of the supported encodings use
    // \n as a component of a multi-byte character.
    const size_t span = (size_t) length;
    return (const uint8_t *) memchr(cursor, '\n', span);
}
8240
8244static inline bool
8245ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8246 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8247}
8248
8253static bool
8254parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8255 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8256
8257 if (encoding != NULL) {
8258 if (parser->encoding != encoding) {
8259 parser->encoding = encoding;
8260 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8261 }
8262
8263 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8264 return true;
8265 }
8266
8267 return false;
8268}
8269
8274static void
8275parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8276 const uint8_t *cursor = parser->current.start + 1;
8277 const uint8_t *end = parser->current.end;
8278
8279 bool separator = false;
8280 while (true) {
8281 if (end - cursor <= 6) return;
8282 switch (cursor[6]) {
8283 case 'C': case 'c': cursor += 6; continue;
8284 case 'O': case 'o': cursor += 5; continue;
8285 case 'D': case 'd': cursor += 4; continue;
8286 case 'I': case 'i': cursor += 3; continue;
8287 case 'N': case 'n': cursor += 2; continue;
8288 case 'G': case 'g': cursor += 1; continue;
8289 case '=': case ':':
8290 separator = true;
8291 cursor += 6;
8292 break;
8293 default:
8294 cursor += 6;
8295 if (pm_char_is_whitespace(*cursor)) break;
8296 continue;
8297 }
8298 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8299 separator = false;
8300 }
8301
8302 while (true) {
8303 do {
8304 if (++cursor >= end) return;
8305 } while (pm_char_is_whitespace(*cursor));
8306
8307 if (separator) break;
8308 if (*cursor != '=' && *cursor != ':') return;
8309
8310 separator = true;
8311 cursor++;
8312 }
8313
8314 const uint8_t *value_start = cursor;
8315 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8316
8317 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8318 // If we were unable to parse the encoding value, then we've got an
8319 // issue because we didn't understand the encoding that the user was
8320 // trying to use. In this case we'll keep using the default encoding but
8321 // add an error to the parser to indicate an unsuccessful parse.
8322 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8323 }
8324}
8325
/**
 * The possible outcomes of interpreting the value of a magic comment as a
 * boolean.
 */
typedef enum {
    /** The value is "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value is "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value is neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
8331
8336static pm_magic_comment_boolean_value_t
8337parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8338 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8339 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8340 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8341 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8342 } else {
8343 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8344 }
8345}
8346
/**
 * Returns true when the byte delimits a key in a magic comment: a single
 * quote, a double quote, a colon or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8351
8357static inline const uint8_t *
8358parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8359 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8360 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8361 return cursor;
8362 }
8363 cursor++;
8364 }
8365 return NULL;
8366}
8367
8378static inline bool
8379parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8380 bool result = true;
8381
8382 const uint8_t *start = parser->current.start + 1;
8383 const uint8_t *end = parser->current.end;
8384 if (end - start <= 7) return false;
8385
8386 const uint8_t *cursor;
8387 bool indicator = false;
8388
8389 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8390 start = cursor + 3;
8391
8392 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8393 end = cursor;
8394 indicator = true;
8395 } else {
8396 // If we have a start marker but not an end marker, then we cannot
8397 // have a magic comment.
8398 return false;
8399 }
8400 }
8401
8402 cursor = start;
8403 while (cursor < end) {
8404 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8405
8406 const uint8_t *key_start = cursor;
8407 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8408
8409 const uint8_t *key_end = cursor;
8410 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8411 if (cursor == end) break;
8412
8413 if (*cursor == ':') {
8414 cursor++;
8415 } else {
8416 if (!indicator) return false;
8417 continue;
8418 }
8419
8420 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8421 if (cursor == end) break;
8422
8423 const uint8_t *value_start;
8424 const uint8_t *value_end;
8425
8426 if (*cursor == '"') {
8427 value_start = ++cursor;
8428 for (; cursor < end && *cursor != '"'; cursor++) {
8429 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8430 }
8431 value_end = cursor;
8432 if (*cursor == '"') cursor++;
8433 } else {
8434 value_start = cursor;
8435 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8436 value_end = cursor;
8437 }
8438
8439 if (indicator) {
8440 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8441 } else {
8442 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8443 if (cursor != end) return false;
8444 }
8445
8446 // Here, we need to do some processing on the key to swap out dashes for
8447 // underscores. We only need to do this if there _is_ a dash in the key.
8448 pm_string_t key;
8449 const size_t key_length = (size_t) (key_end - key_start);
8450 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8451
8452 if (dash == NULL) {
8453 pm_string_shared_init(&key, key_start, key_end);
8454 } else {
8455 uint8_t *buffer = xmalloc(key_length);
8456 if (buffer == NULL) break;
8457
8458 memcpy(buffer, key_start, key_length);
8459 buffer[dash - key_start] = '_';
8460
8461 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8462 buffer[dash - key_start] = '_';
8463 }
8464
8465 pm_string_owned_init(&key, buffer, key_length);
8466 }
8467
8468 // Finally, we can start checking the key against the list of known
8469 // magic comment keys, and potentially change state based on that.
8470 const uint8_t *key_source = pm_string_source(&key);
8471 uint32_t value_length = (uint32_t) (value_end - value_start);
8472
8473 // We only want to attempt to compare against encoding comments if it's
8474 // the first line in the file (or the second in the case of a shebang).
8475 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8476 if (
8477 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8478 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8479 ) {
8480 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8481 }
8482 }
8483
8484 if (key_length == 11) {
8485 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8486 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8487 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8488 PM_PARSER_WARN_TOKEN_FORMAT(
8489 parser,
8490 parser->current,
8491 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8492 (int) key_length,
8493 (const char *) key_source,
8494 (int) value_length,
8495 (const char *) value_start
8496 );
8497 break;
8498 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8499 parser->warn_mismatched_indentation = false;
8500 break;
8501 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8502 parser->warn_mismatched_indentation = true;
8503 break;
8504 }
8505 }
8506 } else if (key_length == 21) {
8507 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8508 // We only want to handle frozen string literal comments if it's
8509 // before any semantic tokens have been seen.
8510 if (semantic_token_seen) {
8511 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8512 } else {
8513 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8514 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8515 PM_PARSER_WARN_TOKEN_FORMAT(
8516 parser,
8517 parser->current,
8518 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8519 (int) key_length,
8520 (const char *) key_source,
8521 (int) value_length,
8522 (const char *) value_start
8523 );
8524 break;
8525 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8527 break;
8528 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8530 break;
8531 }
8532 }
8533 }
8534 } else if (key_length == 24) {
8535 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8536 const uint8_t *cursor = parser->current.start;
8537 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8538
8539 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8540 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8541 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8542 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8543 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8544 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8545 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8546 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8547 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8548 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8549 } else {
8550 PM_PARSER_WARN_TOKEN_FORMAT(
8551 parser,
8552 parser->current,
8553 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8554 (int) key_length,
8555 (const char *) key_source,
8556 (int) value_length,
8557 (const char *) value_start
8558 );
8559 }
8560 }
8561 }
8562
8563 // When we're done, we want to free the string in case we had to
8564 // allocate memory for it.
8565 pm_string_free(&key);
8566
8567 // Allocate a new magic comment node to append to the parser's list.
8569 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8570 magic_comment->key_start = key_start;
8571 magic_comment->value_start = value_start;
8572 magic_comment->key_length = (uint32_t) key_length;
8573 magic_comment->value_length = value_length;
8574 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8575 }
8576 }
8577
8578 return result;
8579}
8580
8581/******************************************************************************/
8582/* Context manipulations */
8583/******************************************************************************/
8584
8585static const uint32_t context_terminators[] = {
8586 [PM_CONTEXT_NONE] = 0,
8587 [PM_CONTEXT_BEGIN] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8588 [PM_CONTEXT_BEGIN_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8589 [PM_CONTEXT_BEGIN_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8590 [PM_CONTEXT_BEGIN_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8591 [PM_CONTEXT_BLOCK_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
8592 [PM_CONTEXT_BLOCK_KEYWORDS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8593 [PM_CONTEXT_BLOCK_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8594 [PM_CONTEXT_BLOCK_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8595 [PM_CONTEXT_BLOCK_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8596 [PM_CONTEXT_CASE_WHEN] = (1 << PM_TOKEN_KEYWORD_WHEN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
8597 [PM_CONTEXT_CASE_IN] = (1 << PM_TOKEN_KEYWORD_IN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
8598 [PM_CONTEXT_CLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8599 [PM_CONTEXT_CLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8600 [PM_CONTEXT_CLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8601 [PM_CONTEXT_CLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8602 [PM_CONTEXT_DEF] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8603 [PM_CONTEXT_DEF_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8604 [PM_CONTEXT_DEF_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8605 [PM_CONTEXT_DEF_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8606 [PM_CONTEXT_DEF_PARAMS] = (1 << PM_TOKEN_EOF),
8607 [PM_CONTEXT_DEFINED] = (1 << PM_TOKEN_EOF),
8608 [PM_CONTEXT_DEFAULT_PARAMS] = (1 << PM_TOKEN_COMMA) | (1 << PM_TOKEN_PARENTHESIS_RIGHT),
8609 [PM_CONTEXT_ELSE] = (1 << PM_TOKEN_KEYWORD_END),
8610 [PM_CONTEXT_ELSIF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
8611 [PM_CONTEXT_EMBEXPR] = (1 << PM_TOKEN_EMBEXPR_END),
8612 [PM_CONTEXT_FOR] = (1 << PM_TOKEN_KEYWORD_END),
8613 [PM_CONTEXT_FOR_INDEX] = (1 << PM_TOKEN_KEYWORD_IN),
8614 [PM_CONTEXT_IF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
8615 [PM_CONTEXT_LAMBDA_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
8616 [PM_CONTEXT_LAMBDA_DO_END] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8617 [PM_CONTEXT_LAMBDA_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8618 [PM_CONTEXT_LAMBDA_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8619 [PM_CONTEXT_LAMBDA_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8620 [PM_CONTEXT_LOOP_PREDICATE] = (1 << PM_TOKEN_KEYWORD_DO) | (1 << PM_TOKEN_KEYWORD_THEN),
8621 [PM_CONTEXT_MAIN] = (1 << PM_TOKEN_EOF),
8622 [PM_CONTEXT_MODULE] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8623 [PM_CONTEXT_MODULE_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8624 [PM_CONTEXT_MODULE_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8625 [PM_CONTEXT_MODULE_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8626 [PM_CONTEXT_MULTI_TARGET] = (1 << PM_TOKEN_EOF),
8627 [PM_CONTEXT_PARENS] = (1 << PM_TOKEN_PARENTHESIS_RIGHT),
8628 [PM_CONTEXT_POSTEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
8629 [PM_CONTEXT_PREDICATE] = (1 << PM_TOKEN_KEYWORD_THEN) | (1 << PM_TOKEN_NEWLINE) | (1 << PM_TOKEN_SEMICOLON),
8630 [PM_CONTEXT_PREEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
8631 [PM_CONTEXT_RESCUE_MODIFIER] = (1 << PM_TOKEN_EOF),
8632 [PM_CONTEXT_SCLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8633 [PM_CONTEXT_SCLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8634 [PM_CONTEXT_SCLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8635 [PM_CONTEXT_SCLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8636 [PM_CONTEXT_TERNARY] = (1 << PM_TOKEN_EOF),
8637 [PM_CONTEXT_UNLESS] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8638 [PM_CONTEXT_UNTIL] = (1 << PM_TOKEN_KEYWORD_END),
8639 [PM_CONTEXT_WHILE] = (1 << PM_TOKEN_KEYWORD_END),
8640};
8641
8642static inline bool
8643context_terminator(pm_context_t context, pm_token_t *token) {
8644 return token->type < 32 && (context_terminators[context] & (1 << token->type));
8645}
8646
8651static pm_context_t
8652context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8653 pm_context_node_t *context_node = parser->current_context;
8654
8655 while (context_node != NULL) {
8656 if (context_terminator(context_node->context, token)) return context_node->context;
8657 context_node = context_node->prev;
8658 }
8659
8660 return PM_CONTEXT_NONE;
8661}
8662
8663static bool
8664context_push(pm_parser_t *parser, pm_context_t context) {
8665 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8666 if (context_node == NULL) return false;
8667
8668 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8669
8670 if (parser->current_context == NULL) {
8671 parser->current_context = context_node;
8672 } else {
8673 context_node->prev = parser->current_context;
8674 parser->current_context = context_node;
8675 }
8676
8677 return true;
8678}
8679
8680static void
8681context_pop(pm_parser_t *parser) {
8682 pm_context_node_t *prev = parser->current_context->prev;
8683 xfree(parser->current_context);
8684 parser->current_context = prev;
8685}
8686
8687static bool
8688context_p(const pm_parser_t *parser, pm_context_t context) {
8689 pm_context_node_t *context_node = parser->current_context;
8690
8691 while (context_node != NULL) {
8692 if (context_node->context == context) return true;
8693 context_node = context_node->prev;
8694 }
8695
8696 return false;
8697}
8698
8699static bool
8700context_def_p(const pm_parser_t *parser) {
8701 pm_context_node_t *context_node = parser->current_context;
8702
8703 while (context_node != NULL) {
8704 switch (context_node->context) {
8705 case PM_CONTEXT_DEF:
8710 return true;
8711 case PM_CONTEXT_CLASS:
8715 case PM_CONTEXT_MODULE:
8719 case PM_CONTEXT_SCLASS:
8723 return false;
8724 default:
8725 context_node = context_node->prev;
8726 }
8727 }
8728
8729 return false;
8730}
8731
8736static const char *
8737context_human(pm_context_t context) {
8738 switch (context) {
8739 case PM_CONTEXT_NONE:
8740 assert(false && "unreachable");
8741 return "";
8742 case PM_CONTEXT_BEGIN: return "begin statement";
8743 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8744 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8745 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8746 case PM_CONTEXT_CASE_IN: return "'in' clause";
8747 case PM_CONTEXT_CLASS: return "class definition";
8748 case PM_CONTEXT_DEF: return "method definition";
8749 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8750 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8751 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8752 case PM_CONTEXT_ELSE:
8759 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8760 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8761 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8768 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8769 case PM_CONTEXT_FOR: return "for loop";
8770 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8771 case PM_CONTEXT_IF: return "if statement";
8772 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8773 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8774 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8775 case PM_CONTEXT_MAIN: return "top level context";
8776 case PM_CONTEXT_MODULE: return "module definition";
8777 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8778 case PM_CONTEXT_PARENS: return "parentheses";
8779 case PM_CONTEXT_POSTEXE: return "'END' block";
8780 case PM_CONTEXT_PREDICATE: return "predicate";
8781 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8789 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8790 case PM_CONTEXT_SCLASS: return "singleton class definition";
8791 case PM_CONTEXT_TERNARY: return "ternary expression";
8792 case PM_CONTEXT_UNLESS: return "unless statement";
8793 case PM_CONTEXT_UNTIL: return "until statement";
8794 case PM_CONTEXT_WHILE: return "while statement";
8795 }
8796
8797 assert(false && "unreachable");
8798 return "";
8799}
8800
8801/******************************************************************************/
8802/* Specific token lexers */
8803/******************************************************************************/
8804
8805static inline void
8806pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8807 if (invalid != NULL) {
8808 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8809 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8810 }
8811}
8812
8813static size_t
8814pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8815 const uint8_t *invalid = NULL;
8816 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8817 pm_strspn_number_validate(parser, string, length, invalid);
8818 return length;
8819}
8820
8821static size_t
8822pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8823 const uint8_t *invalid = NULL;
8824 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8825 pm_strspn_number_validate(parser, string, length, invalid);
8826 return length;
8827}
8828
8829static size_t
8830pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8831 const uint8_t *invalid = NULL;
8832 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8833 pm_strspn_number_validate(parser, string, length, invalid);
8834 return length;
8835}
8836
8837static size_t
8838pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8839 const uint8_t *invalid = NULL;
8840 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8841 pm_strspn_number_validate(parser, string, length, invalid);
8842 return length;
8843}
8844
8845static pm_token_type_t
8846lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8847 pm_token_type_t type = PM_TOKEN_INTEGER;
8848
8849 // Here we're going to attempt to parse the optional decimal portion of a
8850 // float. If it's not there, then it's okay and we'll just continue on.
8851 if (peek(parser) == '.') {
8852 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8853 parser->current.end += 2;
8854 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8855 type = PM_TOKEN_FLOAT;
8856 } else {
8857 // If we had a . and then something else, then it's not a float
8858 // suffix on a number it's a method call or something else.
8859 return type;
8860 }
8861 }
8862
8863 // Here we're going to attempt to parse the optional exponent portion of a
8864 // float. If it's not there, it's okay and we'll just continue on.
8865 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8866 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8867 parser->current.end += 2;
8868
8869 if (pm_char_is_decimal_digit(peek(parser))) {
8870 parser->current.end++;
8871 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8872 } else {
8873 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8874 }
8875 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8876 parser->current.end++;
8877 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8878 } else {
8879 return type;
8880 }
8881
8882 *seen_e = true;
8883 type = PM_TOKEN_FLOAT;
8884 }
8885
8886 return type;
8887}
8888
8889static pm_token_type_t
8890lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8891 pm_token_type_t type = PM_TOKEN_INTEGER;
8892 *seen_e = false;
8893
8894 if (peek_offset(parser, -1) == '0') {
8895 switch (*parser->current.end) {
8896 // 0d1111 is a decimal number
8897 case 'd':
8898 case 'D':
8899 parser->current.end++;
8900 if (pm_char_is_decimal_digit(peek(parser))) {
8901 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8902 } else {
8903 match(parser, '_');
8904 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8905 }
8906
8907 break;
8908
8909 // 0b1111 is a binary number
8910 case 'b':
8911 case 'B':
8912 parser->current.end++;
8913 if (pm_char_is_binary_digit(peek(parser))) {
8914 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8915 } else {
8916 match(parser, '_');
8917 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8918 }
8919
8920 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8921 break;
8922
8923 // 0o1111 is an octal number
8924 case 'o':
8925 case 'O':
8926 parser->current.end++;
8927 if (pm_char_is_octal_digit(peek(parser))) {
8928 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8929 } else {
8930 match(parser, '_');
8931 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8932 }
8933
8934 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8935 break;
8936
8937 // 01111 is an octal number
8938 case '_':
8939 case '0':
8940 case '1':
8941 case '2':
8942 case '3':
8943 case '4':
8944 case '5':
8945 case '6':
8946 case '7':
8947 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8948 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8949 break;
8950
8951 // 0x1111 is a hexadecimal number
8952 case 'x':
8953 case 'X':
8954 parser->current.end++;
8955 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8956 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8957 } else {
8958 match(parser, '_');
8959 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8960 }
8961
8962 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8963 break;
8964
8965 // 0.xxx is a float
8966 case '.': {
8967 type = lex_optional_float_suffix(parser, seen_e);
8968 break;
8969 }
8970
8971 // 0exxx is a float
8972 case 'e':
8973 case 'E': {
8974 type = lex_optional_float_suffix(parser, seen_e);
8975 break;
8976 }
8977 }
8978 } else {
8979 // If it didn't start with a 0, then we'll lex as far as we can into a
8980 // decimal number.
8981 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8982
8983 // Afterward, we'll lex as far as we can into an optional float suffix.
8984 type = lex_optional_float_suffix(parser, seen_e);
8985 }
8986
8987 // At this point we have a completed number, but we want to provide the user
8988 // with a good experience if they put an additional .xxx fractional
8989 // component on the end, so we'll check for that here.
8990 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8991 const uint8_t *fraction_start = parser->current.end;
8992 const uint8_t *fraction_end = parser->current.end + 2;
8993 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8994 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8995 }
8996
8997 return type;
8998}
8999
9000static pm_token_type_t
9001lex_numeric(pm_parser_t *parser) {
9002 pm_token_type_t type = PM_TOKEN_INTEGER;
9003 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
9004
9005 if (parser->current.end < parser->end) {
9006 bool seen_e = false;
9007 type = lex_numeric_prefix(parser, &seen_e);
9008
9009 const uint8_t *end = parser->current.end;
9010 pm_token_type_t suffix_type = type;
9011
9012 if (type == PM_TOKEN_INTEGER) {
9013 if (match(parser, 'r')) {
9014 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9015
9016 if (match(parser, 'i')) {
9017 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
9018 }
9019 } else if (match(parser, 'i')) {
9020 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9021 }
9022 } else {
9023 if (!seen_e && match(parser, 'r')) {
9024 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9025
9026 if (match(parser, 'i')) {
9027 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
9028 }
9029 } else if (match(parser, 'i')) {
9030 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9031 }
9032 }
9033
9034 const uint8_t b = peek(parser);
9035 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9036 parser->current.end = end;
9037 } else {
9038 type = suffix_type;
9039 }
9040 }
9041
9042 return type;
9043}
9044
9045static pm_token_type_t
9046lex_global_variable(pm_parser_t *parser) {
9047 if (parser->current.end >= parser->end) {
9048 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9049 return PM_TOKEN_GLOBAL_VARIABLE;
9050 }
9051
9052 // True if multiple characters are allowed after the declaration of the
9053 // global variable. Not true when it starts with "$-".
9054 bool allow_multiple = true;
9055
9056 switch (*parser->current.end) {
9057 case '~': // $~: match-data
9058 case '*': // $*: argv
9059 case '$': // $$: pid
9060 case '?': // $?: last status
9061 case '!': // $!: error string
9062 case '@': // $@: error position
9063 case '/': // $/: input record separator
9064 case '\\': // $\: output record separator
9065 case ';': // $;: field separator
9066 case ',': // $,: output field separator
9067 case '.': // $.: last read line number
9068 case '=': // $=: ignorecase
9069 case ':': // $:: load path
9070 case '<': // $<: reading filename
9071 case '>': // $>: default output handle
9072 case '\"': // $": already loaded files
9073 parser->current.end++;
9074 return PM_TOKEN_GLOBAL_VARIABLE;
9075
9076 case '&': // $&: last match
9077 case '`': // $`: string before last match
9078 case '\'': // $': string after last match
9079 case '+': // $+: string matches last paren.
9080 parser->current.end++;
9081 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9082
9083 case '0': {
9084 parser->current.end++;
9085 size_t width;
9086
9087 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9088 do {
9089 parser->current.end += width;
9090 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9091
9092 // $0 isn't allowed to be followed by anything.
9093 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9094 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9095 }
9096
9097 return PM_TOKEN_GLOBAL_VARIABLE;
9098 }
9099
9100 case '1':
9101 case '2':
9102 case '3':
9103 case '4':
9104 case '5':
9105 case '6':
9106 case '7':
9107 case '8':
9108 case '9':
9109 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9110 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9111
9112 case '-':
9113 parser->current.end++;
9114 allow_multiple = false;
9116 default: {
9117 size_t width;
9118
9119 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9120 do {
9121 parser->current.end += width;
9122 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9123 } else if (pm_char_is_whitespace(peek(parser))) {
9124 // If we get here, then we have a $ followed by whitespace,
9125 // which is not allowed.
9126 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9127 } else {
9128 // If we get here, then we have a $ followed by something that
9129 // isn't recognized as a global variable.
9130 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9131 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9132 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9133 }
9134
9135 return PM_TOKEN_GLOBAL_VARIABLE;
9136 }
9137 }
9138}
9139
9152static inline pm_token_type_t
9153lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9154 if (memcmp(current_start, value, vlen) == 0) {
9155 pm_lex_state_t last_state = parser->lex_state;
9156
9157 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9158 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9159 } else {
9160 lex_state_set(parser, state);
9161 if (state == PM_LEX_STATE_BEG) {
9162 parser->command_start = true;
9163 }
9164
9165 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9166 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9167 return modifier_type;
9168 }
9169 }
9170
9171 return type;
9172 }
9173
9174 return PM_TOKEN_EOF;
9175}
9176
9177static pm_token_type_t
9178lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9179 // Lex as far as we can into the current identifier.
9180 size_t width;
9181 const uint8_t *end = parser->end;
9182 const uint8_t *current_start = parser->current.start;
9183 const uint8_t *current_end = parser->current.end;
9184 bool encoding_changed = parser->encoding_changed;
9185
9186 if (encoding_changed) {
9187 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
9188 current_end += width;
9189 }
9190 } else {
9191 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
9192 current_end += width;
9193 }
9194 }
9195 parser->current.end = current_end;
9196
9197 // Now cache the length of the identifier so that we can quickly compare it
9198 // against known keywords.
9199 width = (size_t) (current_end - current_start);
9200
9201 if (current_end < end) {
9202 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9203 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9204 // check if we're returning the defined? keyword or just an identifier.
9205 width++;
9206
9207 if (
9208 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9209 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9210 ) {
9211 // If we're in a position where we can accept a : at the end of an
9212 // identifier, then we'll optionally accept it.
9213 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9214 (void) match(parser, ':');
9215 return PM_TOKEN_LABEL;
9216 }
9217
9218 if (parser->lex_state != PM_LEX_STATE_DOT) {
9219 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9220 return PM_TOKEN_KEYWORD_DEFINED;
9221 }
9222 }
9223
9224 return PM_TOKEN_METHOD_NAME;
9225 }
9226
9227 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9228 // If we're in a position where we can accept a = at the end of an
9229 // identifier, then we'll optionally accept it.
9230 return PM_TOKEN_IDENTIFIER;
9231 }
9232
9233 if (
9234 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9235 peek(parser) == ':' && peek_offset(parser, 1) != ':'
9236 ) {
9237 // If we're in a position where we can accept a : at the end of an
9238 // identifier, then we'll optionally accept it.
9239 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9240 (void) match(parser, ':');
9241 return PM_TOKEN_LABEL;
9242 }
9243 }
9244
9245 if (parser->lex_state != PM_LEX_STATE_DOT) {
9246 pm_token_type_t type;
9247 switch (width) {
9248 case 2:
9249 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9250 if (pm_do_loop_stack_p(parser)) {
9251 return PM_TOKEN_KEYWORD_DO_LOOP;
9252 }
9253 return PM_TOKEN_KEYWORD_DO;
9254 }
9255
9256 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9257 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9258 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9259 break;
9260 case 3:
9261 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9262 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9263 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9264 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9265 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9266 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9267 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9268 break;
9269 case 4:
9270 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9271 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9272 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9273 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9274 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9277 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278 break;
9279 case 5:
9280 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9284 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9287 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9288 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9289 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9290 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9291 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9292 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9293 break;
9294 case 6:
9295 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9296 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9298 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9299 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9300 break;
9301 case 8:
9302 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9303 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9304 break;
9305 case 12:
9306 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9307 break;
9308 }
9309 }
9310
9311 if (encoding_changed) {
9312 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9313 }
9314 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9315}
9316
9321static bool
9322current_token_starts_line(pm_parser_t *parser) {
9323 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9324}
9325
9340static pm_token_type_t
9341lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9342 // If there is no content following this #, then we're at the end of
9343 // the string and we can safely return string content.
9344 if (pound + 1 >= parser->end) {
9345 parser->current.end = pound + 1;
9346 return PM_TOKEN_STRING_CONTENT;
9347 }
9348
9349 // Now we'll check against the character that follows the #. If it constitutes
9350 // valid interplation, we'll handle that, otherwise we'll return
9351 // PM_TOKEN_NOT_PROVIDED.
9352 switch (pound[1]) {
9353 case '@': {
9354 // In this case we may have hit an embedded instance or class variable.
9355 if (pound + 2 >= parser->end) {
9356 parser->current.end = pound + 1;
9357 return PM_TOKEN_STRING_CONTENT;
9358 }
9359
9360 // If we're looking at a @ and there's another @, then we'll skip past the
9361 // second @.
9362 const uint8_t *variable = pound + 2;
9363 if (*variable == '@' && pound + 3 < parser->end) variable++;
9364
9365 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
9366 // At this point we're sure that we've either hit an embedded instance
9367 // or class variable. In this case we'll first need to check if we've
9368 // already consumed content.
9369 if (pound > parser->current.start) {
9370 parser->current.end = pound;
9371 return PM_TOKEN_STRING_CONTENT;
9372 }
9373
9374 // Otherwise we need to return the embedded variable token
9375 // and then switch to the embedded variable lex mode.
9376 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9377 parser->current.end = pound + 1;
9378 return PM_TOKEN_EMBVAR;
9379 }
9380
9381 // If we didn't get a valid interpolation, then this is just regular
9382 // string content. This is like if we get "#@-". In this case the caller
9383 // should keep lexing.
9384 parser->current.end = pound + 1;
9385 return PM_TOKEN_NOT_PROVIDED;
9386 }
9387 case '$':
9388 // In this case we may have hit an embedded global variable. If there's
9389 // not enough room, then we'll just return string content.
9390 if (pound + 2 >= parser->end) {
9391 parser->current.end = pound + 1;
9392 return PM_TOKEN_STRING_CONTENT;
9393 }
9394
9395 // This is the character that we're going to check to see if it is the
9396 // start of an identifier that would indicate that this is a global
9397 // variable.
9398 const uint8_t *check = pound + 2;
9399
9400 if (pound[2] == '-') {
9401 if (pound + 3 >= parser->end) {
9402 parser->current.end = pound + 2;
9403 return PM_TOKEN_STRING_CONTENT;
9404 }
9405
9406 check++;
9407 }
9408
9409 // If the character that we're going to check is the start of an
9410 // identifier, or we don't have a - and the character is a decimal number
9411 // or a global name punctuation character, then we've hit an embedded
9412 // global variable.
9413 if (
9414 char_is_identifier_start(parser, check, parser->end - check) ||
9415 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9416 ) {
9417 // In this case we've hit an embedded global variable. First check to
9418 // see if we've already consumed content. If we have, then we need to
9419 // return that content as string content first.
9420 if (pound > parser->current.start) {
9421 parser->current.end = pound;
9422 return PM_TOKEN_STRING_CONTENT;
9423 }
9424
9425 // Otherwise, we need to return the embedded variable token and switch
9426 // to the embedded variable lex mode.
9427 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9428 parser->current.end = pound + 1;
9429 return PM_TOKEN_EMBVAR;
9430 }
9431
9432 // In this case we've hit a #$ that does not indicate a global variable.
9433 // In this case we'll continue lexing past it.
9434 parser->current.end = pound + 1;
9435 return PM_TOKEN_NOT_PROVIDED;
9436 case '{':
9437 // In this case it's the start of an embedded expression. If we have
9438 // already consumed content, then we need to return that content as string
9439 // content first.
9440 if (pound > parser->current.start) {
9441 parser->current.end = pound;
9442 return PM_TOKEN_STRING_CONTENT;
9443 }
9444
9445 parser->enclosure_nesting++;
9446
9447 // Otherwise we'll skip past the #{ and begin lexing the embedded
9448 // expression.
9449 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9450 parser->current.end = pound + 2;
9451 parser->command_start = true;
9452 pm_do_loop_stack_push(parser, false);
9453 return PM_TOKEN_EMBEXPR_BEGIN;
9454 default:
9455 // In this case we've hit a # that doesn't constitute interpolation. We'll
9456 // mark that by returning the not provided token type. This tells the
9457 // consumer to keep lexing forward.
9458 parser->current.end = pound + 1;
9459 return PM_TOKEN_NOT_PROVIDED;
9460 }
9461}
9462
// Bit flags passed as the `flags` argument to the escape_* helpers below.

// No flags set.
9463static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;
// Set while reading inside a control escape (\c or \C-). escape_byte masks the
// byte with 0x9f when this bit is present.
9464static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;
// Set while reading inside a meta escape (\M-). escape_byte sets the high bit
// (0x80) of the byte when this bit is present.
9465static const uint8_t PM_ESCAPE_FLAG_META = 0x2;
// Passed by lex_question_mark when the escape belongs to a ?\ character
// literal.
9466static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;
// When set, escape_write_byte also writes the escape into the regular
// expression buffer.
9467static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
9468
/**
 * Lookup table indexed by ASCII byte value. An entry is 1 when the byte is
 * accepted by char_is_ascii_printable. Note that 0x09-0x0D are accepted while
 * the backslash (0x5C) and DEL (0x7F) are not.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 - 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 - 0x3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 - 0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 - 0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the byte is within the ASCII range and flagged in the
 * ascii_printable_chars table. Bytes >= 0x80 are never printable.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    // The table only covers 0x00-0x7F, so reject anything beyond it before
    // indexing.
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
9487
/**
 * Convert a single hexadecimal digit character into its numeric value. The
 * input is expected to already be a valid hexadecimal digit; no validation
 * is performed here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // Decimal digits map directly onto 0-9.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For letters, the low three bits of 'a'-'f' and 'A'-'F' are 1-6, so
    // adding 9 yields 10-15 regardless of case.
    return (uint8_t) ((value & 0x7) + 9);
}
9496
9502static inline uint32_t
9503escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9504 uint32_t value = 0;
9505 for (size_t index = 0; index < length; index++) {
9506 if (index != 0) value <<= 4;
9507 value |= escape_hexadecimal_digit(string[index]);
9508 }
9509
9510 // Here we're going to verify that the value is actually a valid Unicode
9511 // codepoint and not a surrogate pair.
9512 if (value >= 0xD800 && value <= 0xDFFF) {
9513 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9514 return 0xFFFD;
9515 }
9516
9517 return value;
9518}
9519
9523static inline uint8_t
9524escape_byte(uint8_t value, const uint8_t flags) {
9525 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9526 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9527 return value;
9528}
9529
9533static inline void
9534escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9535 // \u escape sequences in string-like structures implicitly change the
9536 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9537 // literal.
9538 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9539 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9540 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9541 }
9542
9544 }
9545
9546 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
9547 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9548 pm_buffer_append_byte(buffer, 0xEF);
9549 pm_buffer_append_byte(buffer, 0xBF);
9550 pm_buffer_append_byte(buffer, 0xBD);
9551 }
9552}
9553
9558static inline void
9559escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9560 if (byte >= 0x80) {
9561 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9562 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9563 }
9564
9565 parser->explicit_encoding = parser->encoding;
9566 }
9567
9568 pm_buffer_append_byte(buffer, byte);
9569}
9570
9586static inline void
9587escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9588 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9589 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9590 }
9591
9592 escape_write_byte_encoded(parser, buffer, byte);
9593}
9594
9598static inline void
9599escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9600 size_t width;
9601 if (parser->encoding_changed) {
9602 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9603 } else {
9604 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9605 }
9606
9607 if (width == 1) {
9608 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9609 } else if (width > 1) {
9610 // Valid multibyte character. Just ignore escape.
9611 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
9612 pm_buffer_append_bytes(b, parser->current.end, width);
9613 parser->current.end += width;
9614 } else {
9615 // Assume the next character wasn't meant to be part of this escape
9616 // sequence since it is invalid. Add an error and move on.
9617 parser->current.end++;
9618 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9619 }
9620}
9621
9627static void
9628escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9629#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9630
9631 PM_PARSER_WARN_TOKEN_FORMAT(
9632 parser,
9633 parser->current,
9634 PM_WARN_INVALID_CHARACTER,
9635 FLAG(flags),
9636 FLAG(flag),
9637 type
9638 );
9639
9640#undef FLAG
9641}
9642
/**
 * Read the body of an escape sequence, starting at parser->current.end. The
 * callers visible here consume the backslash before calling (see
 * lex_question_mark and the recursive calls below), so the first byte
 * inspected is the character that follows it.
 *
 * Decoded bytes are appended to `buffer`. When PM_ESCAPE_FLAG_REGEXP is set
 * in `flags`, most paths also append a form of the escape to
 * `regular_expression_buffer`. parser->current.end is advanced past whatever
 * was consumed, and malformed escapes add diagnostics to the parser.
 *
 * The \c, \C- and \M- cases call this function recursively with the control
 * or meta flag added to `flags`.
 */
9646static void
9647escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9648 uint8_t peeked = peek(parser);
9649 switch (peeked) {
 // Simple single-character escapes: consume the character and write the
 // byte it denotes, after escape_byte has applied any control/meta flags.
9650 case '\\': {
9651 parser->current.end++;
9652 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9653 return;
9654 }
9655 case '\'': {
9656 parser->current.end++;
9657 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9658 return;
9659 }
9660 case 'a': {
9661 parser->current.end++;
9662 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9663 return;
9664 }
9665 case 'b': {
9666 parser->current.end++;
9667 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9668 return;
9669 }
9670 case 'e': {
9671 parser->current.end++;
9672 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9673 return;
9674 }
9675 case 'f': {
9676 parser->current.end++;
9677 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9678 return;
9679 }
9680 case 'n': {
9681 parser->current.end++;
9682 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9683 return;
9684 }
9685 case 'r': {
9686 parser->current.end++;
9687 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9688 return;
9689 }
9690 case 's': {
9691 parser->current.end++;
9692 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9693 return;
9694 }
9695 case 't': {
9696 parser->current.end++;
9697 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9698 return;
9699 }
9700 case 'v': {
9701 parser->current.end++;
9702 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9703 return;
9704 }
 // Octal escape: one to three octal digits. Each shift is truncated to
 // uint8_t, so only the low eight bits of the result are kept.
9705 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9706 uint8_t value = (uint8_t) (*parser->current.end - '0');
9707 parser->current.end++;
9708
9709 if (pm_char_is_octal_digit(peek(parser))) {
9710 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9711 parser->current.end++;
9712
9713 if (pm_char_is_octal_digit(peek(parser))) {
9714 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9715 parser->current.end++;
9716 }
9717 }
9718
9719 value = escape_byte(value, flags);
9720 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9721 return;
9722 }
 // Hexadecimal escape: the x followed by one or two hexadecimal digits.
9723 case 'x': {
 // One byte before the x; presumably the backslash that introduced the
 // escape, so that the source text can be copied verbatim below.
9724 const uint8_t *start = parser->current.end - 1;
9725
9726 parser->current.end++;
9727 uint8_t byte = peek(parser);
9728
9729 if (pm_char_is_hexadecimal_digit(byte)) {
9730 uint8_t value = escape_hexadecimal_digit(byte);
9731 parser->current.end++;
9732
9733 byte = peek(parser);
9734 if (pm_char_is_hexadecimal_digit(byte)) {
9735 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9736 parser->current.end++;
9737 }
9738
9739 value = escape_byte(value, flags);
 // For regular expressions: if a control/meta flag altered the value,
 // write the resulting byte in \xNN form; otherwise copy the escape
 // exactly as it appears in the source.
9740 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9741 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9742 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9743 } else {
9744 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9745 }
9746 }
9747
9748 escape_write_byte_encoded(parser, buffer, value);
9749 } else {
 // No hexadecimal digit after the x: report it and write nothing.
9750 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9751 }
9752
9753 return;
9754 }
 // Unicode escape: either u followed by exactly four hexadecimal digits,
 // or u{...} holding a whitespace-separated list of codepoints.
9755 case 'u': {
 // One byte before the u; presumably the backslash.
9756 const uint8_t *start = parser->current.end - 1;
9757 parser->current.end++;
9758
9759 if (parser->current.end == parser->end) {
 // The u is the last byte of the source. NOTE: this local shadows the
 // outer `start`; it is computed from the already-advanced cursor.
9760 const uint8_t *start = parser->current.end - 2;
9761 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9762 } else if (peek(parser) == '{') {
 // Braced form. unicode_codepoints_start is two bytes before the {.
9763 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9764 parser->current.end++;
9765
 // Skip leading inline whitespace (and literal backslash-n pairs)
 // before the first codepoint.
9766 size_t whitespace;
9767 while (true) {
9768 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9769 parser->current.end += whitespace;
9770 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9771 // This is super hacky, but it gets us nicer error
9772 // messages because we can still pass it off to the
9773 // regular expression engine even if we hit an
9774 // unterminated regular expression.
9775 parser->current.end += 2;
9776 } else {
9777 break;
9778 }
9779 }
9780
 // Start of the second codepoint, recorded only for character literals
 // so the "only one codepoint allowed" error can point at it.
9781 const uint8_t *extra_codepoints_start = NULL;
9782 int codepoints_count = 0;
9783
 // Read codepoints until the closing brace or the end of the source.
9784 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9785 const uint8_t *unicode_start = parser->current.end;
9786 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9787
9788 if (hexadecimal_length > 6) {
9789 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9790 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9791 } else if (hexadecimal_length == 0) {
9792 // there are not hexadecimal characters
9793
9794 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9795 // If this is a regular expression, we are going to
9796 // let the regular expression engine handle this
9797 // error instead of us.
9798 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9799 } else {
9800 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9801 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9802 }
9803
9804 return;
9805 }
9806
 // Note that an over-long run of digits (reported above) is still
 // consumed and converted here.
9807 parser->current.end += hexadecimal_length;
9808 codepoints_count++;
9809 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9810 extra_codepoints_start = unicode_start;
9811 }
9812
9813 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9814 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9815
9816 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9817 }
9818
9819 // ?\u{nnnn} character literal should contain only one codepoint
9820 // and cannot be like ?\u{nnnn mmmm}.
9821 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9822 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9823 }
9824
 // Decide how the list ended: end of source, closing brace, or some
 // other character.
9825 if (parser->current.end == parser->end) {
9826 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9827 } else if (peek(parser) == '}') {
9828 parser->current.end++;
9829 } else {
9830 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9831 // If this is a regular expression, we are going to let
9832 // the regular expression engine handle this error
9833 // instead of us.
9834 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9835 } else {
9836 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9837 }
9838 }
9839
 // For regular expressions, copy the source text of the escape from
 // unicode_codepoints_start up to the current position.
9840 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9841 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9842 }
9843 } else {
 // Unbraced form: look at up to four hexadecimal digits.
9844 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9845
9846 if (length == 0) {
9847 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9848 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9849 } else {
 // NOTE: this local shadows the outer `start`.
9850 const uint8_t *start = parser->current.end - 2;
9851 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9852 }
9853 } else if (length == 4) {
 // Exactly four digits: a complete codepoint.
9854 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9855
9856 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9857 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9858 }
9859
9860 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9861 parser->current.end += 4;
9862 } else {
 // One to three digits: too short. Consume them and report.
9863 parser->current.end += length;
9864
9865 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9866 // If this is a regular expression, we are going to let
9867 // the regular expression engine handle this error
9868 // instead of us.
9869 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9870 } else {
9871 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9872 }
9873 }
9874 }
9875
9876 return;
9877 }
 // Control escape in its short form: c followed by the target character.
9878 case 'c': {
9879 parser->current.end++;
 // A control flag that is already set means control escapes are nested.
 // An error is reported but reading continues.
9880 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9881 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9882 }
9883
9884 if (parser->current.end == parser->end) {
9885 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9886 return;
9887 }
9888
 // NOTE: shadows the function-level `peeked`.
9889 uint8_t peeked = peek(parser);
9890 switch (peeked) {
9891 case '?': {
 // A ? target writes 0x7f, passed through escape_byte with the incoming
 // flags (the control flag is not added here).
9892 parser->current.end++;
9893 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9894 return;
9895 }
9896 case '\\':
 // The target is itself an escape. A u or U escape is rejected here;
 // anything else is read recursively with the control flag added.
9897 parser->current.end++;
9898
9899 if (match(parser, 'u') || match(parser, 'U')) {
9900 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9901 return;
9902 }
9903
9904 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9905 return;
9906 case ' ':
 // A literal space target is accepted with a warning that mentions \s.
9907 parser->current.end++;
9908 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9909 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9910 return;
9911 case '\t':
 // A literal tab target is accepted with a warning that mentions \t.
9912 parser->current.end++;
9913 escape_read_warn(parser, flags, 0, "\\t");
9914 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9915 return;
9916 default: {
 // Any other target must pass char_is_ascii_printable.
9917 if (!char_is_ascii_printable(peeked)) {
9918 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9919 return;
9920 }
9921
9922 parser->current.end++;
9923 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9924 return;
9925 }
9926 }
9927 }
 // Control escape in its long form: C, a mandatory -, then the target.
 // Handling of the target mirrors the c case above.
9928 case 'C': {
9929 parser->current.end++;
9930 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9931 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9932 }
9933
9934 if (peek(parser) != '-') {
 // Missing dash: the error location runs from the token start through
 // the offending character.
9935 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9936 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9937 return;
9938 }
9939
9940 parser->current.end++;
9941 if (parser->current.end == parser->end) {
9942 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9943 return;
9944 }
9945
 // NOTE: shadows the function-level `peeked`.
9946 uint8_t peeked = peek(parser);
9947 switch (peeked) {
9948 case '?': {
9949 parser->current.end++;
9950 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9951 return;
9952 }
9953 case '\\':
9954 parser->current.end++;
9955
9956 if (match(parser, 'u') || match(parser, 'U')) {
9957 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9958 return;
9959 }
9960
9961 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9962 return;
9963 case ' ':
9964 parser->current.end++;
9965 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9966 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9967 return;
9968 case '\t':
9969 parser->current.end++;
9970 escape_read_warn(parser, flags, 0, "\\t");
9971 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9972 return;
9973 default: {
9974 if (!char_is_ascii_printable(peeked)) {
9975 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9976 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9977 return;
9978 }
9979
9980 parser->current.end++;
9981 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9982 return;
9983 }
9984 }
9985 }
 // Meta escape: M, a mandatory -, then the target. Unlike the control
 // cases there is no special handling of a ? target.
9986 case 'M': {
9987 parser->current.end++;
9988 if (flags & PM_ESCAPE_FLAG_META) {
9989 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9990 }
9991
9992 if (peek(parser) != '-') {
9993 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9994 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9995 return;
9996 }
9997
9998 parser->current.end++;
9999 if (parser->current.end == parser->end) {
10000 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10001 return;
10002 }
10003
 // NOTE: shadows the function-level `peeked`.
10004 uint8_t peeked = peek(parser);
10005 switch (peeked) {
10006 case '\\':
10007 parser->current.end++;
10008
10009 if (match(parser, 'u') || match(parser, 'U')) {
10010 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10011 return;
10012 }
10013
10014 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10015 return;
10016 case ' ':
10017 parser->current.end++;
10018 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10019 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10020 return;
10021 case '\t':
 // The control flag is stripped from the first argument, so the first
 // prefix of the warning is chosen without regard to it.
10022 parser->current.end++;
10023 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10024 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10025 return;
10026 default:
10027 if (!char_is_ascii_printable(peeked)) {
10028 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10029 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10030 return;
10031 }
10032
10033 parser->current.end++;
10034 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10035 return;
10036 }
10037 }
 // A carriage return immediately followed by a newline is consumed as a
 // pair and written as a single '\n' byte (to `buffer` only).
10038 case '\r': {
10039 if (peek_offset(parser, 1) == '\n') {
10040 parser->current.end += 2;
10041 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10042 return;
10043 }
 /* fallthrough: a lone carriage return is handled by the default case */
10045 }
 // Any other character.
10046 default: {
 // Inside a control or meta escape the target must pass
 // char_is_ascii_printable.
10047 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
10048 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10049 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10050 return;
10051 }
 // If input remains, write the next character as the escape target;
 // otherwise the escape is cut off by the end of the source.
10052 if (parser->current.end < parser->end) {
10053 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
10054 } else {
10055 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10056 }
10057 return;
10058 }
10059 }
10060}
10061
10087static pm_token_type_t
10088lex_question_mark(pm_parser_t *parser) {
10089 if (lex_state_end_p(parser)) {
10090 lex_state_set(parser, PM_LEX_STATE_BEG);
10091 return PM_TOKEN_QUESTION_MARK;
10092 }
10093
10094 if (parser->current.end >= parser->end) {
10095 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10096 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10097 return PM_TOKEN_CHARACTER_LITERAL;
10098 }
10099
10100 if (pm_char_is_whitespace(*parser->current.end)) {
10101 lex_state_set(parser, PM_LEX_STATE_BEG);
10102 return PM_TOKEN_QUESTION_MARK;
10103 }
10104
10105 lex_state_set(parser, PM_LEX_STATE_BEG);
10106
10107 if (match(parser, '\\')) {
10108 lex_state_set(parser, PM_LEX_STATE_END);
10109
10110 pm_buffer_t buffer;
10111 pm_buffer_init_capacity(&buffer, 3);
10112
10113 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10114 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10115
10116 return PM_TOKEN_CHARACTER_LITERAL;
10117 } else {
10118 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10119
10120 // Ternary operators can have a ? immediately followed by an identifier
10121 // which starts with an underscore. We check for this case here.
10122 if (
10123 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10124 (
10125 (parser->current.end + encoding_width >= parser->end) ||
10126 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
10127 )
10128 ) {
10129 lex_state_set(parser, PM_LEX_STATE_END);
10130 parser->current.end += encoding_width;
10131 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10132 return PM_TOKEN_CHARACTER_LITERAL;
10133 }
10134 }
10135
10136 return PM_TOKEN_QUESTION_MARK;
10137}
10138
10143static pm_token_type_t
10144lex_at_variable(pm_parser_t *parser) {
10145 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
10146 const uint8_t *end = parser->end;
10147
10148 size_t width;
10149 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
10150 parser->current.end += width;
10151
10152 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
10153 parser->current.end += width;
10154 }
10155 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10156 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10157 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
10158 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10159 }
10160
10161 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
10162 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10163 } else {
10164 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10165 pm_parser_err_token(parser, &parser->current, diag_id);
10166 }
10167
10168 // If we're lexing an embedded variable, then we need to pop back into the
10169 // parent lex context.
10170 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10171 lex_mode_pop(parser);
10172 }
10173
10174 return type;
10175}
10176
10180static inline void
10181parser_lex_callback(pm_parser_t *parser) {
10182 if (parser->lex_callback) {
10183 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10184 }
10185}
10186
10190static inline pm_comment_t *
10191parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10192 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10193 if (comment == NULL) return NULL;
10194
10195 *comment = (pm_comment_t) {
10196 .type = type,
10197 .location = { parser->current.start, parser->current.end }
10198 };
10199
10200 return comment;
10201}
10202
10208static pm_token_type_t
10209lex_embdoc(pm_parser_t *parser) {
10210 // First, lex out the EMBDOC_BEGIN token.
10211 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10212
10213 if (newline == NULL) {
10214 parser->current.end = parser->end;
10215 } else {
10216 pm_newline_list_append(&parser->newline_list, newline);
10217 parser->current.end = newline + 1;
10218 }
10219
10220 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10221 parser_lex_callback(parser);
10222
10223 // Now, create a comment that is going to be attached to the parser.
10224 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10225 if (comment == NULL) return PM_TOKEN_EOF;
10226
10227 // Now, loop until we find the end of the embedded documentation or the end
10228 // of the file.
10229 while (parser->current.end + 4 <= parser->end) {
10230 parser->current.start = parser->current.end;
10231
10232 // If we've hit the end of the embedded documentation then we'll return
10233 // that token here.
10234 if (
10235 (memcmp(parser->current.end, "=end", 4) == 0) &&
10236 (
10237 (parser->current.end + 4 == parser->end) || // end of file
10238 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10239 (parser->current.end[4] == '\0') || // NUL or end of script
10240 (parser->current.end[4] == '\004') || // ^D
10241 (parser->current.end[4] == '\032') // ^Z
10242 )
10243 ) {
10244 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10245
10246 if (newline == NULL) {
10247 parser->current.end = parser->end;
10248 } else {
10249 pm_newline_list_append(&parser->newline_list, newline);
10250 parser->current.end = newline + 1;
10251 }
10252
10253 parser->current.type = PM_TOKEN_EMBDOC_END;
10254 parser_lex_callback(parser);
10255
10256 comment->location.end = parser->current.end;
10257 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10258
10259 return PM_TOKEN_EMBDOC_END;
10260 }
10261
10262 // Otherwise, we'll parse until the end of the line and return a line of
10263 // embedded documentation.
10264 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10265
10266 if (newline == NULL) {
10267 parser->current.end = parser->end;
10268 } else {
10269 pm_newline_list_append(&parser->newline_list, newline);
10270 parser->current.end = newline + 1;
10271 }
10272
10273 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10274 parser_lex_callback(parser);
10275 }
10276
10277 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10278
10279 comment->location.end = parser->current.end;
10280 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10281
10282 return PM_TOKEN_EOF;
10283}
10284
10290static inline void
10291parser_lex_ignored_newline(pm_parser_t *parser) {
10292 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10293 parser_lex_callback(parser);
10294}
10295
10305static inline void
10306parser_flush_heredoc_end(pm_parser_t *parser) {
10307 assert(parser->heredoc_end <= parser->end);
10308 parser->next_start = parser->heredoc_end;
10309 parser->heredoc_end = NULL;
10310}
10311
10315static bool
10316parser_end_of_line_p(const pm_parser_t *parser) {
10317 const uint8_t *cursor = parser->current.end;
10318
10319 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10320 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10321 }
10322
10323 return true;
10324}
10325
10344typedef struct {
10350
10355 const uint8_t *cursor;
10357
10377
10381static inline void
10382pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10383 pm_buffer_append_byte(&token_buffer->buffer, byte);
10384}
10385
10386static inline void
10387pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10388 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10389}
10390
10394static inline size_t
10395parser_char_width(const pm_parser_t *parser) {
10396 size_t width;
10397 if (parser->encoding_changed) {
10398 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10399 } else {
10400 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10401 }
10402
10403 // TODO: If the character is invalid in the given encoding, then we'll just
10404 // push one byte into the buffer. This should actually be an error.
10405 return (width == 0 ? 1 : width);
10406}
10407
10411static void
10412pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10413 size_t width = parser_char_width(parser);
10414 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10415 parser->current.end += width;
10416}
10417
10418static void
10419pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10420 size_t width = parser_char_width(parser);
10421 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10422 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10423 parser->current.end += width;
10424}
10425
/**
 * Returns true if none of the `length` bytes starting at `value` has its high
 * bit set, i.e. every byte is in the 7-bit ASCII range. An empty slice is
 * considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length-- > 0) {
        if (*value++ > 0x7F) return false;
    }

    return true;
}
10434
10441static inline void
10442pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10443 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10444}
10445
10446static inline void
10447pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10448 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10449 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10450 pm_buffer_free(&token_buffer->regexp_buffer);
10451}
10452
10462static void
10463pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10464 if (token_buffer->cursor == NULL) {
10465 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10466 } else {
10467 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10468 pm_token_buffer_copy(parser, token_buffer);
10469 }
10470}
10471
10472static void
10473pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10474 if (token_buffer->base.cursor == NULL) {
10475 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10476 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10477 } else {
10478 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10479 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10480 pm_regexp_token_buffer_copy(parser, token_buffer);
10481 }
10482}
10483
/**
 * The capacity passed to pm_buffer_init_capacity the first time a token
 * buffer needs to start accumulating content.
 */
#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10485
10494static void
10495pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10496 const uint8_t *start;
10497 if (token_buffer->cursor == NULL) {
10498 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10499 start = parser->current.start;
10500 } else {
10501 start = token_buffer->cursor;
10502 }
10503
10504 const uint8_t *end = parser->current.end - 1;
10505 assert(end >= start);
10506 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10507
10508 token_buffer->cursor = end;
10509}
10510
10511static void
10512pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10513 const uint8_t *start;
10514 if (token_buffer->base.cursor == NULL) {
10515 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10516 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10517 start = parser->current.start;
10518 } else {
10519 start = token_buffer->base.cursor;
10520 }
10521
10522 const uint8_t *end = parser->current.end - 1;
10523 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10524 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10525
10526 token_buffer->base.cursor = end;
10527}
10528
10529#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10530
10535static inline size_t
10536pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10537 size_t whitespace = 0;
10538
10539 switch (indent) {
10540 case PM_HEREDOC_INDENT_NONE:
10541 // Do nothing, we can't match a terminator with
10542 // indentation and there's no need to calculate common
10543 // whitespace.
10544 break;
10545 case PM_HEREDOC_INDENT_DASH:
10546 // Skip past inline whitespace.
10547 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10548 break;
10549 case PM_HEREDOC_INDENT_TILDE:
10550 // Skip past inline whitespace and calculate common
10551 // whitespace.
10552 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10553 if (**cursor == '\t') {
10554 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10555 } else {
10556 whitespace++;
10557 }
10558 (*cursor)++;
10559 }
10560
10561 break;
10562 }
10563
10564 return whitespace;
10565}
10566
10571static uint8_t
10572pm_lex_percent_delimiter(pm_parser_t *parser) {
10573 size_t eol_length = match_eol(parser);
10574
10575 if (eol_length) {
10576 if (parser->heredoc_end) {
10577 // If we have already lexed a heredoc, then the newline has already
10578 // been added to the list. In this case we want to just flush the
10579 // heredoc end.
10580 parser_flush_heredoc_end(parser);
10581 } else {
10582 // Otherwise, we'll add the newline to the list of newlines.
10583 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10584 }
10585
10586 uint8_t delimiter = *parser->current.end;
10587
10588 // If our delimiter is \r\n, we want to treat it as if it's \n.
10589 // For example, %\r\nfoo\r\n should be "foo"
10590 if (eol_length == 2) {
10591 delimiter = *(parser->current.end + 1);
10592 }
10593
10594 parser->current.end += eol_length;
10595 return delimiter;
10596 }
10597
10598 return *parser->current.end++;
10599}
10600
/**
 * Set the type of the current token, report it through the lex callback, and
 * return from the enclosing function. The expansion is several statements
 * ending in a bare `return`, so it must be used as a complete statement
 * (`LEX(type);`) inside a braced block, in a function returning void that has
 * a `parser` variable in scope. Using it as the unbraced body of an `if`
 * would only guard the first statement.
 */
#define LEX(token_type) \
    parser->current.type = token_type; \
    parser_lex_callback(parser); \
    return
10606
10613static void
10614parser_lex(pm_parser_t *parser) {
10615 assert(parser->current.end <= parser->end);
10616 parser->previous = parser->current;
10617
10618 // This value mirrors cmd_state from CRuby.
10619 bool previous_command_start = parser->command_start;
10620 parser->command_start = false;
10621
10622 // This is used to communicate to the newline lexing function that we've
10623 // already seen a comment.
10624 bool lexed_comment = false;
10625
10626 // Here we cache the current value of the semantic token seen flag. This is
10627 // used to reset it in case we find a token that shouldn't flip this flag.
10628 unsigned int semantic_token_seen = parser->semantic_token_seen;
10629 parser->semantic_token_seen = true;
10630
10631 switch (parser->lex_modes.current->mode) {
10632 case PM_LEX_DEFAULT:
10633 case PM_LEX_EMBEXPR:
10634 case PM_LEX_EMBVAR:
10635
10636 // We have a specific named label here because we are going to jump back to
10637 // this location in the event that we have lexed a token that should not be
10638 // returned to the parser. This includes comments, ignored newlines, and
10639 // invalid tokens of some form.
10640 lex_next_token: {
10641 // If we have the special next_start pointer set, then we're going to jump
10642 // to that location and start lexing from there.
10643 if (parser->next_start != NULL) {
10644 parser->current.end = parser->next_start;
10645 parser->next_start = NULL;
10646 }
10647
10648 // This value mirrors space_seen from CRuby. It tracks whether or not
10649 // space has been eaten before the start of the next token.
10650 bool space_seen = false;
10651
10652 // First, we're going to skip past any whitespace at the front of the next
10653 // token.
10654 bool chomping = true;
10655 while (parser->current.end < parser->end && chomping) {
10656 switch (*parser->current.end) {
10657 case ' ':
10658 case '\t':
10659 case '\f':
10660 case '\v':
10661 parser->current.end++;
10662 space_seen = true;
10663 break;
10664 case '\r':
10665 if (match_eol_offset(parser, 1)) {
10666 chomping = false;
10667 } else {
10668 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10669 parser->current.end++;
10670 space_seen = true;
10671 }
10672 break;
10673 case '\\': {
10674 size_t eol_length = match_eol_offset(parser, 1);
10675 if (eol_length) {
10676 if (parser->heredoc_end) {
10677 parser->current.end = parser->heredoc_end;
10678 parser->heredoc_end = NULL;
10679 } else {
10680 parser->current.end += eol_length + 1;
10681 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10682 space_seen = true;
10683 }
10684 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10685 parser->current.end += 2;
10686 } else {
10687 chomping = false;
10688 }
10689
10690 break;
10691 }
10692 default:
10693 chomping = false;
10694 break;
10695 }
10696 }
10697
10698 // Next, we'll set to start of this token to be the current end.
10699 parser->current.start = parser->current.end;
10700
10701 // We'll check if we're at the end of the file. If we are, then we
10702 // need to return the EOF token.
10703 if (parser->current.end >= parser->end) {
10704 // If we hit EOF, but the EOF came immediately after a newline,
10705 // set the start of the token to the newline. This way any EOF
10706 // errors will be reported as happening on that line rather than
10707 // a line after. For example "foo(\n" should report an error
10708 // on line 1 even though EOF technically occurs on line 2.
10709 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10710 parser->current.start -= 1;
10711 }
10712 LEX(PM_TOKEN_EOF);
10713 }
10714
10715 // Finally, we'll check the current character to determine the next
10716 // token.
10717 switch (*parser->current.end++) {
10718 case '\0': // NUL or end of script
10719 case '\004': // ^D
10720 case '\032': // ^Z
10721 parser->current.end--;
10722 LEX(PM_TOKEN_EOF);
10723
10724 case '#': { // comments
10725 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10726 parser->current.end = ending == NULL ? parser->end : ending;
10727
10728 // If we found a comment while lexing, then we're going to
10729 // add it to the list of comments in the file and keep
10730 // lexing.
10731 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10732 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10733
10734 if (ending) parser->current.end++;
10735 parser->current.type = PM_TOKEN_COMMENT;
10736 parser_lex_callback(parser);
10737
10738 // Here, parse the comment to see if it's a magic comment
10739 // and potentially change state on the parser.
10740 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10741 ptrdiff_t length = parser->current.end - parser->current.start;
10742
10743 // If we didn't find a magic comment within the first
10744 // pass and we're at the start of the file, then we need
10745 // to do another pass to potentially find other patterns
10746 // for encoding comments.
10747 if (length >= 10 && !parser->encoding_locked) {
10748 parser_lex_magic_comment_encoding(parser);
10749 }
10750 }
10751
10752 lexed_comment = true;
10753 }
10755 case '\r':
10756 case '\n': {
10757 parser->semantic_token_seen = semantic_token_seen & 0x1;
10758 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10759
10760 if (eol_length) {
10761 // The only way you can have carriage returns in this
10762 // particular loop is if you have a carriage return
10763 // followed by a newline. In that case we'll just skip
10764 // over the carriage return and continue lexing, in
10765 // order to make it so that the newline token
10766 // encapsulates both the carriage return and the
10767 // newline. Note that we need to check that we haven't
10768 // already lexed a comment here because that falls
10769 // through into here as well.
10770 if (!lexed_comment) {
10771 parser->current.end += eol_length - 1; // skip CR
10772 }
10773
10774 if (parser->heredoc_end == NULL) {
10775 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10776 }
10777 }
10778
10779 if (parser->heredoc_end) {
10780 parser_flush_heredoc_end(parser);
10781 }
10782
10783 // If this is an ignored newline, then we can continue lexing after
10784 // calling the callback with the ignored newline token.
10785 switch (lex_state_ignored_p(parser)) {
10786 case PM_IGNORED_NEWLINE_NONE:
10787 break;
10788 case PM_IGNORED_NEWLINE_PATTERN:
10789 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10790 if (!lexed_comment) parser_lex_ignored_newline(parser);
10791 lex_state_set(parser, PM_LEX_STATE_BEG);
10792 parser->command_start = true;
10793 parser->current.type = PM_TOKEN_NEWLINE;
10794 return;
10795 }
10797 case PM_IGNORED_NEWLINE_ALL:
10798 if (!lexed_comment) parser_lex_ignored_newline(parser);
10799 lexed_comment = false;
10800 goto lex_next_token;
10801 }
10802
10803 // Here we need to look ahead and see if there is a call operator
10804 // (either . or &.) that starts the next line. If there is, then this
10805 // is going to become an ignored newline and we're going to instead
10806 // return the call operator.
10807 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10808 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10809
10810 if (next_content < parser->end) {
10811 // If we hit a comment after a newline, then we're going to check
10812 // if it's ignored or if it's followed by a method call ('.').
10813 // If it is, then we're going to call the
10814 // callback with an ignored newline and then continue lexing.
10815 // Otherwise we'll return a regular newline.
10816 if (next_content[0] == '#') {
10817 // Here we look for a "." or "&." following a "\n".
10818 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10819
10820 while (following && (following + 1 < parser->end)) {
10821 following++;
10822 following += pm_strspn_inline_whitespace(following, parser->end - following);
10823
10824 // If this is not followed by a comment, then we can break out
10825 // of this loop.
10826 if (peek_at(parser, following) != '#') break;
10827
10828 // If there is a comment, then we need to find the end of the
10829 // comment and continue searching from there.
10830 following = next_newline(following, parser->end - following);
10831 }
10832
10833 // If the lex state was ignored, we will lex the
10834 // ignored newline.
10835 if (lex_state_ignored_p(parser)) {
10836 if (!lexed_comment) parser_lex_ignored_newline(parser);
10837 lexed_comment = false;
10838 goto lex_next_token;
10839 }
10840
10841 // If we hit a '.' or a '&.' we will lex the ignored
10842 // newline.
10843 if (following && (
10844 (peek_at(parser, following) == '.') ||
10845 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10846 )) {
10847 if (!lexed_comment) parser_lex_ignored_newline(parser);
10848 lexed_comment = false;
10849 goto lex_next_token;
10850 }
10851
10852
10853 // If we are parsing as CRuby 3.5 or later and we
10854 // hit a '&&' or a '||' then we will lex the ignored
10855 // newline.
10856 if (
10858 following && (
10859 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
10860 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
10861 (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
10862 (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
10863 )
10864 ) {
10865 if (!lexed_comment) parser_lex_ignored_newline(parser);
10866 lexed_comment = false;
10867 goto lex_next_token;
10868 }
10869 }
10870
10871 // If we hit a . after a newline, then we're in a call chain and
10872 // we need to return the call operator.
10873 if (next_content[0] == '.') {
10874 // To match ripper, we need to emit an ignored newline even though
10875 // it's a real newline in the case that we have a beginless range
10876 // on a subsequent line.
10877 if (peek_at(parser, next_content + 1) == '.') {
10878 if (!lexed_comment) parser_lex_ignored_newline(parser);
10879 lex_state_set(parser, PM_LEX_STATE_BEG);
10880 parser->command_start = true;
10881 parser->current.type = PM_TOKEN_NEWLINE;
10882 return;
10883 }
10884
10885 if (!lexed_comment) parser_lex_ignored_newline(parser);
10886 lex_state_set(parser, PM_LEX_STATE_DOT);
10887 parser->current.start = next_content;
10888 parser->current.end = next_content + 1;
10889 parser->next_start = NULL;
10890 LEX(PM_TOKEN_DOT);
10891 }
10892
10893 // If we hit a &. after a newline, then we're in a call chain and
10894 // we need to return the call operator.
10895 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10896 if (!lexed_comment) parser_lex_ignored_newline(parser);
10897 lex_state_set(parser, PM_LEX_STATE_DOT);
10898 parser->current.start = next_content;
10899 parser->current.end = next_content + 2;
10900 parser->next_start = NULL;
10901 LEX(PM_TOKEN_AMPERSAND_DOT);
10902 }
10903
10904 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
10905 // If we hit an && then we are in a logical chain
10906 // and we need to return the logical operator.
10907 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10908 if (!lexed_comment) parser_lex_ignored_newline(parser);
10909 lex_state_set(parser, PM_LEX_STATE_BEG);
10910 parser->current.start = next_content;
10911 parser->current.end = next_content + 2;
10912 parser->next_start = NULL;
10913 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10914 }
10915
10916 // If we hit a || then we are in a logical chain and
10917 // we need to return the logical operator.
10918 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10919 if (!lexed_comment) parser_lex_ignored_newline(parser);
10920 lex_state_set(parser, PM_LEX_STATE_BEG);
10921 parser->current.start = next_content;
10922 parser->current.end = next_content + 2;
10923 parser->next_start = NULL;
10924 LEX(PM_TOKEN_PIPE_PIPE);
10925 }
10926
10927 // If we hit an 'and' then we are in a logical chain
10928 // and we need to return the logical operator.
10929 if (
10930 peek_at(parser, next_content) == 'a' &&
10931 peek_at(parser, next_content + 1) == 'n' &&
10932 peek_at(parser, next_content + 2) == 'd' &&
10933 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10934 ) {
10935 if (!lexed_comment) parser_lex_ignored_newline(parser);
10936 lex_state_set(parser, PM_LEX_STATE_BEG);
10937 parser->current.start = next_content;
10938 parser->current.end = next_content + 3;
10939 parser->next_start = NULL;
10940 parser->command_start = true;
10941 LEX(PM_TOKEN_KEYWORD_AND);
10942 }
10943
10944 // If we hit a 'or' then we are in a logical chain
10945 // and we need to return the logical operator.
10946 if (
10947 peek_at(parser, next_content) == 'o' &&
10948 peek_at(parser, next_content + 1) == 'r' &&
10949 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10950 ) {
10951 if (!lexed_comment) parser_lex_ignored_newline(parser);
10952 lex_state_set(parser, PM_LEX_STATE_BEG);
10953 parser->current.start = next_content;
10954 parser->current.end = next_content + 2;
10955 parser->next_start = NULL;
10956 parser->command_start = true;
10957 LEX(PM_TOKEN_KEYWORD_OR);
10958 }
10959 }
10960 }
10961
10962 // At this point we know this is a regular newline, and we can set the
10963 // necessary state and return the token.
10964 lex_state_set(parser, PM_LEX_STATE_BEG);
10965 parser->command_start = true;
10966 parser->current.type = PM_TOKEN_NEWLINE;
10967 if (!lexed_comment) parser_lex_callback(parser);
10968 return;
10969 }
10970
10971 // ,
10972 case ',':
10973 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10974 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10975 }
10976
10977 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10978 LEX(PM_TOKEN_COMMA);
10979
10980 // (
10981 case '(': {
10982 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10983
10984 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10985 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10986 }
10987
10988 parser->enclosure_nesting++;
10989 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10990 pm_do_loop_stack_push(parser, false);
10991 LEX(type);
10992 }
10993
10994 // )
10995 case ')':
10996 parser->enclosure_nesting--;
10997 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10998 pm_do_loop_stack_pop(parser);
10999 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
11000
11001 // ;
11002 case ';':
11003 lex_state_set(parser, PM_LEX_STATE_BEG);
11004 parser->command_start = true;
11005 LEX(PM_TOKEN_SEMICOLON);
11006
11007 // [ [] []=
11008 case '[':
11009 parser->enclosure_nesting++;
11010 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
11011
11012 if (lex_state_operator_p(parser)) {
11013 if (match(parser, ']')) {
11014 parser->enclosure_nesting--;
11015 lex_state_set(parser, PM_LEX_STATE_ARG);
11016 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
11017 }
11018
11019 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
11020 LEX(type);
11021 }
11022
11023 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
11024 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
11025 }
11026
11027 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11028 pm_do_loop_stack_push(parser, false);
11029 LEX(type);
11030
11031 // ]
11032 case ']':
11033 parser->enclosure_nesting--;
11034 lex_state_set(parser, PM_LEX_STATE_END);
11035 pm_do_loop_stack_pop(parser);
11036 LEX(PM_TOKEN_BRACKET_RIGHT);
11037
11038 // {
11039 case '{': {
11040 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
11041
11042 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
11043 // This { begins a lambda
11044 parser->command_start = true;
11045 lex_state_set(parser, PM_LEX_STATE_BEG);
11046 type = PM_TOKEN_LAMBDA_BEGIN;
11047 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
11048 // This { begins a hash literal
11049 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11050 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
11051 // This { begins a block
11052 parser->command_start = true;
11053 lex_state_set(parser, PM_LEX_STATE_BEG);
11054 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
11055 // This { begins a block on a command
11056 parser->command_start = true;
11057 lex_state_set(parser, PM_LEX_STATE_BEG);
11058 } else {
11059 // This { begins a hash literal
11060 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11061 }
11062
11063 parser->enclosure_nesting++;
11064 parser->brace_nesting++;
11065 pm_do_loop_stack_push(parser, false);
11066
11067 LEX(type);
11068 }
11069
11070 // }
11071 case '}':
11072 parser->enclosure_nesting--;
11073 pm_do_loop_stack_pop(parser);
11074
11075 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
11076 lex_mode_pop(parser);
11077 LEX(PM_TOKEN_EMBEXPR_END);
11078 }
11079
11080 parser->brace_nesting--;
11081 lex_state_set(parser, PM_LEX_STATE_END);
11082 LEX(PM_TOKEN_BRACE_RIGHT);
11083
11084 // * ** **= *=
11085 case '*': {
11086 if (match(parser, '*')) {
11087 if (match(parser, '=')) {
11088 lex_state_set(parser, PM_LEX_STATE_BEG);
11089 LEX(PM_TOKEN_STAR_STAR_EQUAL);
11090 }
11091
11092 pm_token_type_t type = PM_TOKEN_STAR_STAR;
11093
11094 if (lex_state_spcarg_p(parser, space_seen)) {
11095 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11096 type = PM_TOKEN_USTAR_STAR;
11097 } else if (lex_state_beg_p(parser)) {
11098 type = PM_TOKEN_USTAR_STAR;
11099 } else if (ambiguous_operator_p(parser, space_seen)) {
11100 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11101 }
11102
11103 if (lex_state_operator_p(parser)) {
11104 lex_state_set(parser, PM_LEX_STATE_ARG);
11105 } else {
11106 lex_state_set(parser, PM_LEX_STATE_BEG);
11107 }
11108
11109 LEX(type);
11110 }
11111
11112 if (match(parser, '=')) {
11113 lex_state_set(parser, PM_LEX_STATE_BEG);
11114 LEX(PM_TOKEN_STAR_EQUAL);
11115 }
11116
11117 pm_token_type_t type = PM_TOKEN_STAR;
11118
11119 if (lex_state_spcarg_p(parser, space_seen)) {
11120 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11121 type = PM_TOKEN_USTAR;
11122 } else if (lex_state_beg_p(parser)) {
11123 type = PM_TOKEN_USTAR;
11124 } else if (ambiguous_operator_p(parser, space_seen)) {
11125 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11126 }
11127
11128 if (lex_state_operator_p(parser)) {
11129 lex_state_set(parser, PM_LEX_STATE_ARG);
11130 } else {
11131 lex_state_set(parser, PM_LEX_STATE_BEG);
11132 }
11133
11134 LEX(type);
11135 }
11136
11137 // ! != !~ !@
11138 case '!':
11139 if (lex_state_operator_p(parser)) {
11140 lex_state_set(parser, PM_LEX_STATE_ARG);
11141 if (match(parser, '@')) {
11142 LEX(PM_TOKEN_BANG);
11143 }
11144 } else {
11145 lex_state_set(parser, PM_LEX_STATE_BEG);
11146 }
11147
11148 if (match(parser, '=')) {
11149 LEX(PM_TOKEN_BANG_EQUAL);
11150 }
11151
11152 if (match(parser, '~')) {
11153 LEX(PM_TOKEN_BANG_TILDE);
11154 }
11155
11156 LEX(PM_TOKEN_BANG);
11157
11158 // = => =~ == === =begin
11159 case '=':
11160 if (
11161 current_token_starts_line(parser) &&
11162 (parser->current.end + 5 <= parser->end) &&
11163 memcmp(parser->current.end, "begin", 5) == 0 &&
11164 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11165 ) {
11166 pm_token_type_t type = lex_embdoc(parser);
11167 if (type == PM_TOKEN_EOF) {
11168 LEX(type);
11169 }
11170
11171 goto lex_next_token;
11172 }
11173
11174 if (lex_state_operator_p(parser)) {
11175 lex_state_set(parser, PM_LEX_STATE_ARG);
11176 } else {
11177 lex_state_set(parser, PM_LEX_STATE_BEG);
11178 }
11179
11180 if (match(parser, '>')) {
11181 LEX(PM_TOKEN_EQUAL_GREATER);
11182 }
11183
11184 if (match(parser, '~')) {
11185 LEX(PM_TOKEN_EQUAL_TILDE);
11186 }
11187
11188 if (match(parser, '=')) {
11189 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11190 }
11191
11192 LEX(PM_TOKEN_EQUAL);
11193
11194 // < << <<= <= <=>
11195 case '<':
11196 if (match(parser, '<')) {
11197 if (
11198 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11199 !lex_state_end_p(parser) &&
11200 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11201 ) {
11202 const uint8_t *end = parser->current.end;
11203
11204 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11205 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11206
11207 if (match(parser, '-')) {
11208 indent = PM_HEREDOC_INDENT_DASH;
11209 }
11210 else if (match(parser, '~')) {
11211 indent = PM_HEREDOC_INDENT_TILDE;
11212 }
11213
11214 if (match(parser, '`')) {
11215 quote = PM_HEREDOC_QUOTE_BACKTICK;
11216 }
11217 else if (match(parser, '"')) {
11218 quote = PM_HEREDOC_QUOTE_DOUBLE;
11219 }
11220 else if (match(parser, '\'')) {
11221 quote = PM_HEREDOC_QUOTE_SINGLE;
11222 }
11223
11224 const uint8_t *ident_start = parser->current.end;
11225 size_t width = 0;
11226
11227 if (parser->current.end >= parser->end) {
11228 parser->current.end = end;
11229 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
11230 parser->current.end = end;
11231 } else {
11232 if (quote == PM_HEREDOC_QUOTE_NONE) {
11233 parser->current.end += width;
11234
11235 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
11236 parser->current.end += width;
11237 }
11238 } else {
11239 // If we have quotes, then we're going to go until we find the
11240 // end quote.
11241 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11242 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11243 parser->current.end++;
11244 }
11245 }
11246
11247 size_t ident_length = (size_t) (parser->current.end - ident_start);
11248 bool ident_error = false;
11249
11250 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11251 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11252 ident_error = true;
11253 }
11254
11255 parser->explicit_encoding = NULL;
11256 lex_mode_push(parser, (pm_lex_mode_t) {
11257 .mode = PM_LEX_HEREDOC,
11258 .as.heredoc = {
11259 .base = {
11260 .ident_start = ident_start,
11261 .ident_length = ident_length,
11262 .quote = quote,
11263 .indent = indent
11264 },
11265 .next_start = parser->current.end,
11266 .common_whitespace = NULL,
11267 .line_continuation = false
11268 }
11269 });
11270
11271 if (parser->heredoc_end == NULL) {
11272 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11273
11274 if (body_start == NULL) {
11275 // If there is no newline after the heredoc identifier, then
11276 // this is not a valid heredoc declaration. In this case we
11277 // will add an error, but we will still return a heredoc
11278 // start.
11279 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11280 body_start = parser->end;
11281 } else {
11282 // Otherwise, we want to indicate that the body of the
11283 // heredoc starts on the character after the next newline.
11284 pm_newline_list_append(&parser->newline_list, body_start);
11285 body_start++;
11286 }
11287
11288 parser->next_start = body_start;
11289 } else {
11290 parser->next_start = parser->heredoc_end;
11291 }
11292
11293 LEX(PM_TOKEN_HEREDOC_START);
11294 }
11295 }
11296
11297 if (match(parser, '=')) {
11298 lex_state_set(parser, PM_LEX_STATE_BEG);
11299 LEX(PM_TOKEN_LESS_LESS_EQUAL);
11300 }
11301
11302 if (ambiguous_operator_p(parser, space_seen)) {
11303 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11304 }
11305
11306 if (lex_state_operator_p(parser)) {
11307 lex_state_set(parser, PM_LEX_STATE_ARG);
11308 } else {
11309 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11310 lex_state_set(parser, PM_LEX_STATE_BEG);
11311 }
11312
11313 LEX(PM_TOKEN_LESS_LESS);
11314 }
11315
11316 if (lex_state_operator_p(parser)) {
11317 lex_state_set(parser, PM_LEX_STATE_ARG);
11318 } else {
11319 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11320 lex_state_set(parser, PM_LEX_STATE_BEG);
11321 }
11322
11323 if (match(parser, '=')) {
11324 if (match(parser, '>')) {
11325 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
11326 }
11327
11328 LEX(PM_TOKEN_LESS_EQUAL);
11329 }
11330
11331 LEX(PM_TOKEN_LESS);
11332
11333 // > >> >>= >=
11334 case '>':
11335 if (match(parser, '>')) {
11336 if (lex_state_operator_p(parser)) {
11337 lex_state_set(parser, PM_LEX_STATE_ARG);
11338 } else {
11339 lex_state_set(parser, PM_LEX_STATE_BEG);
11340 }
11341 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11342 }
11343
11344 if (lex_state_operator_p(parser)) {
11345 lex_state_set(parser, PM_LEX_STATE_ARG);
11346 } else {
11347 lex_state_set(parser, PM_LEX_STATE_BEG);
11348 }
11349
11350 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11351
11352 // double-quoted string literal
11353 case '"': {
11354 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11355 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11356 LEX(PM_TOKEN_STRING_BEGIN);
11357 }
11358
11359 // xstring literal
11360 case '`': {
11361 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11362 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11363 LEX(PM_TOKEN_BACKTICK);
11364 }
11365
11366 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11367 if (previous_command_start) {
11368 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11369 } else {
11370 lex_state_set(parser, PM_LEX_STATE_ARG);
11371 }
11372
11373 LEX(PM_TOKEN_BACKTICK);
11374 }
11375
11376 lex_mode_push_string(parser, true, false, '\0', '`');
11377 LEX(PM_TOKEN_BACKTICK);
11378 }
11379
11380 // single-quoted string literal
11381 case '\'': {
11382 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11383 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11384 LEX(PM_TOKEN_STRING_BEGIN);
11385 }
11386
11387 // ? character literal
11388 case '?':
11389 LEX(lex_question_mark(parser));
11390
11391 // & && &&= &=
11392 case '&': {
11393 if (match(parser, '&')) {
11394 lex_state_set(parser, PM_LEX_STATE_BEG);
11395
11396 if (match(parser, '=')) {
11397 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
11398 }
11399
11400 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
11401 }
11402
11403 if (match(parser, '=')) {
11404 lex_state_set(parser, PM_LEX_STATE_BEG);
11405 LEX(PM_TOKEN_AMPERSAND_EQUAL);
11406 }
11407
11408 if (match(parser, '.')) {
11409 lex_state_set(parser, PM_LEX_STATE_DOT);
11410 LEX(PM_TOKEN_AMPERSAND_DOT);
11411 }
11412
11413 pm_token_type_t type = PM_TOKEN_AMPERSAND;
11414 if (lex_state_spcarg_p(parser, space_seen)) {
11415 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11416 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11417 } else {
11418 const uint8_t delim = peek_offset(parser, 1);
11419
11420 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
11421 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11422 }
11423 }
11424
11425 type = PM_TOKEN_UAMPERSAND;
11426 } else if (lex_state_beg_p(parser)) {
11427 type = PM_TOKEN_UAMPERSAND;
11428 } else if (ambiguous_operator_p(parser, space_seen)) {
11429 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11430 }
11431
11432 if (lex_state_operator_p(parser)) {
11433 lex_state_set(parser, PM_LEX_STATE_ARG);
11434 } else {
11435 lex_state_set(parser, PM_LEX_STATE_BEG);
11436 }
11437
11438 LEX(type);
11439 }
11440
11441 // | || ||= |=
11442 case '|':
11443 if (match(parser, '|')) {
11444 if (match(parser, '=')) {
11445 lex_state_set(parser, PM_LEX_STATE_BEG);
11446 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
11447 }
11448
11449 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11450 parser->current.end--;
11451 LEX(PM_TOKEN_PIPE);
11452 }
11453
11454 lex_state_set(parser, PM_LEX_STATE_BEG);
11455 LEX(PM_TOKEN_PIPE_PIPE);
11456 }
11457
11458 if (match(parser, '=')) {
11459 lex_state_set(parser, PM_LEX_STATE_BEG);
11460 LEX(PM_TOKEN_PIPE_EQUAL);
11461 }
11462
11463 if (lex_state_operator_p(parser)) {
11464 lex_state_set(parser, PM_LEX_STATE_ARG);
11465 } else {
11466 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11467 }
11468
11469 LEX(PM_TOKEN_PIPE);
11470
11471 // + += +@
11472 case '+': {
11473 if (lex_state_operator_p(parser)) {
11474 lex_state_set(parser, PM_LEX_STATE_ARG);
11475
11476 if (match(parser, '@')) {
11477 LEX(PM_TOKEN_UPLUS);
11478 }
11479
11480 LEX(PM_TOKEN_PLUS);
11481 }
11482
11483 if (match(parser, '=')) {
11484 lex_state_set(parser, PM_LEX_STATE_BEG);
11485 LEX(PM_TOKEN_PLUS_EQUAL);
11486 }
11487
11488 if (
11489 lex_state_beg_p(parser) ||
11490 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11491 ) {
11492 lex_state_set(parser, PM_LEX_STATE_BEG);
11493
11494 if (pm_char_is_decimal_digit(peek(parser))) {
11495 parser->current.end++;
11496 pm_token_type_t type = lex_numeric(parser);
11497 lex_state_set(parser, PM_LEX_STATE_END);
11498 LEX(type);
11499 }
11500
11501 LEX(PM_TOKEN_UPLUS);
11502 }
11503
11504 if (ambiguous_operator_p(parser, space_seen)) {
11505 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11506 }
11507
11508 lex_state_set(parser, PM_LEX_STATE_BEG);
11509 LEX(PM_TOKEN_PLUS);
11510 }
11511
11512 // - -= -@
11513 case '-': {
11514 if (lex_state_operator_p(parser)) {
11515 lex_state_set(parser, PM_LEX_STATE_ARG);
11516
11517 if (match(parser, '@')) {
11518 LEX(PM_TOKEN_UMINUS);
11519 }
11520
11521 LEX(PM_TOKEN_MINUS);
11522 }
11523
11524 if (match(parser, '=')) {
11525 lex_state_set(parser, PM_LEX_STATE_BEG);
11526 LEX(PM_TOKEN_MINUS_EQUAL);
11527 }
11528
11529 if (match(parser, '>')) {
11530 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11531 LEX(PM_TOKEN_MINUS_GREATER);
11532 }
11533
11534 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11535 bool is_beg = lex_state_beg_p(parser);
11536 if (!is_beg && spcarg) {
11537 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11538 }
11539
11540 if (is_beg || spcarg) {
11541 lex_state_set(parser, PM_LEX_STATE_BEG);
11542 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11543 }
11544
11545 if (ambiguous_operator_p(parser, space_seen)) {
11546 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11547 }
11548
11549 lex_state_set(parser, PM_LEX_STATE_BEG);
11550 LEX(PM_TOKEN_MINUS);
11551 }
11552
11553 // . .. ...
11554 case '.': {
11555 bool beg_p = lex_state_beg_p(parser);
11556
11557 if (match(parser, '.')) {
11558 if (match(parser, '.')) {
11559 // If we're _not_ inside a range within default parameters
11560 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11561 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11562 lex_state_set(parser, PM_LEX_STATE_BEG);
11563 } else {
11564 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11565 }
11566 LEX(PM_TOKEN_UDOT_DOT_DOT);
11567 }
11568
11569 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11570 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11571 }
11572
11573 lex_state_set(parser, PM_LEX_STATE_BEG);
11574 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
11575 }
11576
11577 lex_state_set(parser, PM_LEX_STATE_BEG);
11578 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11579 }
11580
11581 lex_state_set(parser, PM_LEX_STATE_DOT);
11582 LEX(PM_TOKEN_DOT);
11583 }
11584
11585 // integer
11586 case '0':
11587 case '1':
11588 case '2':
11589 case '3':
11590 case '4':
11591 case '5':
11592 case '6':
11593 case '7':
11594 case '8':
11595 case '9': {
11596 pm_token_type_t type = lex_numeric(parser);
11597 lex_state_set(parser, PM_LEX_STATE_END);
11598 LEX(type);
11599 }
11600
11601 // :: symbol
11602 case ':':
11603 if (match(parser, ':')) {
11604 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11605 lex_state_set(parser, PM_LEX_STATE_BEG);
11606 LEX(PM_TOKEN_UCOLON_COLON);
11607 }
11608
11609 lex_state_set(parser, PM_LEX_STATE_DOT);
11610 LEX(PM_TOKEN_COLON_COLON);
11611 }
11612
11613 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11614 lex_state_set(parser, PM_LEX_STATE_BEG);
11615 LEX(PM_TOKEN_COLON);
11616 }
11617
11618 if (peek(parser) == '"' || peek(parser) == '\'') {
11619 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11620 parser->current.end++;
11621 }
11622
11623 lex_state_set(parser, PM_LEX_STATE_FNAME);
11624 LEX(PM_TOKEN_SYMBOL_BEGIN);
11625
11626 // / /=
11627 case '/':
11628 if (lex_state_beg_p(parser)) {
11629 lex_mode_push_regexp(parser, '\0', '/');
11630 LEX(PM_TOKEN_REGEXP_BEGIN);
11631 }
11632
11633 if (match(parser, '=')) {
11634 lex_state_set(parser, PM_LEX_STATE_BEG);
11635 LEX(PM_TOKEN_SLASH_EQUAL);
11636 }
11637
11638 if (lex_state_spcarg_p(parser, space_seen)) {
11639 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11640 lex_mode_push_regexp(parser, '\0', '/');
11641 LEX(PM_TOKEN_REGEXP_BEGIN);
11642 }
11643
11644 if (ambiguous_operator_p(parser, space_seen)) {
11645 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11646 }
11647
11648 if (lex_state_operator_p(parser)) {
11649 lex_state_set(parser, PM_LEX_STATE_ARG);
11650 } else {
11651 lex_state_set(parser, PM_LEX_STATE_BEG);
11652 }
11653
11654 LEX(PM_TOKEN_SLASH);
11655
11656 // ^ ^=
11657 case '^':
11658 if (lex_state_operator_p(parser)) {
11659 lex_state_set(parser, PM_LEX_STATE_ARG);
11660 } else {
11661 lex_state_set(parser, PM_LEX_STATE_BEG);
11662 }
11663 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11664
11665 // ~ ~@
11666 case '~':
11667 if (lex_state_operator_p(parser)) {
11668 (void) match(parser, '@');
11669 lex_state_set(parser, PM_LEX_STATE_ARG);
11670 } else {
11671 lex_state_set(parser, PM_LEX_STATE_BEG);
11672 }
11673
11674 LEX(PM_TOKEN_TILDE);
11675
11676 // % %= %i %I %q %Q %w %W
11677 case '%': {
11678 // If there is no subsequent character then we have an
11679 // invalid token. We're going to say it's the percent
11680 // operator because we don't want to move into the string
11681 // lex mode unnecessarily.
11682 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11683 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11684 LEX(PM_TOKEN_PERCENT);
11685 }
11686
11687 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11688 lex_state_set(parser, PM_LEX_STATE_BEG);
11689 LEX(PM_TOKEN_PERCENT_EQUAL);
11690 } else if (
11691 lex_state_beg_p(parser) ||
11692 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11693 lex_state_spcarg_p(parser, space_seen)
11694 ) {
11695 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11696 if (*parser->current.end >= 0x80) {
11697 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11698 }
11699
11700 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11701 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11702 LEX(PM_TOKEN_STRING_BEGIN);
11703 }
11704
11705 // Delimiters for %-literals cannot be alphanumeric. We
11706 // validate that here.
11707 uint8_t delimiter = peek_offset(parser, 1);
11708 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11709 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11710 goto lex_next_token;
11711 }
11712
11713 switch (peek(parser)) {
11714 case 'i': {
11715 parser->current.end++;
11716
11717 if (parser->current.end < parser->end) {
11718 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11719 } else {
11720 lex_mode_push_list_eof(parser);
11721 }
11722
11723 LEX(PM_TOKEN_PERCENT_LOWER_I);
11724 }
11725 case 'I': {
11726 parser->current.end++;
11727
11728 if (parser->current.end < parser->end) {
11729 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11730 } else {
11731 lex_mode_push_list_eof(parser);
11732 }
11733
11734 LEX(PM_TOKEN_PERCENT_UPPER_I);
11735 }
11736 case 'r': {
11737 parser->current.end++;
11738
11739 if (parser->current.end < parser->end) {
11740 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11741 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11742 } else {
11743 lex_mode_push_regexp(parser, '\0', '\0');
11744 }
11745
11746 LEX(PM_TOKEN_REGEXP_BEGIN);
11747 }
11748 case 'q': {
11749 parser->current.end++;
11750
11751 if (parser->current.end < parser->end) {
11752 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11753 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11754 } else {
11755 lex_mode_push_string_eof(parser);
11756 }
11757
11758 LEX(PM_TOKEN_STRING_BEGIN);
11759 }
11760 case 'Q': {
11761 parser->current.end++;
11762
11763 if (parser->current.end < parser->end) {
11764 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11765 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11766 } else {
11767 lex_mode_push_string_eof(parser);
11768 }
11769
11770 LEX(PM_TOKEN_STRING_BEGIN);
11771 }
11772 case 's': {
11773 parser->current.end++;
11774
11775 if (parser->current.end < parser->end) {
11776 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11777 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11778 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11779 } else {
11780 lex_mode_push_string_eof(parser);
11781 }
11782
11783 LEX(PM_TOKEN_SYMBOL_BEGIN);
11784 }
11785 case 'w': {
11786 parser->current.end++;
11787
11788 if (parser->current.end < parser->end) {
11789 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11790 } else {
11791 lex_mode_push_list_eof(parser);
11792 }
11793
11794 LEX(PM_TOKEN_PERCENT_LOWER_W);
11795 }
11796 case 'W': {
11797 parser->current.end++;
11798
11799 if (parser->current.end < parser->end) {
11800 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11801 } else {
11802 lex_mode_push_list_eof(parser);
11803 }
11804
11805 LEX(PM_TOKEN_PERCENT_UPPER_W);
11806 }
11807 case 'x': {
11808 parser->current.end++;
11809
11810 if (parser->current.end < parser->end) {
11811 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11812 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11813 } else {
11814 lex_mode_push_string_eof(parser);
11815 }
11816
11817 LEX(PM_TOKEN_PERCENT_LOWER_X);
11818 }
11819 default:
11820 // If we get to this point, then we have a % that is completely
11821 // unparsable. In this case we'll just drop it from the parser
11822 // and skip past it and hope that the next token is something
11823 // that we can parse.
11824 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11825 goto lex_next_token;
11826 }
11827 }
11828
11829 if (ambiguous_operator_p(parser, space_seen)) {
11830 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11831 }
11832
11833 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11834 LEX(PM_TOKEN_PERCENT);
11835 }
11836
11837 // global variable
11838 case '$': {
11839 pm_token_type_t type = lex_global_variable(parser);
11840
11841 // If we're lexing an embedded variable, then we need to pop back into
11842 // the parent lex context.
11843 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11844 lex_mode_pop(parser);
11845 }
11846
11847 lex_state_set(parser, PM_LEX_STATE_END);
11848 LEX(type);
11849 }
11850
11851 // instance variable, class variable
11852 case '@':
11853 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11854 LEX(lex_at_variable(parser));
11855
11856 default: {
11857 if (*parser->current.start != '_') {
11858 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11859
11860 // If this isn't the beginning of an identifier, then
11861 // it's an invalid token as we've exhausted all of the
11862 // other options. We'll skip past it and return the next
11863 // token after adding an appropriate error message.
11864 if (!width) {
11865 if (*parser->current.start >= 0x80) {
11866 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11867 } else if (*parser->current.start == '\\') {
11868 switch (peek_at(parser, parser->current.start + 1)) {
11869 case ' ':
11870 parser->current.end++;
11871 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11872 break;
11873 case '\f':
11874 parser->current.end++;
11875 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11876 break;
11877 case '\t':
11878 parser->current.end++;
11879 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11880 break;
11881 case '\v':
11882 parser->current.end++;
11883 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11884 break;
11885 case '\r':
11886 if (peek_at(parser, parser->current.start + 2) != '\n') {
11887 parser->current.end++;
11888 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11889 break;
11890 }
11892 default:
11893 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11894 break;
11895 }
11896 } else if (char_is_ascii_printable(*parser->current.start)) {
11897 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11898 } else {
11899 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11900 }
11901
11902 goto lex_next_token;
11903 }
11904
11905 parser->current.end = parser->current.start + width;
11906 }
11907
11908 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11909
11910 // If we've hit a __END__ and it was at the start of the
11911 // line or the start of the file and it is followed by
11912 // either a \n or a \r\n, then this is the last token of the
11913 // file.
11914 if (
11915 ((parser->current.end - parser->current.start) == 7) &&
11916 current_token_starts_line(parser) &&
11917 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11918 (parser->current.end == parser->end || match_eol(parser))
11919 ) {
11920 // Since we know we're about to add an __END__ comment,
11921 // we know we need to add all of the newlines to get the
11922 // correct column information for it.
11923 const uint8_t *cursor = parser->current.end;
11924 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11925 pm_newline_list_append(&parser->newline_list, cursor++);
11926 }
11927
11928 parser->current.end = parser->end;
11929 parser->current.type = PM_TOKEN___END__;
11930 parser_lex_callback(parser);
11931
11932 parser->data_loc.start = parser->current.start;
11933 parser->data_loc.end = parser->current.end;
11934
11935 LEX(PM_TOKEN_EOF);
11936 }
11937
11938 pm_lex_state_t last_state = parser->lex_state;
11939
11940 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11941 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11942 if (previous_command_start) {
11943 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11944 } else {
11945 lex_state_set(parser, PM_LEX_STATE_ARG);
11946 }
11947 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11948 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11949 } else {
11950 lex_state_set(parser, PM_LEX_STATE_END);
11951 }
11952 }
11953
11954 if (
11955 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11956 (type == PM_TOKEN_IDENTIFIER) &&
11957 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11958 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11959 ) {
11960 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11961 }
11962
11963 LEX(type);
11964 }
11965 }
11966 }
11967 case PM_LEX_LIST: {
11968 if (parser->next_start != NULL) {
11969 parser->current.end = parser->next_start;
11970 parser->next_start = NULL;
11971 }
11972
11973 // First we'll set the beginning of the token.
11974 parser->current.start = parser->current.end;
11975
11976 // If there's any whitespace at the start of the list, then we're
11977 // going to trim it off the beginning and create a new token.
11978 size_t whitespace;
11979
11980 if (parser->heredoc_end) {
11981 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11982 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11983 whitespace += 1;
11984 }
11985 } else {
11986 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11987 }
11988
11989 if (whitespace > 0) {
11990 parser->current.end += whitespace;
11991 if (peek_offset(parser, -1) == '\n') {
11992 // mutates next_start
11993 parser_flush_heredoc_end(parser);
11994 }
11995 LEX(PM_TOKEN_WORDS_SEP);
11996 }
11997
11998 // We'll check if we're at the end of the file. If we are, then we
11999 // need to return the EOF token.
12000 if (parser->current.end >= parser->end) {
12001 LEX(PM_TOKEN_EOF);
12002 }
12003
12004 // Here we'll get a list of the places where strpbrk should break,
12005 // and then find the first one.
12006 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12007 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
12008 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12009
12010 // If we haven't found an escape yet, then this buffer will be
12011 // unallocated since we can refer directly to the source string.
12012 pm_token_buffer_t token_buffer = { 0 };
12013
12014 while (breakpoint != NULL) {
12015 // If we hit whitespace, then we must have received content by
12016 // now, so we can return an element of the list.
12017 if (pm_char_is_whitespace(*breakpoint)) {
12018 parser->current.end = breakpoint;
12019 pm_token_buffer_flush(parser, &token_buffer);
12020 LEX(PM_TOKEN_STRING_CONTENT);
12021 }
12022
12023 // If we hit the terminator, we need to check which token to
12024 // return.
12025 if (*breakpoint == lex_mode->as.list.terminator) {
12026 // If this terminator doesn't actually close the list, then
12027 // we need to continue on past it.
12028 if (lex_mode->as.list.nesting > 0) {
12029 parser->current.end = breakpoint + 1;
12030 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12031 lex_mode->as.list.nesting--;
12032 continue;
12033 }
12034
12035 // If we've hit the terminator and we've already skipped
12036 // past content, then we can return a list node.
12037 if (breakpoint > parser->current.start) {
12038 parser->current.end = breakpoint;
12039 pm_token_buffer_flush(parser, &token_buffer);
12040 LEX(PM_TOKEN_STRING_CONTENT);
12041 }
12042
12043 // Otherwise, switch back to the default state and return
12044 // the end of the list.
12045 parser->current.end = breakpoint + 1;
12046 lex_mode_pop(parser);
12047 lex_state_set(parser, PM_LEX_STATE_END);
12048 LEX(PM_TOKEN_STRING_END);
12049 }
12050
12051 // If we hit a null byte, skip directly past it.
12052 if (*breakpoint == '\0') {
12053 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
12054 continue;
12055 }
12056
12057 // If we hit escapes, then we need to treat the next token
12058 // literally. In this case we'll skip past the next character
12059 // and find the next breakpoint.
12060 if (*breakpoint == '\\') {
12061 parser->current.end = breakpoint + 1;
12062
12063 // If we've hit the end of the file, then break out of the
12064 // loop by setting the breakpoint to NULL.
12065 if (parser->current.end == parser->end) {
12066 breakpoint = NULL;
12067 continue;
12068 }
12069
12070 pm_token_buffer_escape(parser, &token_buffer);
12071 uint8_t peeked = peek(parser);
12072
12073 switch (peeked) {
12074 case ' ':
12075 case '\f':
12076 case '\t':
12077 case '\v':
12078 case '\\':
12079 pm_token_buffer_push_byte(&token_buffer, peeked);
12080 parser->current.end++;
12081 break;
12082 case '\r':
12083 parser->current.end++;
12084 if (peek(parser) != '\n') {
12085 pm_token_buffer_push_byte(&token_buffer, '\r');
12086 break;
12087 }
12089 case '\n':
12090 pm_token_buffer_push_byte(&token_buffer, '\n');
12091
12092 if (parser->heredoc_end) {
12093 // ... if we are on the same line as a heredoc,
12094 // flush the heredoc and continue parsing after
12095 // heredoc_end.
12096 parser_flush_heredoc_end(parser);
12097 pm_token_buffer_copy(parser, &token_buffer);
12098 LEX(PM_TOKEN_STRING_CONTENT);
12099 } else {
12100 // ... else track the newline.
12101 pm_newline_list_append(&parser->newline_list, parser->current.end);
12102 }
12103
12104 parser->current.end++;
12105 break;
12106 default:
12107 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12108 pm_token_buffer_push_byte(&token_buffer, peeked);
12109 parser->current.end++;
12110 } else if (lex_mode->as.list.interpolation) {
12111 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12112 } else {
12113 pm_token_buffer_push_byte(&token_buffer, '\\');
12114 pm_token_buffer_push_escaped(&token_buffer, parser);
12115 }
12116
12117 break;
12118 }
12119
12120 token_buffer.cursor = parser->current.end;
12121 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12122 continue;
12123 }
12124
12125 // If we hit a #, then we will attempt to lex interpolation.
12126 if (*breakpoint == '#') {
12127 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12128
12129 if (type == PM_TOKEN_NOT_PROVIDED) {
12130 // If we haven't returned at this point then we had something
12131 // that looked like an interpolated class or instance variable
12132 // like "#@" but wasn't actually. In this case we'll just skip
12133 // to the next breakpoint.
12134 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12135 continue;
12136 }
12137
12138 if (type == PM_TOKEN_STRING_CONTENT) {
12139 pm_token_buffer_flush(parser, &token_buffer);
12140 }
12141
12142 LEX(type);
12143 }
12144
12145 // If we've hit the incrementor, then we need to skip past it
12146 // and find the next breakpoint.
12147 assert(*breakpoint == lex_mode->as.list.incrementor);
12148 parser->current.end = breakpoint + 1;
12149 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12150 lex_mode->as.list.nesting++;
12151 continue;
12152 }
12153
12154 if (parser->current.end > parser->current.start) {
12155 pm_token_buffer_flush(parser, &token_buffer);
12156 LEX(PM_TOKEN_STRING_CONTENT);
12157 }
12158
12159 // If we were unable to find a breakpoint, then this token hits the
12160 // end of the file.
12161 parser->current.end = parser->end;
12162 pm_token_buffer_flush(parser, &token_buffer);
12163 LEX(PM_TOKEN_STRING_CONTENT);
12164 }
12165 case PM_LEX_REGEXP: {
12166 // First, we'll set the start of this token to be the current end.
12167 if (parser->next_start == NULL) {
12168 parser->current.start = parser->current.end;
12169 } else {
12170 parser->current.start = parser->next_start;
12171 parser->current.end = parser->next_start;
12172 parser->next_start = NULL;
12173 }
12174
12175 // We'll check if we're at the end of the file. If we are, then we
12176 // need to return the EOF token.
12177 if (parser->current.end >= parser->end) {
12178 LEX(PM_TOKEN_EOF);
12179 }
12180
12181 // Get a reference to the current mode.
12182 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12183
12184 // These are the places where we need to split up the content of the
12185 // regular expression. We'll use strpbrk to find the first of these
12186 // characters.
12187 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12188 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12189 pm_regexp_token_buffer_t token_buffer = { 0 };
12190
12191 while (breakpoint != NULL) {
12192 uint8_t term = lex_mode->as.regexp.terminator;
12193 bool is_terminator = (*breakpoint == term);
12194
12195 // If the terminator is newline, we need to consider \r\n _also_ a newline
12196 // For example: `%r\nfoo\r\n`
12197 // The regular expression should be /foo/, not /foo\r/
12198 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12199 if (term == '\n') {
12200 is_terminator = true;
12201 }
12202
12203 // If the terminator is a CR, but we see a CRLF, we need to
12204 // treat the CRLF as a newline, meaning this is _not_ the
12205 // terminator
12206 if (term == '\r') {
12207 is_terminator = false;
12208 }
12209 }
12210
12211 // If we hit the terminator, we need to determine what kind of
12212 // token to return.
12213 if (is_terminator) {
12214 if (lex_mode->as.regexp.nesting > 0) {
12215 parser->current.end = breakpoint + 1;
12216 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12217 lex_mode->as.regexp.nesting--;
12218 continue;
12219 }
12220
12221 // Here we've hit the terminator. If we have already consumed
12222 // content then we need to return that content as string content
12223 // first.
12224 if (breakpoint > parser->current.start) {
12225 parser->current.end = breakpoint;
12226 pm_regexp_token_buffer_flush(parser, &token_buffer);
12227 LEX(PM_TOKEN_STRING_CONTENT);
12228 }
12229
12230 // Check here if we need to track the newline.
12231 size_t eol_length = match_eol_at(parser, breakpoint);
12232 if (eol_length) {
12233 parser->current.end = breakpoint + eol_length;
12234 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12235 } else {
12236 parser->current.end = breakpoint + 1;
12237 }
12238
12239 // Since we've hit the terminator of the regular expression,
12240 // we now need to parse the options.
12241 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12242
12243 lex_mode_pop(parser);
12244 lex_state_set(parser, PM_LEX_STATE_END);
12245 LEX(PM_TOKEN_REGEXP_END);
12246 }
12247
12248 // If we've hit the incrementor, then we need to skip past it
12249 // and find the next breakpoint.
12250 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12251 parser->current.end = breakpoint + 1;
12252 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12253 lex_mode->as.regexp.nesting++;
12254 continue;
12255 }
12256
12257 switch (*breakpoint) {
12258 case '\0':
12259 // If we hit a null byte, skip directly past it.
12260 parser->current.end = breakpoint + 1;
12261 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12262 break;
12263 case '\r':
12264 if (peek_at(parser, breakpoint + 1) != '\n') {
12265 parser->current.end = breakpoint + 1;
12266 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12267 break;
12268 }
12269
12270 breakpoint++;
12271 parser->current.end = breakpoint;
12272 pm_regexp_token_buffer_escape(parser, &token_buffer);
12273 token_buffer.base.cursor = breakpoint;
12274
12276 case '\n':
12277 // If we've hit a newline, then we need to track that in
12278 // the list of newlines.
12279 if (parser->heredoc_end == NULL) {
12280 pm_newline_list_append(&parser->newline_list, breakpoint);
12281 parser->current.end = breakpoint + 1;
12282 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12283 break;
12284 }
12285
12286 parser->current.end = breakpoint + 1;
12287 parser_flush_heredoc_end(parser);
12288 pm_regexp_token_buffer_flush(parser, &token_buffer);
12289 LEX(PM_TOKEN_STRING_CONTENT);
12290 case '\\': {
12291 // If we hit escapes, then we need to treat the next
12292 // token literally. In this case we'll skip past the
12293 // next character and find the next breakpoint.
12294 parser->current.end = breakpoint + 1;
12295
12296 // If we've hit the end of the file, then break out of
12297 // the loop by setting the breakpoint to NULL.
12298 if (parser->current.end == parser->end) {
12299 breakpoint = NULL;
12300 break;
12301 }
12302
12303 pm_regexp_token_buffer_escape(parser, &token_buffer);
12304 uint8_t peeked = peek(parser);
12305
12306 switch (peeked) {
12307 case '\r':
12308 parser->current.end++;
12309 if (peek(parser) != '\n') {
12310 if (lex_mode->as.regexp.terminator != '\r') {
12311 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12312 }
12313 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12314 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12315 break;
12316 }
12318 case '\n':
12319 if (parser->heredoc_end) {
12320 // ... if we are on the same line as a heredoc,
12321 // flush the heredoc and continue parsing after
12322 // heredoc_end.
12323 parser_flush_heredoc_end(parser);
12324 pm_regexp_token_buffer_copy(parser, &token_buffer);
12325 LEX(PM_TOKEN_STRING_CONTENT);
12326 } else {
12327 // ... else track the newline.
12328 pm_newline_list_append(&parser->newline_list, parser->current.end);
12329 }
12330
12331 parser->current.end++;
12332 break;
12333 case 'c':
12334 case 'C':
12335 case 'M':
12336 case 'u':
12337 case 'x':
12338 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12339 break;
12340 default:
12341 if (lex_mode->as.regexp.terminator == peeked) {
12342 // Some characters when they are used as the
12343 // terminator also receive an escape. They are
12344 // enumerated here.
12345 switch (peeked) {
12346 case '$': case ')': case '*': case '+':
12347 case '.': case '>': case '?': case ']':
12348 case '^': case '|': case '}':
12349 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12350 break;
12351 default:
12352 break;
12353 }
12354
12355 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12356 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12357 parser->current.end++;
12358 break;
12359 }
12360
12361 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12362 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12363 break;
12364 }
12365
12366 token_buffer.base.cursor = parser->current.end;
12367 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12368 break;
12369 }
12370 case '#': {
12371 // If we hit a #, then we will attempt to lex
12372 // interpolation.
12373 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12374
12375 if (type == PM_TOKEN_NOT_PROVIDED) {
12376 // If we haven't returned at this point then we had
12377 // something that looked like an interpolated class or
12378 // instance variable like "#@" but wasn't actually. In
12379 // this case we'll just skip to the next breakpoint.
12380 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12381 break;
12382 }
12383
12384 if (type == PM_TOKEN_STRING_CONTENT) {
12385 pm_regexp_token_buffer_flush(parser, &token_buffer);
12386 }
12387
12388 LEX(type);
12389 }
12390 default:
12391 assert(false && "unreachable");
12392 break;
12393 }
12394 }
12395
12396 if (parser->current.end > parser->current.start) {
12397 pm_regexp_token_buffer_flush(parser, &token_buffer);
12398 LEX(PM_TOKEN_STRING_CONTENT);
12399 }
12400
12401 // If we were unable to find a breakpoint, then this token hits the
12402 // end of the file.
12403 parser->current.end = parser->end;
12404 pm_regexp_token_buffer_flush(parser, &token_buffer);
12405 LEX(PM_TOKEN_STRING_CONTENT);
12406 }
12407 case PM_LEX_STRING: {
12408 // First, we'll set the start of this token to be the current end.
12409 if (parser->next_start == NULL) {
12410 parser->current.start = parser->current.end;
12411 } else {
12412 parser->current.start = parser->next_start;
12413 parser->current.end = parser->next_start;
12414 parser->next_start = NULL;
12415 }
12416
12417 // We'll check if we're at the end of the file. If we are, then we need to
12418 // return the EOF token.
12419 if (parser->current.end >= parser->end) {
12420 LEX(PM_TOKEN_EOF);
12421 }
12422
12423 // These are the places where we need to split up the content of the
12424 // string. We'll use strpbrk to find the first of these characters.
12425 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12426 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12427 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12428
12429 // If we haven't found an escape yet, then this buffer will be
12430 // unallocated since we can refer directly to the source string.
12431 pm_token_buffer_t token_buffer = { 0 };
12432
12433 while (breakpoint != NULL) {
12434 // If we hit the incrementor, then we'll increment the nesting and
12435 // continue lexing.
12436 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12437 lex_mode->as.string.nesting++;
12438 parser->current.end = breakpoint + 1;
12439 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12440 continue;
12441 }
12442
12443 uint8_t term = lex_mode->as.string.terminator;
12444 bool is_terminator = (*breakpoint == term);
12445
12446 // If the terminator is newline, we need to consider \r\n _also_ a newline
12447 // For example: `%\nfoo\r\n`
12448 // The string should be "foo", not "foo\r"
12449 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12450 if (term == '\n') {
12451 is_terminator = true;
12452 }
12453
12454 // If the terminator is a CR, but we see a CRLF, we need to
12455 // treat the CRLF as a newline, meaning this is _not_ the
12456 // terminator
12457 if (term == '\r') {
12458 is_terminator = false;
12459 }
12460 }
12461
12462 // Note that we have to check the terminator here first because we could
12463 // potentially be parsing a % string that has a # character as the
12464 // terminator.
12465 if (is_terminator) {
12466 // If this terminator doesn't actually close the string, then we need
12467 // to continue on past it.
12468 if (lex_mode->as.string.nesting > 0) {
12469 parser->current.end = breakpoint + 1;
12470 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12471 lex_mode->as.string.nesting--;
12472 continue;
12473 }
12474
12475 // Here we've hit the terminator. If we have already consumed content
12476 // then we need to return that content as string content first.
12477 if (breakpoint > parser->current.start) {
12478 parser->current.end = breakpoint;
12479 pm_token_buffer_flush(parser, &token_buffer);
12480 LEX(PM_TOKEN_STRING_CONTENT);
12481 }
12482
12483 // Otherwise we need to switch back to the parent lex mode and
12484 // return the end of the string.
12485 size_t eol_length = match_eol_at(parser, breakpoint);
12486 if (eol_length) {
12487 parser->current.end = breakpoint + eol_length;
12488 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12489 } else {
12490 parser->current.end = breakpoint + 1;
12491 }
12492
12493 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12494 parser->current.end++;
12495 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12496 lex_mode_pop(parser);
12497 LEX(PM_TOKEN_LABEL_END);
12498 }
12499
12500 lex_state_set(parser, PM_LEX_STATE_END);
12501 lex_mode_pop(parser);
12502 LEX(PM_TOKEN_STRING_END);
12503 }
12504
12505 switch (*breakpoint) {
12506 case '\0':
12507 // Skip directly past the null character.
12508 parser->current.end = breakpoint + 1;
12509 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12510 break;
12511 case '\r':
12512 if (peek_at(parser, breakpoint + 1) != '\n') {
12513 parser->current.end = breakpoint + 1;
12514 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12515 break;
12516 }
12517
12518 // If we hit a \r\n sequence, then we need to treat it
12519 // as a newline.
12520 breakpoint++;
12521 parser->current.end = breakpoint;
12522 pm_token_buffer_escape(parser, &token_buffer);
12523 token_buffer.cursor = breakpoint;
12524
12526 case '\n':
12527 // When we hit a newline, we need to flush any potential
12528 // heredocs. Note that this has to happen after we check
12529 // for the terminator in case the terminator is a
12530 // newline character.
12531 if (parser->heredoc_end == NULL) {
12532 pm_newline_list_append(&parser->newline_list, breakpoint);
12533 parser->current.end = breakpoint + 1;
12534 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12535 break;
12536 }
12537
12538 parser->current.end = breakpoint + 1;
12539 parser_flush_heredoc_end(parser);
12540 pm_token_buffer_flush(parser, &token_buffer);
12541 LEX(PM_TOKEN_STRING_CONTENT);
12542 case '\\': {
12543 // Here we hit escapes.
12544 parser->current.end = breakpoint + 1;
12545
12546 // If we've hit the end of the file, then break out of
12547 // the loop by setting the breakpoint to NULL.
12548 if (parser->current.end == parser->end) {
12549 breakpoint = NULL;
12550 continue;
12551 }
12552
12553 pm_token_buffer_escape(parser, &token_buffer);
12554 uint8_t peeked = peek(parser);
12555
12556 switch (peeked) {
12557 case '\\':
12558 pm_token_buffer_push_byte(&token_buffer, '\\');
12559 parser->current.end++;
12560 break;
12561 case '\r':
12562 parser->current.end++;
12563 if (peek(parser) != '\n') {
12564 if (!lex_mode->as.string.interpolation) {
12565 pm_token_buffer_push_byte(&token_buffer, '\\');
12566 }
12567 pm_token_buffer_push_byte(&token_buffer, '\r');
12568 break;
12569 }
12571 case '\n':
12572 if (!lex_mode->as.string.interpolation) {
12573 pm_token_buffer_push_byte(&token_buffer, '\\');
12574 pm_token_buffer_push_byte(&token_buffer, '\n');
12575 }
12576
12577 if (parser->heredoc_end) {
12578 // ... if we are on the same line as a heredoc,
12579 // flush the heredoc and continue parsing after
12580 // heredoc_end.
12581 parser_flush_heredoc_end(parser);
12582 pm_token_buffer_copy(parser, &token_buffer);
12583 LEX(PM_TOKEN_STRING_CONTENT);
12584 } else {
12585 // ... else track the newline.
12586 pm_newline_list_append(&parser->newline_list, parser->current.end);
12587 }
12588
12589 parser->current.end++;
12590 break;
12591 default:
12592 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12593 pm_token_buffer_push_byte(&token_buffer, peeked);
12594 parser->current.end++;
12595 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12596 pm_token_buffer_push_byte(&token_buffer, peeked);
12597 parser->current.end++;
12598 } else if (lex_mode->as.string.interpolation) {
12599 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12600 } else {
12601 pm_token_buffer_push_byte(&token_buffer, '\\');
12602 pm_token_buffer_push_escaped(&token_buffer, parser);
12603 }
12604
12605 break;
12606 }
12607
12608 token_buffer.cursor = parser->current.end;
12609 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12610 break;
12611 }
12612 case '#': {
12613 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12614
12615 if (type == PM_TOKEN_NOT_PROVIDED) {
12616 // If we haven't returned at this point then we had something that
12617 // looked like an interpolated class or instance variable like "#@"
12618 // but wasn't actually. In this case we'll just skip to the next
12619 // breakpoint.
12620 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12621 break;
12622 }
12623
12624 if (type == PM_TOKEN_STRING_CONTENT) {
12625 pm_token_buffer_flush(parser, &token_buffer);
12626 }
12627
12628 LEX(type);
12629 }
12630 default:
12631 assert(false && "unreachable");
12632 }
12633 }
12634
12635 if (parser->current.end > parser->current.start) {
12636 pm_token_buffer_flush(parser, &token_buffer);
12637 LEX(PM_TOKEN_STRING_CONTENT);
12638 }
12639
12640 // If we've hit the end of the string, then this is an unterminated
12641 // string. In that case we'll return a string content token.
12642 parser->current.end = parser->end;
12643 pm_token_buffer_flush(parser, &token_buffer);
12644 LEX(PM_TOKEN_STRING_CONTENT);
12645 }
12646 case PM_LEX_HEREDOC: {
12647 // First, we'll set the start of this token.
12648 if (parser->next_start == NULL) {
12649 parser->current.start = parser->current.end;
12650 } else {
12651 parser->current.start = parser->next_start;
12652 parser->current.end = parser->next_start;
12653 parser->heredoc_end = NULL;
12654 parser->next_start = NULL;
12655 }
12656
12657 // Now let's grab the information about the identifier off of the
12658 // current lex mode.
12659 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12660 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12661
12662 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12663 lex_mode->as.heredoc.line_continuation = false;
12664
12665 // We'll check if we're at the end of the file. If we are, then we
12666 // will add an error (because we weren't able to find the
12667 // terminator) but still continue parsing so that content after the
12668 // declaration of the heredoc can be parsed.
12669 if (parser->current.end >= parser->end) {
12670 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12671 parser->next_start = lex_mode->as.heredoc.next_start;
12672 parser->heredoc_end = parser->current.end;
12673 lex_state_set(parser, PM_LEX_STATE_END);
12674 lex_mode_pop(parser);
12675 LEX(PM_TOKEN_HEREDOC_END);
12676 }
12677
12678 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12679 size_t ident_length = heredoc_lex_mode->ident_length;
12680
12681 // If we are immediately following a newline and we have hit the
12682 // terminator, then we need to return the ending of the heredoc.
12683 if (current_token_starts_line(parser)) {
12684 const uint8_t *start = parser->current.start;
12685
12686 if (!line_continuation && (start + ident_length <= parser->end)) {
12687 const uint8_t *newline = next_newline(start, parser->end - start);
12688 const uint8_t *ident_end = newline;
12689 const uint8_t *terminator_end = newline;
12690
12691 if (newline == NULL) {
12692 terminator_end = parser->end;
12693 ident_end = parser->end;
12694 } else {
12695 terminator_end++;
12696 if (newline[-1] == '\r') {
12697 ident_end--; // Remove \r
12698 }
12699 }
12700
12701 const uint8_t *terminator_start = ident_end - ident_length;
12702 const uint8_t *cursor = start;
12703
12704 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12705 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12706 cursor++;
12707 }
12708 }
12709
12710 if (
12711 (cursor == terminator_start) &&
12712 (memcmp(terminator_start, ident_start, ident_length) == 0)
12713 ) {
12714 if (newline != NULL) {
12715 pm_newline_list_append(&parser->newline_list, newline);
12716 }
12717
12718 parser->current.end = terminator_end;
12719 if (*lex_mode->as.heredoc.next_start == '\\') {
12720 parser->next_start = NULL;
12721 } else {
12722 parser->next_start = lex_mode->as.heredoc.next_start;
12723 parser->heredoc_end = parser->current.end;
12724 }
12725
12726 lex_state_set(parser, PM_LEX_STATE_END);
12727 lex_mode_pop(parser);
12728 LEX(PM_TOKEN_HEREDOC_END);
12729 }
12730 }
12731
12732 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12733 if (
12734 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12735 lex_mode->as.heredoc.common_whitespace != NULL &&
12736 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12737 peek_at(parser, start) != '\n'
12738 ) {
12739 *lex_mode->as.heredoc.common_whitespace = whitespace;
12740 }
12741 }
12742
12743 // Otherwise we'll be parsing string content. These are the places
12744 // where we need to split up the content of the heredoc. We'll use
12745 // strpbrk to find the first of these characters.
12746 uint8_t breakpoints[] = "\r\n\\#";
12747
12748 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12749 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12750 breakpoints[3] = '\0';
12751 }
12752
12753 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12754 pm_token_buffer_t token_buffer = { 0 };
12755 bool was_line_continuation = false;
12756
12757 while (breakpoint != NULL) {
12758 switch (*breakpoint) {
12759 case '\0':
12760 // Skip directly past the null character.
12761 parser->current.end = breakpoint + 1;
12762 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12763 break;
12764 case '\r':
12765 parser->current.end = breakpoint + 1;
12766
12767 if (peek_at(parser, breakpoint + 1) != '\n') {
12768 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12769 break;
12770 }
12771
12772 // If we hit a \r\n sequence, then we want to replace it
12773 // with a single \n character in the final string.
12774 breakpoint++;
12775 pm_token_buffer_escape(parser, &token_buffer);
12776 token_buffer.cursor = breakpoint;
12777
12779 case '\n': {
12780 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12781 parser_flush_heredoc_end(parser);
12782 parser->current.end = breakpoint + 1;
12783 pm_token_buffer_flush(parser, &token_buffer);
12784 LEX(PM_TOKEN_STRING_CONTENT);
12785 }
12786
12787 pm_newline_list_append(&parser->newline_list, breakpoint);
12788
12789 // If we have a - or ~ heredoc, then we can match after
12790 // some leading whitespace.
12791 const uint8_t *start = breakpoint + 1;
12792
12793 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12794 // We want to match the terminator starting from the end of the line in case
12795 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12796 const uint8_t *newline = next_newline(start, parser->end - start);
12797
12798 if (newline == NULL) {
12799 newline = parser->end;
12800 } else if (newline[-1] == '\r') {
12801 newline--; // Remove \r
12802 }
12803
12804 // Start of a possible terminator.
12805 const uint8_t *terminator_start = newline - ident_length;
12806
12807 // Cursor to check for the leading whitespace. We skip the
12808 // leading whitespace if we have a - or ~ heredoc.
12809 const uint8_t *cursor = start;
12810
12811 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12812 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12813 cursor++;
12814 }
12815 }
12816
12817 if (
12818 cursor == terminator_start &&
12819 (memcmp(terminator_start, ident_start, ident_length) == 0)
12820 ) {
12821 parser->current.end = breakpoint + 1;
12822 pm_token_buffer_flush(parser, &token_buffer);
12823 LEX(PM_TOKEN_STRING_CONTENT);
12824 }
12825 }
12826
12827 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12828
12829 // If we have hit a newline that is followed by a valid
12830 // terminator, then we need to return the content of the
12831 // heredoc here as string content. Then, the next time a
12832 // token is lexed, it will match again and return the
12833 // end of the heredoc.
12834 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12835 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12836 *lex_mode->as.heredoc.common_whitespace = whitespace;
12837 }
12838
12839 parser->current.end = breakpoint + 1;
12840 pm_token_buffer_flush(parser, &token_buffer);
12841 LEX(PM_TOKEN_STRING_CONTENT);
12842 }
12843
12844 // Otherwise we hit a newline and it wasn't followed by
12845 // a terminator, so we can continue parsing.
12846 parser->current.end = breakpoint + 1;
12847 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12848 break;
12849 }
12850 case '\\': {
12851 // If we hit an escape, then we need to skip past
12852 // however many characters the escape takes up. However
12853 // it's important that if \n or \r\n are escaped, we
12854 // stop looping before the newline and not after the
12855 // newline so that we can still potentially find the
12856 // terminator of the heredoc.
12857 parser->current.end = breakpoint + 1;
12858
12859 // If we've hit the end of the file, then break out of
12860 // the loop by setting the breakpoint to NULL.
12861 if (parser->current.end == parser->end) {
12862 breakpoint = NULL;
12863 continue;
12864 }
12865
12866 pm_token_buffer_escape(parser, &token_buffer);
12867 uint8_t peeked = peek(parser);
12868
12869 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12870 switch (peeked) {
12871 case '\r':
12872 parser->current.end++;
12873 if (peek(parser) != '\n') {
12874 pm_token_buffer_push_byte(&token_buffer, '\\');
12875 pm_token_buffer_push_byte(&token_buffer, '\r');
12876 break;
12877 }
12879 case '\n':
12880 pm_token_buffer_push_byte(&token_buffer, '\\');
12881 pm_token_buffer_push_byte(&token_buffer, '\n');
12882 token_buffer.cursor = parser->current.end + 1;
12883 breakpoint = parser->current.end;
12884 continue;
12885 default:
12886 pm_token_buffer_push_byte(&token_buffer, '\\');
12887 pm_token_buffer_push_escaped(&token_buffer, parser);
12888 break;
12889 }
12890 } else {
12891 switch (peeked) {
12892 case '\r':
12893 parser->current.end++;
12894 if (peek(parser) != '\n') {
12895 pm_token_buffer_push_byte(&token_buffer, '\r');
12896 break;
12897 }
12899 case '\n':
12900 // If we are in a tilde here, we should
12901 // break out of the loop and return the
12902 // string content.
12903 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12904 const uint8_t *end = parser->current.end;
12905 pm_newline_list_append(&parser->newline_list, end);
12906
12907 // Here we want the buffer to only
12908 // include up to the backslash.
12909 parser->current.end = breakpoint;
12910 pm_token_buffer_flush(parser, &token_buffer);
12911
12912 // Now we can advance the end of the
12913 // token past the newline.
12914 parser->current.end = end + 1;
12915 lex_mode->as.heredoc.line_continuation = true;
12916 LEX(PM_TOKEN_STRING_CONTENT);
12917 }
12918
12919 was_line_continuation = true;
12920 token_buffer.cursor = parser->current.end + 1;
12921 breakpoint = parser->current.end;
12922 continue;
12923 default:
12924 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12925 break;
12926 }
12927 }
12928
12929 token_buffer.cursor = parser->current.end;
12930 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12931 break;
12932 }
12933 case '#': {
12934 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12935
12936 if (type == PM_TOKEN_NOT_PROVIDED) {
12937 // If we haven't returned at this point then we had
12938 // something that looked like an interpolated class
12939 // or instance variable like "#@" but wasn't
12940 // actually. In this case we'll just skip to the
12941 // next breakpoint.
12942 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12943 break;
12944 }
12945
12946 if (type == PM_TOKEN_STRING_CONTENT) {
12947 pm_token_buffer_flush(parser, &token_buffer);
12948 }
12949
12950 LEX(type);
12951 }
12952 default:
12953 assert(false && "unreachable");
12954 }
12955
12956 was_line_continuation = false;
12957 }
12958
12959 if (parser->current.end > parser->current.start) {
12960 parser->current.end = parser->end;
12961 pm_token_buffer_flush(parser, &token_buffer);
12962 LEX(PM_TOKEN_STRING_CONTENT);
12963 }
12964
12965 // If we've hit the end of the string, then this is an unterminated
12966 // heredoc. In that case we'll return a string content token.
12967 parser->current.end = parser->end;
12968 pm_token_buffer_flush(parser, &token_buffer);
12969 LEX(PM_TOKEN_STRING_CONTENT);
12970 }
12971 }
12972
12973 assert(false && "unreachable");
12974}
12975
12976#undef LEX
12977
12978/******************************************************************************/
12979/* Parse functions */
12980/******************************************************************************/
12981
/**
 * Binding power levels assigned to operator tokens through the
 * pm_binding_powers table. Going by the operators named on each member, the
 * values run from the loosest level (statement) up to the tightest (call).
 *
 * Levels are spaced two apart, which leaves an odd value between neighbouring
 * levels for the table macros that add 1 to a level when forming a right
 * binding power.
 */
typedef enum {
    /** Marks a token that cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET            =  0,

    /** Statement level; the lowest non-zero value in this enum. */
    PM_BINDING_POWER_STATEMENT        =  2,

    /** rescue (modifier form) */
    PM_BINDING_POWER_MODIFIER_RESCUE  =  4,

    /** if unless until while (modifier forms) */
    PM_BINDING_POWER_MODIFIER         =  6,

    /** and or */
    PM_BINDING_POWER_COMPOSITION      =  8,

    /** not */
    PM_BINDING_POWER_NOT              = 10,

    /** => in */
    PM_BINDING_POWER_MATCH            = 12,

    /** defined? */
    PM_BINDING_POWER_DEFINED          = 14,

    /** = (multiple assignment) */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,

    /** = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_ASSIGNMENT       = 18,

    /** ?: */
    PM_BINDING_POWER_TERNARY          = 20,

    /** .. ... */
    PM_BINDING_POWER_RANGE            = 22,

    /** || */
    PM_BINDING_POWER_LOGICAL_OR       = 24,

    /** && */
    PM_BINDING_POWER_LOGICAL_AND      = 26,

    /** <=> == === != =~ !~ */
    PM_BINDING_POWER_EQUALITY         = 28,

    /** > >= < <= */
    PM_BINDING_POWER_COMPARISON       = 30,

    /** | ^ */
    PM_BINDING_POWER_BITWISE_OR       = 32,

    /** & */
    PM_BINDING_POWER_BITWISE_AND      = 34,

    /** << >> */
    PM_BINDING_POWER_SHIFT            = 36,

    /** + - */
    PM_BINDING_POWER_TERM             = 38,

    /** * / % */
    PM_BINDING_POWER_FACTOR           = 40,

    /** -@ */
    PM_BINDING_POWER_UMINUS           = 42,

    /** ** */
    PM_BINDING_POWER_EXPONENT         = 44,

    /** ! ~ +@ */
    PM_BINDING_POWER_UNARY            = 46,

    /** [] []= */
    PM_BINDING_POWER_INDEX            = 48,

    /** :: . */
    PM_BINDING_POWER_CALL             = 50,

    /** One step above PM_BINDING_POWER_CALL; no operator is listed for it. */
    PM_BINDING_POWER_MAX              = 52
} pm_binding_power_t;
13019
13024typedef struct {
13026 pm_binding_power_t left;
13027
13029 pm_binding_power_t right;
13030
13033
13040
/*
 * Brace-initializer shorthands for entries of the pm_binding_powers table.
 * Each one expands to four values: the left binding power, the right binding
 * power, and two boolean flags.
 * NOTE(review): the members receiving the two flags are not visible next to
 * these macros; going by the macro names, the third presumably marks binary
 * operators and the fourth non-associative ones -- confirm against the
 * pm_binding_powers_t definition.
 *
 * The `precedence` parameter is parenthesized in every expansion so that an
 * argument that is itself an expression (for example a conditional) is not
 * re-associated by the `+ 1`.
 */

/* Assignment operators: left power is UNARY, right power is ASSIGNMENT. */
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }

/* Right power is one above the left power; flags are true, false. */
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }

/* Left and right powers are equal; flags are true, false. */
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }

/* Same powers as LEFT_ASSOCIATIVE, but with both flags set to true. */
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }

/* Same powers as RIGHT_ASSOCIATIVE, but with both flags set to false. */
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
13046
13047pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
13048 // rescue
13049 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
13050
13051 // if unless until while
13052 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13053 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13054 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13055 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13056
13057 // and or
13058 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
13059 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
13060
13061 // => in
13062 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13063 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13064
13065 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
13066 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13067 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13068 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
13069 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
13070 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
13071 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13072 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13073 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
13074 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13075 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13076 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13077 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
13078 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13079 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13080
13081 // ?:
13082 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13083
13084 // .. ...
13085 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13086 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13087 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13088 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13089
13090 // ||
13091 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13092
13093 // &&
13094 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13095
13096 // != !~ == === =~ <=>
13097 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13098 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13099 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13100 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13101 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13102 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13103
13104 // > >= < <=
13105 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13106 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13107 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13108 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13109
13110 // ^ |
13111 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13112 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13113
13114 // &
13115 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13116
13117 // >> <<
13118 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13119 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13120
13121 // - +
13122 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13123 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13124
13125 // % / *
13126 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13127 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13128 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13129 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13130
13131 // -@
13132 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13133 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13134
13135 // **
13136 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13137 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13138
13139 // ! ~ +@
13140 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13141 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13142 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13143
13144 // [
13145 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13146
13147 // :: . &.
13148 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13149 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13150 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13151};
13152
13153#undef BINDING_POWER_ASSIGNMENT
13154#undef LEFT_ASSOCIATIVE
13155#undef RIGHT_ASSOCIATIVE
13156#undef RIGHT_ASSOCIATIVE_UNARY
13157
13161static inline bool
13162match1(const pm_parser_t *parser, pm_token_type_t type) {
13163 return parser->current.type == type;
13164}
13165
13169static inline bool
13170match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13171 return match1(parser, type1) || match1(parser, type2);
13172}
13173
13177static inline bool
13178match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13179 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13180}
13181
13185static inline bool
13186match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13187 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13188}
13189
13193static inline bool
13194match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13195 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13196}
13197
13201static inline bool
13202match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13203 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13204}
13205
13212static bool
13213accept1(pm_parser_t *parser, pm_token_type_t type) {
13214 if (match1(parser, type)) {
13215 parser_lex(parser);
13216 return true;
13217 }
13218 return false;
13219}
13220
13225static inline bool
13226accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13227 if (match2(parser, type1, type2)) {
13228 parser_lex(parser);
13229 return true;
13230 }
13231 return false;
13232}
13233
13245static void
13246expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13247 if (accept1(parser, type)) return;
13248
13249 const uint8_t *location = parser->previous.end;
13250 pm_parser_err(parser, location, location, diag_id);
13251
13252 parser->previous.start = location;
13253 parser->previous.type = PM_TOKEN_MISSING;
13254}
13255
13260static void
13261expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13262 if (accept2(parser, type1, type2)) return;
13263
13264 const uint8_t *location = parser->previous.end;
13265 pm_parser_err(parser, location, location, diag_id);
13266
13267 parser->previous.start = location;
13268 parser->previous.type = PM_TOKEN_MISSING;
13269}
13270
13275static void
13276expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13277 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13278 parser_lex(parser);
13279 } else {
13280 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13281 parser->previous.start = parser->previous.end;
13282 parser->previous.type = PM_TOKEN_MISSING;
13283 }
13284}
13285
13286static pm_node_t *
13287parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13288
13293static pm_node_t *
13294parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13295 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13296 pm_assert_value_expression(parser, node);
13297 return node;
13298}
13299
13318static inline bool
13319token_begins_expression_p(pm_token_type_t type) {
13320 switch (type) {
13321 case PM_TOKEN_EQUAL_GREATER:
13322 case PM_TOKEN_KEYWORD_IN:
13323 // We need to special case this because it is a binary operator that
13324 // should not be marked as beginning an expression.
13325 return false;
13326 case PM_TOKEN_BRACE_RIGHT:
13327 case PM_TOKEN_BRACKET_RIGHT:
13328 case PM_TOKEN_COLON:
13329 case PM_TOKEN_COMMA:
13330 case PM_TOKEN_EMBEXPR_END:
13331 case PM_TOKEN_EOF:
13332 case PM_TOKEN_LAMBDA_BEGIN:
13333 case PM_TOKEN_KEYWORD_DO:
13334 case PM_TOKEN_KEYWORD_DO_LOOP:
13335 case PM_TOKEN_KEYWORD_END:
13336 case PM_TOKEN_KEYWORD_ELSE:
13337 case PM_TOKEN_KEYWORD_ELSIF:
13338 case PM_TOKEN_KEYWORD_ENSURE:
13339 case PM_TOKEN_KEYWORD_THEN:
13340 case PM_TOKEN_KEYWORD_RESCUE:
13341 case PM_TOKEN_KEYWORD_WHEN:
13342 case PM_TOKEN_NEWLINE:
13343 case PM_TOKEN_PARENTHESIS_RIGHT:
13344 case PM_TOKEN_SEMICOLON:
13345 // The reason we need this short-circuit is because we're using the
13346 // binding powers table to tell us if the subsequent token could
13347 // potentially be the start of an expression. If there _is_ a binding
13348 // power for one of these tokens, then we should remove it from this list
13349 // and let it be handled by the default case below.
13350 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13351 return false;
13352 case PM_TOKEN_UAMPERSAND:
13353 // This is a special case because this unary operator cannot appear
13354 // as a general operator, it only appears in certain circumstances.
13355 return false;
13356 case PM_TOKEN_UCOLON_COLON:
13357 case PM_TOKEN_UMINUS:
13358 case PM_TOKEN_UMINUS_NUM:
13359 case PM_TOKEN_UPLUS:
13360 case PM_TOKEN_BANG:
13361 case PM_TOKEN_TILDE:
13362 case PM_TOKEN_UDOT_DOT:
13363 case PM_TOKEN_UDOT_DOT_DOT:
13364 // These unary tokens actually do have binding power associated with them
13365 // so that we can correctly place them into the precedence order. But we
13366 // want them to be marked as beginning an expression, so we need to
13367 // special case them here.
13368 return true;
13369 default:
13370 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13371 }
13372}
13373
13378static pm_node_t *
13379parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13380 if (accept1(parser, PM_TOKEN_USTAR)) {
13381 pm_token_t operator = parser->previous;
13382 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13383 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13384 }
13385
13386 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13387}
13388
13393static void
13394parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13395 // The method name needs to change. If we previously had
13396 // foo, we now need foo=. In this case we'll allocate a new
13397 // owned string, copy the previous method name in, and
13398 // append an =.
13399 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13400 size_t length = constant->length;
13401 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13402 if (name == NULL) return;
13403
13404 memcpy(name, constant->start, length);
13405 name[length] = '=';
13406
13407 // Now switch the name to the new string.
13408 // This silences clang analyzer warning about leak of memory pointed by `name`.
13409 // NOLINTNEXTLINE(clang-analyzer-*)
13410 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13411}
13412
13419static pm_node_t *
13420parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13421 switch (PM_NODE_TYPE(target)) {
13422 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13423 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13424 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13425 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13426 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13427 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13428 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13429 default: break;
13430 }
13431
13432 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13433 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13434
13435 pm_node_destroy(parser, target);
13436 return (pm_node_t *) result;
13437}
13438
13444static void
13445parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13446 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13447
13448 for (size_t index = 0; index < implicit_parameters->size; index++) {
13449 if (implicit_parameters->nodes[index] == node) {
13450 // If the node is not the last one in the list, we need to shift the
13451 // remaining nodes down to fill the gap. This is extremely unlikely
13452 // to happen.
13453 if (index != implicit_parameters->size - 1) {
13454 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13455 }
13456
13457 implicit_parameters->size--;
13458 break;
13459 }
13460 }
13461}
13462
13471static pm_node_t *
13472parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13473 switch (PM_NODE_TYPE(target)) {
13474 case PM_MISSING_NODE:
13475 return target;
13476 case PM_SOURCE_ENCODING_NODE:
13477 case PM_FALSE_NODE:
13478 case PM_SOURCE_FILE_NODE:
13479 case PM_SOURCE_LINE_NODE:
13480 case PM_NIL_NODE:
13481 case PM_SELF_NODE:
13482 case PM_TRUE_NODE: {
13483 // In these special cases, we have specific error messages and we
13484 // will replace them with local variable writes.
13485 return parse_unwriteable_target(parser, target);
13486 }
13487 case PM_CLASS_VARIABLE_READ_NODE:
13489 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
13490 return target;
13491 case PM_CONSTANT_PATH_NODE:
13492 if (context_def_p(parser)) {
13493 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13494 }
13495
13497 target->type = PM_CONSTANT_PATH_TARGET_NODE;
13498
13499 return target;
13500 case PM_CONSTANT_READ_NODE:
13501 if (context_def_p(parser)) {
13502 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13503 }
13504
13505 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13506 target->type = PM_CONSTANT_TARGET_NODE;
13507
13508 return target;
13509 case PM_BACK_REFERENCE_READ_NODE:
13510 case PM_NUMBERED_REFERENCE_READ_NODE:
13511 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13512 return target;
13513 case PM_GLOBAL_VARIABLE_READ_NODE:
13515 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13516 return target;
13517 case PM_LOCAL_VARIABLE_READ_NODE: {
13518 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13519 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13520 parse_target_implicit_parameter(parser, target);
13521 }
13522
13523 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13524 uint32_t name = cast->name;
13525 uint32_t depth = cast->depth;
13526 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13527
13529 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
13530
13531 return target;
13532 }
13533 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13534 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13535 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13536
13537 parse_target_implicit_parameter(parser, target);
13538 pm_node_destroy(parser, target);
13539
13540 return node;
13541 }
13542 case PM_INSTANCE_VARIABLE_READ_NODE:
13544 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13545 return target;
13546 case PM_MULTI_TARGET_NODE:
13547 if (splat_parent) {
13548 // Multi target is not accepted in all positions. If this is one
13549 // of them, then we need to add an error.
13550 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13551 }
13552
13553 return target;
13554 case PM_SPLAT_NODE: {
13555 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13556
13557 if (splat->expression != NULL) {
13558 splat->expression = parse_target(parser, splat->expression, multiple, true);
13559 }
13560
13561 return (pm_node_t *) splat;
13562 }
13563 case PM_CALL_NODE: {
13564 pm_call_node_t *call = (pm_call_node_t *) target;
13565
13566 // If we have no arguments to the call node and we need this to be a
13567 // target then this is either a method call or a local variable
13568 // write.
13569 if (
13570 (call->message_loc.start != NULL) &&
13571 (call->message_loc.end[-1] != '!') &&
13572 (call->message_loc.end[-1] != '?') &&
13573 (call->opening_loc.start == NULL) &&
13574 (call->arguments == NULL) &&
13575 (call->block == NULL)
13576 ) {
13577 if (call->receiver == NULL) {
13578 // When we get here, we have a local variable write, because it
13579 // was previously marked as a method call but now we have an =.
13580 // This looks like:
13581 //
13582 // foo = 1
13583 //
13584 // When it was parsed in the prefix position, foo was seen as a
13585 // method call with no receiver and no arguments. Now we have an
13586 // =, so we know it's a local variable write.
13587 const pm_location_t message_loc = call->message_loc;
13588
13589 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13590 pm_node_destroy(parser, target);
13591
13592 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13593 }
13594
13595 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13596 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13597 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13598 }
13599
13600 parse_write_name(parser, &call->name);
13601 return (pm_node_t *) pm_call_target_node_create(parser, call);
13602 }
13603 }
13604
13605 // If there is no call operator and the message is "[]" then this is
13606 // an aref expression, and we can transform it into an aset
13607 // expression.
13608 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13609 return (pm_node_t *) pm_index_target_node_create(parser, call);
13610 }
13611 }
13613 default:
13614 // In this case we have a node that we don't know how to convert
13615 // into a target. We need to treat it as an error. For now, we'll
13616 // mark it as an error and just skip right past it.
13617 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13618 return target;
13619 }
13620}
13621
13626static pm_node_t *
13627parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13628 pm_node_t *result = parse_target(parser, target, multiple, false);
13629
13630 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13631 // parens after the targets.
13632 if (
13633 !match1(parser, PM_TOKEN_EQUAL) &&
13634 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13635 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13636 ) {
13637 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13638 }
13639
13640 return result;
13641}
13642
13647static pm_node_t *
13648parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13649 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13650
13651 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13652 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13653 }
13654
13655 return write;
13656}
13657
13661static pm_node_t *
13662parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13663 switch (PM_NODE_TYPE(target)) {
13664 case PM_MISSING_NODE:
13665 pm_node_destroy(parser, value);
13666 return target;
13667 case PM_CLASS_VARIABLE_READ_NODE: {
13668 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13669 pm_node_destroy(parser, target);
13670 return (pm_node_t *) node;
13671 }
13672 case PM_CONSTANT_PATH_NODE: {
13673 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13674
13675 if (context_def_p(parser)) {
13676 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13677 }
13678
13679 return parse_shareable_constant_write(parser, node);
13680 }
13681 case PM_CONSTANT_READ_NODE: {
13682 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13683
13684 if (context_def_p(parser)) {
13685 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13686 }
13687
13688 pm_node_destroy(parser, target);
13689 return parse_shareable_constant_write(parser, node);
13690 }
13691 case PM_BACK_REFERENCE_READ_NODE:
13692 case PM_NUMBERED_REFERENCE_READ_NODE:
13693 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13695 case PM_GLOBAL_VARIABLE_READ_NODE: {
13696 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13697 pm_node_destroy(parser, target);
13698 return (pm_node_t *) node;
13699 }
13700 case PM_LOCAL_VARIABLE_READ_NODE: {
13702
13703 pm_constant_id_t name = local_read->name;
13704 pm_location_t name_loc = target->location;
13705
13706 uint32_t depth = local_read->depth;
13707 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13708
13709 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13710 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13711 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13712 parse_target_implicit_parameter(parser, target);
13713 }
13714
13715 pm_locals_unread(&scope->locals, name);
13716 pm_node_destroy(parser, target);
13717
13718 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13719 }
13720 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13721 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13722 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13723
13724 parse_target_implicit_parameter(parser, target);
13725 pm_node_destroy(parser, target);
13726
13727 return node;
13728 }
13729 case PM_INSTANCE_VARIABLE_READ_NODE: {
13730 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13731 pm_node_destroy(parser, target);
13732 return write_node;
13733 }
13734 case PM_MULTI_TARGET_NODE:
13735 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13736 case PM_SPLAT_NODE: {
13737 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13738
13739 if (splat->expression != NULL) {
13740 splat->expression = parse_write(parser, splat->expression, operator, value);
13741 }
13742
13743 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13744 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13745
13746 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13747 }
13748 case PM_CALL_NODE: {
13749 pm_call_node_t *call = (pm_call_node_t *) target;
13750
13751 // If we have no arguments to the call node and we need this to be a
13752 // target then this is either a method call or a local variable
13753 // write.
13754 if (
13755 (call->message_loc.start != NULL) &&
13756 (call->message_loc.end[-1] != '!') &&
13757 (call->message_loc.end[-1] != '?') &&
13758 (call->opening_loc.start == NULL) &&
13759 (call->arguments == NULL) &&
13760 (call->block == NULL)
13761 ) {
13762 if (call->receiver == NULL) {
13763 // When we get here, we have a local variable write, because it
13764 // was previously marked as a method call but now we have an =.
13765 // This looks like:
13766 //
13767 // foo = 1
13768 //
13769 // When it was parsed in the prefix position, foo was seen as a
13770 // method call with no receiver and no arguments. Now we have an
13771 // =, so we know it's a local variable write.
13772 const pm_location_t message = call->message_loc;
13773
13774 pm_parser_local_add_location(parser, message.start, message.end, 0);
13775 pm_node_destroy(parser, target);
13776
13777 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13778 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13779
13780 pm_refute_numbered_parameter(parser, message.start, message.end);
13781 return target;
13782 }
13783
13784 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13785 // When we get here, we have a method call, because it was
13786 // previously marked as a method call but now we have an =. This
13787 // looks like:
13788 //
13789 // foo.bar = 1
13790 //
13791 // When it was parsed in the prefix position, foo.bar was seen as a
13792 // method call with no arguments. Now we have an =, so we know it's
13793 // a method call with an argument. In this case we will create the
13794 // arguments node, parse the argument, and add it to the list.
13795 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13796 call->arguments = arguments;
13797
13798 pm_arguments_node_arguments_append(arguments, value);
13799 call->base.location.end = arguments->base.location.end;
13800
13801 parse_write_name(parser, &call->name);
13802 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13803
13804 return (pm_node_t *) call;
13805 }
13806 }
13807
13808 // If there is no call operator and the message is "[]" then this is
13809 // an aref expression, and we can transform it into an aset
13810 // expression.
13811 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13812 if (call->arguments == NULL) {
13813 call->arguments = pm_arguments_node_create(parser);
13814 }
13815
13816 pm_arguments_node_arguments_append(call->arguments, value);
13817 target->location.end = value->location.end;
13818
13819 // Replace the name with "[]=".
13820 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13821
13822 // Ensure that the arguments for []= don't contain keywords
13823 pm_index_arguments_check(parser, call->arguments, call->block);
13824 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13825
13826 return target;
13827 }
13828
13829 // If there are arguments on the call node, then it can't be a method
13830 // call ending with = or a local variable write, so it must be a
13831 // syntax error. In this case we'll fall through to our default
13832 // handling. We need to free the value that we parsed because there
13833 // is no way for us to attach it to the tree at this point.
13834 pm_node_destroy(parser, value);
13835 }
13837 default:
13838 // In this case we have a node that we don't know how to convert into a
13839 // target. We need to treat it as an error. For now, we'll mark it as an
13840 // error and just skip right past it.
13841 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13842 return target;
13843 }
13844}
13845
13852static pm_node_t *
13853parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13854 switch (PM_NODE_TYPE(target)) {
13855 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13856 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13857 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13858 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13859 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13860 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13861 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13862 default: break;
13863 }
13864
13865 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13866 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13867
13868 pm_node_destroy(parser, target);
13869 return (pm_node_t *) result;
13870}
13871
13882static pm_node_t *
13883parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13884 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13885
13886 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13887 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13888
13889 while (accept1(parser, PM_TOKEN_COMMA)) {
13890 if (accept1(parser, PM_TOKEN_USTAR)) {
13891 // Here we have a splat operator. It can have a name or be
13892 // anonymous. It can be the final target or be in the middle if
13893 // there haven't been any others yet.
13894 if (has_rest) {
13895 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13896 }
13897
13898 pm_token_t star_operator = parser->previous;
13899 pm_node_t *name = NULL;
13900
13901 if (token_begins_expression_p(parser->current.type)) {
13902 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13903 name = parse_target(parser, name, true, true);
13904 }
13905
13906 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13907 pm_multi_target_node_targets_append(parser, result, splat);
13908 has_rest = true;
13909 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13910 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13911 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13912 target = parse_target(parser, target, true, false);
13913
13914 pm_multi_target_node_targets_append(parser, result, target);
13915 context_pop(parser);
13916 } else if (token_begins_expression_p(parser->current.type)) {
13917 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13918 target = parse_target(parser, target, true, false);
13919
13920 pm_multi_target_node_targets_append(parser, result, target);
13921 } else if (!match1(parser, PM_TOKEN_EOF)) {
13922 // If we get here, then we have a trailing , in a multi target node.
13923 // We'll add an implicit rest node to represent this.
13924 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13925 pm_multi_target_node_targets_append(parser, result, rest);
13926 break;
13927 }
13928 }
13929
13930 return (pm_node_t *) result;
13931}
13932
13937static pm_node_t *
13938parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13939 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13940 accept1(parser, PM_TOKEN_NEWLINE);
13941
13942 // Ensure that we have either an = or a ) after the targets.
13943 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13944 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13945 }
13946
13947 return result;
13948}
13949
13953static pm_statements_node_t *
13954parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13955 // First, skip past any optional terminators that might be at the beginning
13956 // of the statements.
13957 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13958
13959 // If we have a terminator, then we can just return NULL.
13960 if (context_terminator(context, &parser->current)) return NULL;
13961
13962 pm_statements_node_t *statements = pm_statements_node_create(parser);
13963
13964 // At this point we know we have at least one statement, and that it
13965 // immediately follows the current token.
13966 context_push(parser, context);
13967
13968 while (true) {
13969 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13970 pm_statements_node_body_append(parser, statements, node, true);
13971
13972 // If we're recovering from a syntax error, then we need to stop parsing
13973 // the statements now.
13974 if (parser->recovering) {
13975 // If this is the level of context where the recovery has happened,
13976 // then we can mark the parser as done recovering.
13977 if (context_terminator(context, &parser->current)) parser->recovering = false;
13978 break;
13979 }
13980
13981 // If we have a terminator, then we will parse all consecutive
13982 // terminators and then continue parsing the statements list.
13983 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13984 // If we have a terminator, then we will continue parsing the
13985 // statements list.
13986 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13987 if (context_terminator(context, &parser->current)) break;
13988
13989 // Now we can continue parsing the list of statements.
13990 continue;
13991 }
13992
13993 // At this point we have a list of statements that are not terminated by
13994 // a newline or semicolon. At this point we need to check if we're at
13995 // the end of the statements list. If we are, then we should break out
13996 // of the loop.
13997 if (context_terminator(context, &parser->current)) break;
13998
13999 // At this point, we have a syntax error, because the statement was not
14000 // terminated by a newline or semicolon, and we're not at the end of the
14001 // statements list. Ideally we should scan forward to determine if we
14002 // should insert a missing terminator or break out of parsing the
14003 // statements list at this point.
14004 //
14005 // We don't have that yet, so instead we'll do a more naive approach. If
14006 // we were unable to parse an expression, then we will skip past this
14007 // token and continue parsing the statements list. Otherwise we'll add
14008 // an error and continue parsing the statements list.
14009 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
14010 parser_lex(parser);
14011
14012 // If we are at the end of the file, then we need to stop parsing
14013 // the statements entirely at this point. Mark the parser as
14014 // recovering, as we know that EOF closes the top-level context, and
14015 // then break out of the loop.
14016 if (match1(parser, PM_TOKEN_EOF)) {
14017 parser->recovering = true;
14018 break;
14019 }
14020
14021 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14022 if (context_terminator(context, &parser->current)) break;
14023 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
14024 // This is an inlined version of accept1 because the error that we
14025 // want to add has varargs. If this happens again, we should
14026 // probably extract a helper function.
14027 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
14028 parser->previous.start = parser->previous.end;
14029 parser->previous.type = PM_TOKEN_MISSING;
14030 }
14031 }
14032
14033 context_pop(parser);
14034 bool last_value = true;
14035 switch (context) {
14038 last_value = false;
14039 break;
14040 default:
14041 break;
14042 }
14043 pm_void_statements_check(parser, statements, last_value);
14044
14045 return statements;
14046}
14047
14052static void
14053pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14054 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
14055
14056 if (duplicated != NULL) {
14057 pm_buffer_t buffer = { 0 };
14058 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
14059
14060 pm_diagnostic_list_append_format(
14061 &parser->warning_list,
14062 duplicated->location.start,
14063 duplicated->location.end,
14064 PM_WARN_DUPLICATED_HASH_KEY,
14065 (int) pm_buffer_length(&buffer),
14066 pm_buffer_value(&buffer),
14067 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
14068 );
14069
14070 pm_buffer_free(&buffer);
14071 }
14072}
14073
14078static void
14079pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14080 pm_node_t *previous;
14081
14082 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14083 pm_diagnostic_list_append_format(
14084 &parser->warning_list,
14085 node->location.start,
14086 node->location.end,
14087 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14088 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14089 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14090 );
14091 }
14092}
14093
14097static bool
14098parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14099 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
14100 bool contains_keyword_splat = false;
14101
14102 while (true) {
14103 pm_node_t *element;
14104
14105 switch (parser->current.type) {
14106 case PM_TOKEN_USTAR_STAR: {
14107 parser_lex(parser);
14108 pm_token_t operator = parser->previous;
14109 pm_node_t *value = NULL;
14110
14111 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14112 // If we're about to parse a nested hash that is being
14113 // pushed into this hash directly with **, then we want the
14114 // inner hash to share the static literals with the outer
14115 // hash.
14116 parser->current_hash_keys = literals;
14117 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14118 } else if (token_begins_expression_p(parser->current.type)) {
14119 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14120 } else {
14121 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14122 }
14123
14124 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14125 contains_keyword_splat = true;
14126 break;
14127 }
14128 case PM_TOKEN_LABEL: {
14129 pm_token_t label = parser->current;
14130 parser_lex(parser);
14131
14132 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14133 pm_hash_key_static_literals_add(parser, literals, key);
14134
14135 pm_token_t operator = not_provided(parser);
14136 pm_node_t *value = NULL;
14137
14138 if (token_begins_expression_p(parser->current.type)) {
14139 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14140 } else {
14141 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14142 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14143 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14144 } else {
14145 int depth = -1;
14146 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14147
14148 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14149 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14150 } else {
14151 depth = pm_parser_local_depth(parser, &identifier);
14152 }
14153
14154 if (depth == -1) {
14155 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14156 } else {
14157 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14158 }
14159 }
14160
14161 value->location.end++;
14162 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14163 }
14164
14165 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14166 break;
14167 }
14168 default: {
14169 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14170
14171 // Hash keys that are strings are automatically frozen. We will
14172 // mark that here.
14173 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14174 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14175 }
14176
14177 pm_hash_key_static_literals_add(parser, literals, key);
14178
14179 pm_token_t operator;
14180 if (pm_symbol_node_label_p(key)) {
14181 operator = not_provided(parser);
14182 } else {
14183 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14184 operator = parser->previous;
14185 }
14186
14187 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14188 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14189 break;
14190 }
14191 }
14192
14193 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14194 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14195 } else {
14196 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14197 }
14198
14199 // If there's no comma after the element, then we're done.
14200 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14201
14202 // If the next element starts with a label or a **, then we know we have
14203 // another element in the hash, so we'll continue parsing.
14204 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14205
14206 // Otherwise we need to check if the subsequent token begins an expression.
14207 // If it does, then we'll continue parsing.
14208 if (token_begins_expression_p(parser->current.type)) continue;
14209
14210 // Otherwise by default we will exit out of this loop.
14211 break;
14212 }
14213
14214 return contains_keyword_splat;
14215}
14216
14220static inline void
14221parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14222 if (arguments->arguments == NULL) {
14223 arguments->arguments = pm_arguments_node_create(parser);
14224 }
14225
14226 pm_arguments_node_arguments_append(arguments->arguments, argument);
14227}
14228
14232static void
14233parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14234 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14235
14236 // First we need to check if the next token is one that could be the start
14237 // of an argument. If it's not, then we can just return.
14238 if (
14239 match2(parser, terminator, PM_TOKEN_EOF) ||
14240 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14241 context_terminator(parser->current_context->context, &parser->current)
14242 ) {
14243 return;
14244 }
14245
14246 bool parsed_first_argument = false;
14247 bool parsed_bare_hash = false;
14248 bool parsed_block_argument = false;
14249 bool parsed_forwarding_arguments = false;
14250
14251 while (!match1(parser, PM_TOKEN_EOF)) {
14252 if (parsed_forwarding_arguments) {
14253 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14254 }
14255
14256 pm_node_t *argument = NULL;
14257
14258 switch (parser->current.type) {
14259 case PM_TOKEN_USTAR_STAR:
14260 case PM_TOKEN_LABEL: {
14261 if (parsed_bare_hash) {
14262 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14263 }
14264
14265 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14266 argument = (pm_node_t *) hash;
14267
14268 pm_static_literals_t hash_keys = { 0 };
14269 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14270
14271 parse_arguments_append(parser, arguments, argument);
14272
14273 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14274 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14275 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14276
14277 pm_static_literals_free(&hash_keys);
14278 parsed_bare_hash = true;
14279
14280 break;
14281 }
14282 case PM_TOKEN_UAMPERSAND: {
14283 parser_lex(parser);
14284 pm_token_t operator = parser->previous;
14285 pm_node_t *expression = NULL;
14286
14287 if (token_begins_expression_p(parser->current.type)) {
14288 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14289 } else {
14290 pm_parser_scope_forwarding_block_check(parser, &operator);
14291 }
14292
14293 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
14294 if (parsed_block_argument) {
14295 parse_arguments_append(parser, arguments, argument);
14296 } else {
14297 arguments->block = argument;
14298 }
14299
14300 if (match1(parser, PM_TOKEN_COMMA)) {
14301 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14302 }
14303
14304 parsed_block_argument = true;
14305 break;
14306 }
14307 case PM_TOKEN_USTAR: {
14308 parser_lex(parser);
14309 pm_token_t operator = parser->previous;
14310
14311 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14312 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14313 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14314 if (parsed_bare_hash) {
14315 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14316 }
14317 } else {
14318 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14319
14320 if (parsed_bare_hash) {
14321 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14322 }
14323
14324 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14325 }
14326
14327 parse_arguments_append(parser, arguments, argument);
14328 break;
14329 }
14330 case PM_TOKEN_UDOT_DOT_DOT: {
14331 if (accepts_forwarding) {
14332 parser_lex(parser);
14333
14334 if (token_begins_expression_p(parser->current.type)) {
14335 // If the token begins an expression then this ... was
14336 // not actually argument forwarding but was instead a
14337 // range.
14338 pm_token_t operator = parser->previous;
14339 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14340
14341 // If we parse a range, we need to validate that we
14342 // didn't accidentally violate the nonassoc rules of the
14343 // ... operator.
14344 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14345 pm_range_node_t *range = (pm_range_node_t *) right;
14346 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14347 }
14348
14349 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14350 } else {
14351 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14352 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14353 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14354 }
14355
14356 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14357 parse_arguments_append(parser, arguments, argument);
14358 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14359 arguments->has_forwarding = true;
14360 parsed_forwarding_arguments = true;
14361 break;
14362 }
14363 }
14364 }
14366 default: {
14367 if (argument == NULL) {
14368 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14369 }
14370
14371 bool contains_keywords = false;
14372 bool contains_keyword_splat = false;
14373
14374 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14375 if (parsed_bare_hash) {
14376 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14377 }
14378
14379 pm_token_t operator;
14380 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14381 operator = parser->previous;
14382 } else {
14383 operator = not_provided(parser);
14384 }
14385
14386 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14387 contains_keywords = true;
14388
14389 // Create the set of static literals for this hash.
14390 pm_static_literals_t hash_keys = { 0 };
14391 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14392
14393 // Finish parsing the one we are part way through.
14394 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14395 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14396
14397 pm_keyword_hash_node_elements_append(bare_hash, argument);
14398 argument = (pm_node_t *) bare_hash;
14399
14400 // Then parse more if we have a comma
14401 if (accept1(parser, PM_TOKEN_COMMA) && (
14402 token_begins_expression_p(parser->current.type) ||
14403 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14404 )) {
14405 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14406 }
14407
14408 pm_static_literals_free(&hash_keys);
14409 parsed_bare_hash = true;
14410 }
14411
14412 parse_arguments_append(parser, arguments, argument);
14413
14414 pm_node_flags_t flags = 0;
14415 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14416 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14417 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14418
14419 break;
14420 }
14421 }
14422
14423 parsed_first_argument = true;
14424
14425 // If parsing the argument failed, we need to stop parsing arguments.
14426 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14427
14428 // If the terminator of these arguments is not EOF, then we have a
14429 // specific token we're looking for. In that case we can accept a
14430 // newline here because it is not functioning as a statement terminator.
14431 bool accepted_newline = false;
14432 if (terminator != PM_TOKEN_EOF) {
14433 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14434 }
14435
14436 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14437 // If we previously were on a comma and we just parsed a bare hash,
14438 // then we want to continue parsing arguments. This is because the
14439 // comma was grabbed up by the hash parser.
14440 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14441 // If there was a comma, then we need to check if we also accepted a
14442 // newline. If we did, then this is a syntax error.
14443 if (accepted_newline) {
14444 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14445 }
14446 } else {
14447 // If there is no comma at the end of the argument list then we're
14448 // done parsing arguments and can break out of this loop.
14449 break;
14450 }
14451
14452 // If we hit the terminator, then that means we have a trailing comma so
14453 // we can accept that output as well.
14454 if (match1(parser, terminator)) break;
14455 }
14456}
14457
14469parse_required_destructured_parameter(pm_parser_t *parser) {
14470 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14471
14472 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14473 pm_multi_target_node_opening_set(node, &parser->previous);
14474
14475 do {
14476 pm_node_t *param;
14477
14478 // If we get here then we have a trailing comma, which isn't allowed in
14479 // the grammar. In other places, multi targets _do_ allow trailing
14480 // commas, so here we'll assume this is a mistake of the user not
14481 // knowing it's not allowed here.
14482 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14483 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14484 pm_multi_target_node_targets_append(parser, node, param);
14485 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14486 break;
14487 }
14488
14489 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14490 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14491 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14492 pm_token_t star = parser->previous;
14493 pm_node_t *value = NULL;
14494
14495 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14496 pm_token_t name = parser->previous;
14497 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14498 if (pm_parser_parameter_name_check(parser, &name)) {
14499 pm_node_flag_set_repeated_parameter(value);
14500 }
14501 pm_parser_local_add_token(parser, &name, 1);
14502 }
14503
14504 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14505 } else {
14506 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14507 pm_token_t name = parser->previous;
14508
14509 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14510 if (pm_parser_parameter_name_check(parser, &name)) {
14511 pm_node_flag_set_repeated_parameter(param);
14512 }
14513 pm_parser_local_add_token(parser, &name, 1);
14514 }
14515
14516 pm_multi_target_node_targets_append(parser, node, param);
14517 } while (accept1(parser, PM_TOKEN_COMMA));
14518
14519 accept1(parser, PM_TOKEN_NEWLINE);
14520 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14521 pm_multi_target_node_closing_set(node, &parser->previous);
14522
14523 return node;
14524}
14525
/**
 * The states used to validate the order in which parameters are declared.
 *
 * The numeric values matter: update_parameter_state compares them with `<`,
 * `>`, `<=` and `>=`. Parsing starts at PM_PARAMETERS_ORDER_NONE (the largest
 * value) and the state is only ever lowered; a token whose state is greater
 * than the current one is rejected as out of order.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** Once reached, any further state-changing parameter is an error. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** Initial state: no parameter has been seen yet. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14541
14545static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14546 [0] = PM_PARAMETERS_NO_CHANGE,
14547 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14548 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14549 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14550 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14551 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14552 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14553 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14554 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14555 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14556 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14557 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14558};
14559
14567static bool
14568update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14569 pm_parameters_order_t state = parameters_ordering[token->type];
14570 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14571
14572 // If we see another ordered argument after a optional argument
14573 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14574 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14575 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14576 return true;
14577 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14578 return true;
14579 }
14580
14581 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14582 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14583 return false;
14584 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14585 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14586 return false;
14587 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14588 // We know what transition we failed on, so we can provide a better error here.
14589 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14590 return false;
14591 }
14592
14593 if (state < *current) *current = state;
14594 return true;
14595}
14596
14600static pm_parameters_node_t *
14601parse_parameters(
14602 pm_parser_t *parser,
14603 pm_binding_power_t binding_power,
14604 bool uses_parentheses,
14605 bool allows_trailing_comma,
14606 bool allows_forwarding_parameters,
14607 bool accepts_blocks_in_defaults,
14608 bool in_block,
14609 uint16_t depth
14610) {
14611 pm_do_loop_stack_push(parser, false);
14612
14613 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14614 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14615
14616 while (true) {
14617 bool parsing = true;
14618
14619 switch (parser->current.type) {
14620 case PM_TOKEN_PARENTHESIS_LEFT: {
14621 update_parameter_state(parser, &parser->current, &order);
14622 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14623
14624 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14625 pm_parameters_node_requireds_append(params, param);
14626 } else {
14627 pm_parameters_node_posts_append(params, param);
14628 }
14629 break;
14630 }
14631 case PM_TOKEN_UAMPERSAND:
14632 case PM_TOKEN_AMPERSAND: {
14633 update_parameter_state(parser, &parser->current, &order);
14634 parser_lex(parser);
14635
14636 pm_token_t operator = parser->previous;
14637 pm_token_t name;
14638
14639 bool repeated = false;
14640 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14641 name = parser->previous;
14642 repeated = pm_parser_parameter_name_check(parser, &name);
14643 pm_parser_local_add_token(parser, &name, 1);
14644 } else {
14645 name = not_provided(parser);
14646 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14647 }
14648
14649 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14650 if (repeated) {
14651 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14652 }
14653 if (params->block == NULL) {
14654 pm_parameters_node_block_set(params, param);
14655 } else {
14656 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14657 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14658 }
14659
14660 break;
14661 }
14662 case PM_TOKEN_UDOT_DOT_DOT: {
14663 if (!allows_forwarding_parameters) {
14664 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14665 }
14666
14667 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14668 parser_lex(parser);
14669
14670 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14671 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14672
14673 if (params->keyword_rest != NULL) {
14674 // If we already have a keyword rest parameter, then we replace it with the
14675 // forwarding parameter and move the keyword rest parameter to the posts list.
14676 pm_node_t *keyword_rest = params->keyword_rest;
14677 pm_parameters_node_posts_append(params, keyword_rest);
14678 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14679 params->keyword_rest = NULL;
14680 }
14681
14682 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14683 break;
14684 }
14685 case PM_TOKEN_CLASS_VARIABLE:
14686 case PM_TOKEN_IDENTIFIER:
14687 case PM_TOKEN_CONSTANT:
14688 case PM_TOKEN_INSTANCE_VARIABLE:
14689 case PM_TOKEN_GLOBAL_VARIABLE:
14690 case PM_TOKEN_METHOD_NAME: {
14691 parser_lex(parser);
14692 switch (parser->previous.type) {
14693 case PM_TOKEN_CONSTANT:
14694 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14695 break;
14696 case PM_TOKEN_INSTANCE_VARIABLE:
14697 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14698 break;
14699 case PM_TOKEN_GLOBAL_VARIABLE:
14700 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14701 break;
14702 case PM_TOKEN_CLASS_VARIABLE:
14703 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14704 break;
14705 case PM_TOKEN_METHOD_NAME:
14706 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14707 break;
14708 default: break;
14709 }
14710
14711 if (parser->current.type == PM_TOKEN_EQUAL) {
14712 update_parameter_state(parser, &parser->current, &order);
14713 } else {
14714 update_parameter_state(parser, &parser->previous, &order);
14715 }
14716
14717 pm_token_t name = parser->previous;
14718 bool repeated = pm_parser_parameter_name_check(parser, &name);
14719 pm_parser_local_add_token(parser, &name, 1);
14720
14721 if (match1(parser, PM_TOKEN_EQUAL)) {
14722 pm_token_t operator = parser->current;
14723 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14724 parser_lex(parser);
14725
14726 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14727 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14728
14729 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14730 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14731 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14732
14733 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14734
14735 if (repeated) {
14736 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14737 }
14738 pm_parameters_node_optionals_append(params, param);
14739
14740 // If the value of the parameter increased the number of
14741 // reads of that parameter, then we need to warn that we
14742 // have a circular definition.
14743 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14744 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14745 }
14746
14747 context_pop(parser);
14748
14749 // If parsing the value of the parameter resulted in error recovery,
14750 // then we can put a missing node in its place and stop parsing the
14751 // parameters entirely now.
14752 if (parser->recovering) {
14753 parsing = false;
14754 break;
14755 }
14756 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14757 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14758 if (repeated) {
14759 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14760 }
14761 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14762 } else {
14763 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14764 if (repeated) {
14765 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14766 }
14767 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14768 }
14769
14770 break;
14771 }
14772 case PM_TOKEN_LABEL: {
14773 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14774 update_parameter_state(parser, &parser->current, &order);
14775
14776 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14777 parser_lex(parser);
14778
14779 pm_token_t name = parser->previous;
14780 pm_token_t local = name;
14781 local.end -= 1;
14782
14783 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14784 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14785 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14786 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14787 }
14788
14789 bool repeated = pm_parser_parameter_name_check(parser, &local);
14790 pm_parser_local_add_token(parser, &local, 1);
14791
14792 switch (parser->current.type) {
14793 case PM_TOKEN_COMMA:
14794 case PM_TOKEN_PARENTHESIS_RIGHT:
14795 case PM_TOKEN_PIPE: {
14796 context_pop(parser);
14797
14798 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14799 if (repeated) {
14800 pm_node_flag_set_repeated_parameter(param);
14801 }
14802
14803 pm_parameters_node_keywords_append(params, param);
14804 break;
14805 }
14806 case PM_TOKEN_SEMICOLON:
14807 case PM_TOKEN_NEWLINE: {
14808 context_pop(parser);
14809
14810 if (uses_parentheses) {
14811 parsing = false;
14812 break;
14813 }
14814
14815 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14816 if (repeated) {
14817 pm_node_flag_set_repeated_parameter(param);
14818 }
14819
14820 pm_parameters_node_keywords_append(params, param);
14821 break;
14822 }
14823 default: {
14824 pm_node_t *param;
14825
14826 if (token_begins_expression_p(parser->current.type)) {
14827 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14828 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14829
14830 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14831 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14832 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14833
14834 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14835 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14836 }
14837
14838 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14839 }
14840 else {
14841 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14842 }
14843
14844 if (repeated) {
14845 pm_node_flag_set_repeated_parameter(param);
14846 }
14847
14848 context_pop(parser);
14849 pm_parameters_node_keywords_append(params, param);
14850
14851 // If parsing the value of the parameter resulted in error recovery,
14852 // then we can put a missing node in its place and stop parsing the
14853 // parameters entirely now.
14854 if (parser->recovering) {
14855 parsing = false;
14856 break;
14857 }
14858 }
14859 }
14860
14861 parser->in_keyword_arg = false;
14862 break;
14863 }
14864 case PM_TOKEN_USTAR:
14865 case PM_TOKEN_STAR: {
14866 update_parameter_state(parser, &parser->current, &order);
14867 parser_lex(parser);
14868
14869 pm_token_t operator = parser->previous;
14870 pm_token_t name;
14871 bool repeated = false;
14872
14873 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14874 name = parser->previous;
14875 repeated = pm_parser_parameter_name_check(parser, &name);
14876 pm_parser_local_add_token(parser, &name, 1);
14877 } else {
14878 name = not_provided(parser);
14879 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14880 }
14881
14882 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14883 if (repeated) {
14884 pm_node_flag_set_repeated_parameter(param);
14885 }
14886
14887 if (params->rest == NULL) {
14888 pm_parameters_node_rest_set(params, param);
14889 } else {
14890 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14891 pm_parameters_node_posts_append(params, param);
14892 }
14893
14894 break;
14895 }
14896 case PM_TOKEN_STAR_STAR:
14897 case PM_TOKEN_USTAR_STAR: {
14898 pm_parameters_order_t previous_order = order;
14899 update_parameter_state(parser, &parser->current, &order);
14900 parser_lex(parser);
14901
14902 pm_token_t operator = parser->previous;
14903 pm_node_t *param;
14904
14905 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14906 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14907 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14908 }
14909
14910 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14911 } else {
14912 pm_token_t name;
14913
14914 bool repeated = false;
14915 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14916 name = parser->previous;
14917 repeated = pm_parser_parameter_name_check(parser, &name);
14918 pm_parser_local_add_token(parser, &name, 1);
14919 } else {
14920 name = not_provided(parser);
14921 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14922 }
14923
14924 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14925 if (repeated) {
14926 pm_node_flag_set_repeated_parameter(param);
14927 }
14928 }
14929
14930 if (params->keyword_rest == NULL) {
14931 pm_parameters_node_keyword_rest_set(params, param);
14932 } else {
14933 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14934 pm_parameters_node_posts_append(params, param);
14935 }
14936
14937 break;
14938 }
14939 default:
14940 if (parser->previous.type == PM_TOKEN_COMMA) {
14941 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14942 // If we get here, then we have a trailing comma in a
14943 // block parameter list.
14944 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14945
14946 if (params->rest == NULL) {
14947 pm_parameters_node_rest_set(params, param);
14948 } else {
14949 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14950 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14951 }
14952 } else {
14953 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14954 }
14955 }
14956
14957 parsing = false;
14958 break;
14959 }
14960
14961 // If we hit some kind of issue while parsing the parameter, this would
14962 // have been set to false. In that case, we need to break out of the
14963 // loop.
14964 if (!parsing) break;
14965
14966 bool accepted_newline = false;
14967 if (uses_parentheses) {
14968 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14969 }
14970
14971 if (accept1(parser, PM_TOKEN_COMMA)) {
14972 // If there was a comma, but we also accepted a newline, then this
14973 // is a syntax error.
14974 if (accepted_newline) {
14975 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14976 }
14977 } else {
14978 // If there was no comma, then we're done parsing parameters.
14979 break;
14980 }
14981 }
14982
14983 pm_do_loop_stack_pop(parser);
14984
14985 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14986 if (params->base.location.start == params->base.location.end) {
14987 pm_node_destroy(parser, (pm_node_t *) params);
14988 return NULL;
14989 }
14990
14991 return params;
14992}
14993
14998static size_t
14999token_newline_index(const pm_parser_t *parser) {
15000 if (parser->heredoc_end == NULL) {
15001 // This is the common case. In this case we can look at the previously
15002 // recorded newline in the newline list and subtract from the current
15003 // offset.
15004 return parser->newline_list.size - 1;
15005 } else {
15006 // This is unlikely. This is the case that we have already parsed the
15007 // start of a heredoc, so we cannot rely on looking at the previous
15008 // offset of the newline list, and instead must go through the whole
15009 // process of a binary search for the line number.
15010 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
15011 }
15012}
15013
15018static int64_t
15019token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
15020 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
15021 const uint8_t *end = token->start;
15022
15023 // Skip over the BOM if it is present.
15024 if (
15025 newline_index == 0 &&
15026 parser->start[0] == 0xef &&
15027 parser->start[1] == 0xbb &&
15028 parser->start[2] == 0xbf
15029 ) cursor += 3;
15030
15031 int64_t column = 0;
15032 for (; cursor < end; cursor++) {
15033 switch (*cursor) {
15034 case '\t':
15035 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
15036 break;
15037 case ' ':
15038 column++;
15039 break;
15040 default:
15041 column++;
15042 if (break_on_non_space) return -1;
15043 break;
15044 }
15045 }
15046
15047 return column;
15048}
15049
15054static void
15055parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
15056 // If these warnings are disabled (unlikely), then we can just return.
15057 if (!parser->warn_mismatched_indentation) return;
15058
15059 // If the tokens are on the same line, we do not warn.
15060 size_t closing_newline_index = token_newline_index(parser);
15061 if (opening_newline_index == closing_newline_index) return;
15062
15063 // If the opening token has anything other than spaces or tabs before it,
15064 // then we do not warn. This is unless we are matching up an `if`/`end` pair
15065 // and the `if` immediately follows an `else` keyword.
15066 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
15067 if (!if_after_else && (opening_column == -1)) return;
15068
15069 // Get a reference to the closing token off the current parser. This assumes
15070 // that the caller has placed this in the correct position.
15071 pm_token_t *closing_token = &parser->current;
15072
15073 // If the tokens are at the same indentation, we do not warn.
15074 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
15075 if ((closing_column == -1) || (opening_column == closing_column)) return;
15076
15077 // If the closing column is greater than the opening column and we are
15078 // allowing indentation, then we do not warn.
15079 if (allow_indent && (closing_column > opening_column)) return;
15080
15081 // Otherwise, add a warning.
15082 PM_PARSER_WARN_FORMAT(
15083 parser,
15084 closing_token->start,
15085 closing_token->end,
15086 PM_WARN_INDENTATION_MISMATCH,
15087 (int) (closing_token->end - closing_token->start),
15088 (const char *) closing_token->start,
15089 (int) (opening_token->end - opening_token->start),
15090 (const char *) opening_token->start,
15091 ((int32_t) opening_newline_index) + parser->start_line
15092 );
15093}
15094
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * parse_rescues uses this value to pick the matching PM_CONTEXT_*_RESCUE,
 * PM_CONTEXT_*_ELSE and PM_CONTEXT_*_ENSURE contexts.
 */
typedef enum {
    /** Selects the PM_CONTEXT_BEGIN_* contexts. */
    PM_RESCUES_BEGIN = 1,

    /** Selects the PM_CONTEXT_BLOCK_* contexts. */
    PM_RESCUES_BLOCK = 2,

    /** Selects the PM_CONTEXT_CLASS_* contexts. */
    PM_RESCUES_CLASS = 3,

    /** Selects the PM_CONTEXT_DEF_* contexts. */
    PM_RESCUES_DEF = 4,

    /** Selects the PM_CONTEXT_LAMBDA_* contexts. */
    PM_RESCUES_LAMBDA = 5,

    /** Selects the PM_CONTEXT_MODULE_* contexts. */
    PM_RESCUES_MODULE = 6,

    /** Selects the PM_CONTEXT_SCLASS_* contexts. */
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
15104
15109static inline void
15110parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15111 pm_rescue_node_t *current = NULL;
15112
15113 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15114 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15115 parser_lex(parser);
15116
15117 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15118
15119 switch (parser->current.type) {
15120 case PM_TOKEN_EQUAL_GREATER: {
15121 // Here we have an immediate => after the rescue keyword, in which case
15122 // we're going to have an empty list of exceptions to rescue (which
15123 // implies StandardError).
15124 parser_lex(parser);
15125 pm_rescue_node_operator_set(rescue, &parser->previous);
15126
15127 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15128 reference = parse_target(parser, reference, false, false);
15129
15130 pm_rescue_node_reference_set(rescue, reference);
15131 break;
15132 }
15133 case PM_TOKEN_NEWLINE:
15134 case PM_TOKEN_SEMICOLON:
15135 case PM_TOKEN_KEYWORD_THEN:
15136 // Here we have a terminator for the rescue keyword, in which
15137 // case we're going to just continue on.
15138 break;
15139 default: {
15140 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15141 // Here we have something that could be an exception expression, so
15142 // we'll attempt to parse it here and any others delimited by commas.
15143
15144 do {
15145 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15146 pm_rescue_node_exceptions_append(rescue, expression);
15147
15148 // If we hit a newline, then this is the end of the rescue expression. We
15149 // can continue on to parse the statements.
15150 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15151
15152 // If we hit a `=>` then we're going to parse the exception variable. Once
15153 // we've done that, we'll break out of the loop and parse the statements.
15154 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15155 pm_rescue_node_operator_set(rescue, &parser->previous);
15156
15157 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15158 reference = parse_target(parser, reference, false, false);
15159
15160 pm_rescue_node_reference_set(rescue, reference);
15161 break;
15162 }
15163 } while (accept1(parser, PM_TOKEN_COMMA));
15164 }
15165 }
15166 }
15167
15168 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15169 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15170 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15171 }
15172 } else {
15173 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15174 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15175 }
15176
15177 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
15178 pm_accepts_block_stack_push(parser, true);
15179 pm_context_t context;
15180
15181 switch (type) {
15182 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15183 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15184 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15185 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15186 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15187 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15188 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15189 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15190 }
15191
15192 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15193 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15194
15195 pm_accepts_block_stack_pop(parser);
15196 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15197 }
15198
15199 if (current == NULL) {
15200 pm_begin_node_rescue_clause_set(parent_node, rescue);
15201 } else {
15202 pm_rescue_node_subsequent_set(current, rescue);
15203 }
15204
15205 current = rescue;
15206 }
15207
15208 // The end node locations on rescue nodes will not be set correctly
15209 // since we won't know the end until we've found all subsequent
15210 // clauses. This sets the end location on all rescues once we know it.
15211 if (current != NULL) {
15212 const uint8_t *end_to_set = current->base.location.end;
15213 pm_rescue_node_t *clause = parent_node->rescue_clause;
15214
15215 while (clause != NULL) {
15216 clause->base.location.end = end_to_set;
15217 clause = clause->subsequent;
15218 }
15219 }
15220
15221 pm_token_t else_keyword;
15222 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15223 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15224 opening_newline_index = token_newline_index(parser);
15225
15226 else_keyword = parser->current;
15227 opening = &else_keyword;
15228
15229 parser_lex(parser);
15230 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15231
15232 pm_statements_node_t *else_statements = NULL;
15233 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15234 pm_accepts_block_stack_push(parser, true);
15235 pm_context_t context;
15236
15237 switch (type) {
15238 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15239 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15240 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15241 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15242 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15243 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15244 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15245 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15246 }
15247
15248 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15249 pm_accepts_block_stack_pop(parser);
15250
15251 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15252 }
15253
15254 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15255 pm_begin_node_else_clause_set(parent_node, else_clause);
15256
15257 // If we don't have a `current` rescue node, then this is a dangling
15258 // else, and it's an error.
15259 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15260 }
15261
15262 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15263 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15264 pm_token_t ensure_keyword = parser->current;
15265
15266 parser_lex(parser);
15267 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15268
15269 pm_statements_node_t *ensure_statements = NULL;
15270 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15271 pm_accepts_block_stack_push(parser, true);
15272 pm_context_t context;
15273
15274 switch (type) {
15275 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15276 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15277 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15278 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15279 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15280 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15281 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15282 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15283 }
15284
15285 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15286 pm_accepts_block_stack_pop(parser);
15287
15288 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15289 }
15290
15291 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15292 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15293 }
15294
15295 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15296 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15297 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15298 } else {
15299 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15300 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15301 }
15302}
15303
15308static pm_begin_node_t *
15309parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15310 pm_token_t begin_keyword = not_provided(parser);
15311 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15312
15313 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15314 node->base.location.start = start;
15315
15316 return node;
15317}
15318
15323parse_block_parameters(
15324 pm_parser_t *parser,
15325 bool allows_trailing_comma,
15326 const pm_token_t *opening,
15327 bool is_lambda_literal,
15328 bool accepts_blocks_in_defaults,
15329 uint16_t depth
15330) {
15331 pm_parameters_node_t *parameters = NULL;
15332 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15333 parameters = parse_parameters(
15334 parser,
15335 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15336 false,
15337 allows_trailing_comma,
15338 false,
15339 accepts_blocks_in_defaults,
15340 true,
15341 (uint16_t) (depth + 1)
15342 );
15343 }
15344
15345 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15346 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15347 accept1(parser, PM_TOKEN_NEWLINE);
15348
15349 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15350 do {
15351 switch (parser->current.type) {
15352 case PM_TOKEN_CONSTANT:
15353 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15354 parser_lex(parser);
15355 break;
15356 case PM_TOKEN_INSTANCE_VARIABLE:
15357 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15358 parser_lex(parser);
15359 break;
15360 case PM_TOKEN_GLOBAL_VARIABLE:
15361 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15362 parser_lex(parser);
15363 break;
15364 case PM_TOKEN_CLASS_VARIABLE:
15365 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15366 parser_lex(parser);
15367 break;
15368 default:
15369 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15370 break;
15371 }
15372
15373 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15374 pm_parser_local_add_token(parser, &parser->previous, 1);
15375
15376 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15377 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15378
15379 pm_block_parameters_node_append_local(block_parameters, local);
15380 } while (accept1(parser, PM_TOKEN_COMMA));
15381 }
15382 }
15383
15384 return block_parameters;
15385}
15386
15391static bool
15392outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15393 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15394 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15395 }
15396
15397 return false;
15398}
15399
/**
 * The nine numbered-parameter names, in ascending order: the entry at index i
 * is the name "_<i + 1>".
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
15408
15414static pm_node_t *
15415parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15416 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15417
15418 // If we have ordinary parameters, then we will return them as the set of
15419 // parameters.
15420 if (parameters != NULL) {
15421 // If we also have implicit parameters, then this is an error.
15422 if (implicit_parameters->size > 0) {
15423 pm_node_t *node = implicit_parameters->nodes[0];
15424
15425 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15426 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15427 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15428 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15429 } else {
15430 assert(false && "unreachable");
15431 }
15432 }
15433
15434 return parameters;
15435 }
15436
15437 // If we don't have any implicit parameters, then the set of parameters is
15438 // NULL.
15439 if (implicit_parameters->size == 0) {
15440 return NULL;
15441 }
15442
15443 // If we don't have ordinary parameters, then we now must validate our set
15444 // of implicit parameters. We can only have numbered parameters or it, but
15445 // they cannot be mixed.
15446 uint8_t numbered_parameter = 0;
15447 bool it_parameter = false;
15448
15449 for (size_t index = 0; index < implicit_parameters->size; index++) {
15450 pm_node_t *node = implicit_parameters->nodes[index];
15451
15452 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15453 if (it_parameter) {
15454 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15455 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15456 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15457 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15458 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15459 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15460 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15461 } else {
15462 assert(false && "unreachable");
15463 }
15464 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15465 if (numbered_parameter > 0) {
15466 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15467 } else {
15468 it_parameter = true;
15469 }
15470 }
15471 }
15472
15473 if (numbered_parameter > 0) {
15474 // Go through the parent scopes and mark them as being disallowed from
15475 // using numbered parameters because this inner scope is using them.
15476 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15477 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15478 }
15479
15480 const pm_location_t location = { .start = opening->start, .end = closing->end };
15481 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15482 }
15483
15484 if (it_parameter) {
15485 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15486 }
15487
15488 return NULL;
15489}
15490
15494static pm_block_node_t *
15495parse_block(pm_parser_t *parser, uint16_t depth) {
15496 pm_token_t opening = parser->previous;
15497 accept1(parser, PM_TOKEN_NEWLINE);
15498
15499 pm_accepts_block_stack_push(parser, true);
15500 pm_parser_scope_push(parser, false);
15501
15502 pm_block_parameters_node_t *block_parameters = NULL;
15503
15504 if (accept1(parser, PM_TOKEN_PIPE)) {
15505 pm_token_t block_parameters_opening = parser->previous;
15506 if (match1(parser, PM_TOKEN_PIPE)) {
15507 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15508 parser->command_start = true;
15509 parser_lex(parser);
15510 } else {
15511 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15512 accept1(parser, PM_TOKEN_NEWLINE);
15513 parser->command_start = true;
15514 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15515 }
15516
15517 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15518 }
15519
15520 accept1(parser, PM_TOKEN_NEWLINE);
15521 pm_node_t *statements = NULL;
15522
15523 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15524 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15525 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15526 }
15527
15528 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15529 } else {
15530 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15531 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
15532 pm_accepts_block_stack_push(parser, true);
15533 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15534 pm_accepts_block_stack_pop(parser);
15535 }
15536
15537 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15538 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15539 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15540 }
15541 }
15542
15543 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15544 }
15545
15546 pm_constant_id_list_t locals;
15547 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15548 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15549
15550 pm_parser_scope_pop(parser);
15551 pm_accepts_block_stack_pop(parser);
15552
15553 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15554}
15555
15561static bool
15562parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15563 bool found = false;
15564
15565 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15566 found |= true;
15567 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15568
15569 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15570 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15571 } else {
15572 pm_accepts_block_stack_push(parser, true);
15573 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15574
15575 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15576 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15577 parser->previous.start = parser->previous.end;
15578 parser->previous.type = PM_TOKEN_MISSING;
15579 }
15580
15581 pm_accepts_block_stack_pop(parser);
15582 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15583 }
15584 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15585 found |= true;
15586 pm_accepts_block_stack_push(parser, false);
15587
15588 // If we get here, then the subsequent token cannot be used as an infix
15589 // operator. In this case we assume the subsequent token is part of an
15590 // argument to this method call.
15591 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15592
15593 // If we have done with the arguments and still not consumed the comma,
15594 // then we have a trailing comma where we need to check whether it is
15595 // allowed or not.
15596 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15597 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15598 }
15599
15600 pm_accepts_block_stack_pop(parser);
15601 }
15602
15603 // If we're at the end of the arguments, we can now check if there is a block
15604 // node that starts with a {. If there is, then we can parse it and add it to
15605 // the arguments.
15606 if (accepts_block) {
15607 pm_block_node_t *block = NULL;
15608
15609 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15610 found |= true;
15611 block = parse_block(parser, (uint16_t) (depth + 1));
15612 pm_arguments_validate_block(parser, arguments, block);
15613 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15614 found |= true;
15615 block = parse_block(parser, (uint16_t) (depth + 1));
15616 }
15617
15618 if (block != NULL) {
15619 if (arguments->block == NULL && !arguments->has_forwarding) {
15620 arguments->block = (pm_node_t *) block;
15621 } else {
15622 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15623
15624 if (arguments->block != NULL) {
15625 if (arguments->arguments == NULL) {
15626 arguments->arguments = pm_arguments_node_create(parser);
15627 }
15628 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15629 }
15630 arguments->block = (pm_node_t *) block;
15631 }
15632 }
15633 }
15634
15635 return found;
15636}
15637
/**
 * Check whether a return (the given node) is allowed in the current context
 * and attach PM_ERR_RETURN_INVALID to the node when it is not.
 *
 * The context stack is walked from parser->current_context outward:
 *   - "transparent" contexts are skipped,
 *   - a singleton-class context is remembered and skipped,
 *   - a class or module context reports the error immediately,
 *   - a def context accepts the return and stops the walk.
 * If the walk runs off the end of the stack, the error is reported only when
 * a singleton-class context was seen on the way.
 *
 * NOTE(review): the listing numbers embedded in the lines below skip values
 * inside the switch (for example 15647-15649 and 15693-15701), so some case
 * labels appear to be missing from this copy. Confirm against the upstream
 * file before changing the switch.
 */
15642static void
15643parse_return(pm_parser_t *parser, pm_node_t *node) {
15644 bool in_sclass = false;
15645 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15646 switch (context_node->context) {
15650 case PM_CONTEXT_BEGIN:
15651 case PM_CONTEXT_CASE_IN:
15654 case PM_CONTEXT_DEFINED:
15655 case PM_CONTEXT_ELSE:
15656 case PM_CONTEXT_ELSIF:
15657 case PM_CONTEXT_EMBEXPR:
15659 case PM_CONTEXT_FOR:
15660 case PM_CONTEXT_IF:
15662 case PM_CONTEXT_MAIN:
15664 case PM_CONTEXT_PARENS:
15665 case PM_CONTEXT_POSTEXE:
15667 case PM_CONTEXT_PREEXE:
15669 case PM_CONTEXT_TERNARY:
15670 case PM_CONTEXT_UNLESS:
15671 case PM_CONTEXT_UNTIL:
15672 case PM_CONTEXT_WHILE:
15673 // Keep iterating up the lists of contexts, because returns can
15674 // see through these.
15675 continue;
15679 case PM_CONTEXT_SCLASS:
/* Not an error by itself: only reported after the loop if no enclosing */
/* context ended the walk first. */
15680 in_sclass = true;
15681 continue;
15685 case PM_CONTEXT_CLASS:
15689 case PM_CONTEXT_MODULE:
15690 // These contexts are invalid for a return.
15691 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15692 return;
15702 case PM_CONTEXT_DEF:
15708 // These contexts are valid for a return, and we should not
15709 // continue to loop.
15710 return;
15711 case PM_CONTEXT_NONE:
15712 // This case should never happen.
15713 assert(false && "unreachable");
15714 break;
15715 }
15716 }
/* The whole stack was walked without hitting a def, class or module. */
15717 if (in_sclass) {
15718 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15719 }
15720}
15721
/**
 * Decide what to do with a block exit node (flush_block_exits in this file
 * names them as break, next and redo) based on the enclosing contexts.
 *
 * The context stack is walked from parser->current_context outward:
 *   - in a context that permits the exit, nothing is recorded,
 *   - in a context that forbids it, the node is appended to
 *     parser->current_block_exits so that validation can be deferred,
 *   - every other listed context is skipped and the walk continues.
 * The function never reports an error itself.
 *
 * NOTE(review): the listing numbers embedded in the lines below skip values
 * inside the switch (for example 15730-15734 and 15737-15742), so some case
 * labels appear to be missing from this copy. Confirm against the upstream
 * file before changing the switch.
 */
15726static void
15727parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15728 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15729 switch (context_node->context) {
15735 case PM_CONTEXT_DEFINED:
15736 case PM_CONTEXT_FOR:
15743 case PM_CONTEXT_POSTEXE:
15744 case PM_CONTEXT_UNTIL:
15745 case PM_CONTEXT_WHILE:
15746 // These are the good cases. We're allowed to have a block exit
15747 // in these contexts.
15748 return;
15749 case PM_CONTEXT_DEF:
15754 case PM_CONTEXT_MAIN:
15755 case PM_CONTEXT_PREEXE:
15756 case PM_CONTEXT_SCLASS:
15760 // These are the bad cases. We're not allowed to have a block
15761 // exit in these contexts.
15762 //
15763 // If we get here, then we're about to mark this block exit
15764 // as invalid. However, it could later _become_ valid if we
15765 // find a trailing while/until on the expression. In this
15766 // case instead of adding the error here, we'll add the
15767 // block exit to the list of exits for the expression, and
15768 // the node parsing will handle validating it instead.
15769 assert(parser->current_block_exits != NULL);
15770 pm_node_list_append(parser->current_block_exits, node);
15771 return;
15775 case PM_CONTEXT_BEGIN:
15776 case PM_CONTEXT_CASE_IN:
15781 case PM_CONTEXT_CLASS:
15783 case PM_CONTEXT_ELSE:
15784 case PM_CONTEXT_ELSIF:
15785 case PM_CONTEXT_EMBEXPR:
15787 case PM_CONTEXT_IF:
15791 case PM_CONTEXT_MODULE:
15793 case PM_CONTEXT_PARENS:
15796 case PM_CONTEXT_TERNARY:
15797 case PM_CONTEXT_UNLESS:
15798 // In these contexts we should continue walking up the list of
15799 // contexts.
15800 break;
15801 case PM_CONTEXT_NONE:
15802 // This case should never happen.
15803 assert(false && "unreachable");
15804 break;
15805 }
15806 }
15807}
15808
15813static pm_node_list_t *
15814push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15815 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15816 parser->current_block_exits = current_block_exits;
15817 return previous_block_exits;
15818}
15819
15825static void
15826flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15827 pm_node_t *block_exit;
15828 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15829 const char *type;
15830
15831 switch (PM_NODE_TYPE(block_exit)) {
15832 case PM_BREAK_NODE: type = "break"; break;
15833 case PM_NEXT_NODE: type = "next"; break;
15834 case PM_REDO_NODE: type = "redo"; break;
15835 default: assert(false && "unreachable"); type = ""; break;
15836 }
15837
15838 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15839 }
15840
15841 parser->current_block_exits = previous_block_exits;
15842}
15843
15848static void
15849pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15850 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15851 // If we matched a trailing while/until, then all of the block exits in
15852 // the contained list are valid. In this case we do not need to do
15853 // anything.
15854 parser->current_block_exits = previous_block_exits;
15855 } else if (previous_block_exits != NULL) {
15856 // If we did not matching a trailing while/until, then all of the block
15857 // exits contained in the list are invalid for this specific context.
15858 // However, they could still become valid in a higher level context if
15859 // there is another list above this one. In this case we'll push all of
15860 // the block exits up to the previous list.
15861 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15862 parser->current_block_exits = previous_block_exits;
15863 } else {
15864 // If we did not match a trailing while/until and this was the last
15865 // chance to do so, then all of the block exits in the list are invalid
15866 // and we need to add an error for each of them.
15867 flush_block_exits(parser, previous_block_exits);
15868 }
15869}
15870
15871static inline pm_node_t *
15872parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15873 context_push(parser, PM_CONTEXT_PREDICATE);
15874 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15875 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15876
15877 // Predicates are closed by a term, a "then", or a term and then a "then".
15878 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15879
15880 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15881 predicate_closed = true;
15882 *then_keyword = parser->previous;
15883 }
15884
15885 if (!predicate_closed) {
15886 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15887 }
15888
15889 context_pop(parser);
15890 return predicate;
15891}
15892
15893static inline pm_node_t *
15894parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15895 pm_node_list_t current_block_exits = { 0 };
15896 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15897
15898 pm_token_t keyword = parser->previous;
15899 pm_token_t then_keyword = not_provided(parser);
15900
15901 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15902 pm_statements_node_t *statements = NULL;
15903
15904 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15905 pm_accepts_block_stack_push(parser, true);
15906 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15907 pm_accepts_block_stack_pop(parser);
15908 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15909 }
15910
15911 pm_token_t end_keyword = not_provided(parser);
15912 pm_node_t *parent = NULL;
15913
15914 switch (context) {
15915 case PM_CONTEXT_IF:
15916 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15917 break;
15918 case PM_CONTEXT_UNLESS:
15919 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15920 break;
15921 default:
15922 assert(false && "unreachable");
15923 break;
15924 }
15925
15926 pm_node_t *current = parent;
15927
15928 // Parse any number of elsif clauses. This will form a linked list of if
15929 // nodes pointing to each other from the top.
15930 if (context == PM_CONTEXT_IF) {
15931 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15932 if (parser_end_of_line_p(parser)) {
15933 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15934 }
15935
15936 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15937 pm_token_t elsif_keyword = parser->current;
15938 parser_lex(parser);
15939
15940 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15941 pm_accepts_block_stack_push(parser, true);
15942
15943 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15944 pm_accepts_block_stack_pop(parser);
15945 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15946
15947 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15948 ((pm_if_node_t *) current)->subsequent = elsif;
15949 current = elsif;
15950 }
15951 }
15952
15953 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15954 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15955 opening_newline_index = token_newline_index(parser);
15956
15957 parser_lex(parser);
15958 pm_token_t else_keyword = parser->previous;
15959
15960 pm_accepts_block_stack_push(parser, true);
15961 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15962 pm_accepts_block_stack_pop(parser);
15963
15964 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15965 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15966 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15967
15968 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15969
15970 switch (context) {
15971 case PM_CONTEXT_IF:
15972 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15973 break;
15974 case PM_CONTEXT_UNLESS:
15975 ((pm_unless_node_t *) parent)->else_clause = else_node;
15976 break;
15977 default:
15978 assert(false && "unreachable");
15979 break;
15980 }
15981 } else {
15982 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15983 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15984 }
15985
15986 // Set the appropriate end location for all of the nodes in the subtree.
15987 switch (context) {
15988 case PM_CONTEXT_IF: {
15989 pm_node_t *current = parent;
15990 bool recursing = true;
15991
15992 while (recursing) {
15993 switch (PM_NODE_TYPE(current)) {
15994 case PM_IF_NODE:
15995 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15996 current = ((pm_if_node_t *) current)->subsequent;
15997 recursing = current != NULL;
15998 break;
15999 case PM_ELSE_NODE:
16000 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
16001 recursing = false;
16002 break;
16003 default: {
16004 recursing = false;
16005 break;
16006 }
16007 }
16008 }
16009 break;
16010 }
16011 case PM_CONTEXT_UNLESS:
16012 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
16013 break;
16014 default:
16015 assert(false && "unreachable");
16016 break;
16017 }
16018
16019 pop_block_exits(parser, previous_block_exits);
16020 pm_node_list_free(&current_block_exits);
16021
16022 return parent;
16023}
16024
/**
 * Case labels covering every keyword token. The expansion deliberately omits
 * the leading `case` and the trailing colon, so it is written inside a switch
 * as `case PM_CASE_KEYWORD:`.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
16040
/**
 * Case labels covering the operator tokens. In this file they are matched
 * where an operator is accepted as a symbol or method name (see parse_symbol
 * and parse_undef_argument). Written inside a switch as
 * `case PM_CASE_OPERATOR:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
16053
/**
 * Case labels covering the tokens grouped as "primitives": numeric literals,
 * the opening tokens of symbol/regexp/string/%-literals, a handful of literal
 * keywords, `->`, heredoc starts and character literals. Written inside a
 * switch as `case PM_CASE_PRIMITIVE:`.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
16068
/**
 * Case labels covering the tokens grouped under "parameter" (going by the
 * macro name, presumably those that can begin a parameter). Written inside a
 * switch as `case PM_CASE_PARAMETER:`.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
16077
/**
 * Case labels covering the node types grouped under "writable". Unlike the
 * other PM_CASE_* macros, this one lists node types rather than token types.
 * Written inside a switch as `case PM_CASE_WRITABLE:`.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
16086
16087// Assert here that the flags are the same so that we can safely switch the type
16088// of the node without having to move the flags.
16089PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16090
16095static inline pm_node_flags_t
16096parse_unescaped_encoding(const pm_parser_t *parser) {
16097 if (parser->explicit_encoding != NULL) {
16099 // If the there's an explicit encoding and it's using a UTF-8 escape
16100 // sequence, then mark the string as UTF-8.
16101 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
16102 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16103 // If there's a non-UTF-8 escape sequence being used, then the
16104 // string uses the source encoding, unless the source is marked as
16105 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16106 // order to keep the string valid.
16107 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
16108 }
16109 }
16110 return 0;
16111}
16112
16117static pm_node_t *
16118parse_string_part(pm_parser_t *parser, uint16_t depth) {
16119 switch (parser->current.type) {
16120 // Here the lexer has returned to us plain string content. In this case
16121 // we'll create a string node that has no opening or closing and return that
16122 // as the part. These kinds of parts look like:
16123 //
16124 // "aaa #{bbb} #@ccc ddd"
16125 // ^^^^ ^ ^^^^
16126 case PM_TOKEN_STRING_CONTENT: {
16127 pm_token_t opening = not_provided(parser);
16128 pm_token_t closing = not_provided(parser);
16129
16130 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16131 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16132
16133 parser_lex(parser);
16134 return node;
16135 }
16136 // Here the lexer has returned the beginning of an embedded expression. In
16137 // that case we'll parse the inner statements and return that as the part.
16138 // These kinds of parts look like:
16139 //
16140 // "aaa #{bbb} #@ccc ddd"
16141 // ^^^^^^
16142 case PM_TOKEN_EMBEXPR_BEGIN: {
16143 // Ruby disallows seeing encoding around interpolation in strings,
16144 // even though it is known at parse time.
16145 parser->explicit_encoding = NULL;
16146
16147 pm_lex_state_t state = parser->lex_state;
16148 int brace_nesting = parser->brace_nesting;
16149
16150 parser->brace_nesting = 0;
16151 lex_state_set(parser, PM_LEX_STATE_BEG);
16152 parser_lex(parser);
16153
16154 pm_token_t opening = parser->previous;
16155 pm_statements_node_t *statements = NULL;
16156
16157 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16158 pm_accepts_block_stack_push(parser, true);
16159 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16160 pm_accepts_block_stack_pop(parser);
16161 }
16162
16163 parser->brace_nesting = brace_nesting;
16164 lex_state_set(parser, state);
16165
16166 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16167 pm_token_t closing = parser->previous;
16168
16169 // If this set of embedded statements only contains a single
16170 // statement, then Ruby does not consider it as a possible statement
16171 // that could emit a line event.
16172 if (statements != NULL && statements->body.size == 1) {
16173 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16174 }
16175
16176 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16177 }
16178
16179 // Here the lexer has returned the beginning of an embedded variable.
16180 // In that case we'll parse the variable and create an appropriate node
16181 // for it and then return that node. These kinds of parts look like:
16182 //
16183 // "aaa #{bbb} #@ccc ddd"
16184 // ^^^^^
16185 case PM_TOKEN_EMBVAR: {
16186 // Ruby disallows seeing encoding around interpolation in strings,
16187 // even though it is known at parse time.
16188 parser->explicit_encoding = NULL;
16189
16190 lex_state_set(parser, PM_LEX_STATE_BEG);
16191 parser_lex(parser);
16192
16193 pm_token_t operator = parser->previous;
16194 pm_node_t *variable;
16195
16196 switch (parser->current.type) {
16197 // In this case a back reference is being interpolated. We'll
16198 // create a global variable read node.
16199 case PM_TOKEN_BACK_REFERENCE:
16200 parser_lex(parser);
16201 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16202 break;
16203 // In this case an nth reference is being interpolated. We'll
16204 // create a global variable read node.
16205 case PM_TOKEN_NUMBERED_REFERENCE:
16206 parser_lex(parser);
16207 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16208 break;
16209 // In this case a global variable is being interpolated. We'll
16210 // create a global variable read node.
16211 case PM_TOKEN_GLOBAL_VARIABLE:
16212 parser_lex(parser);
16213 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16214 break;
16215 // In this case an instance variable is being interpolated.
16216 // We'll create an instance variable read node.
16217 case PM_TOKEN_INSTANCE_VARIABLE:
16218 parser_lex(parser);
16219 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16220 break;
16221 // In this case a class variable is being interpolated. We'll
16222 // create a class variable read node.
16223 case PM_TOKEN_CLASS_VARIABLE:
16224 parser_lex(parser);
16225 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16226 break;
16227 // We can hit here if we got an invalid token. In that case
16228 // we'll not attempt to lex this token and instead just return a
16229 // missing node.
16230 default:
16231 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16232 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16233 break;
16234 }
16235
16236 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16237 }
16238 default:
16239 parser_lex(parser);
16240 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16241 return NULL;
16242 }
16243}
16244
16250static const uint8_t *
16251parse_operator_symbol_name(const pm_token_t *name) {
16252 switch (name->type) {
16253 case PM_TOKEN_TILDE:
16254 case PM_TOKEN_BANG:
16255 if (name->end[-1] == '@') return name->end - 1;
16257 default:
16258 return name->end;
16259 }
16260}
16261
16262static pm_node_t *
16263parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16264 pm_token_t closing = not_provided(parser);
16265 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16266
16267 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16268
16269 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16270 parser_lex(parser);
16271
16272 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16273 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16274
16275 return (pm_node_t *) symbol;
16276}
16277
16283static pm_node_t *
16284parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16285 const pm_token_t opening = parser->previous;
16286
16287 if (lex_mode->mode != PM_LEX_STRING) {
16288 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16289
16290 switch (parser->current.type) {
16291 case PM_CASE_OPERATOR:
16292 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16293 case PM_TOKEN_IDENTIFIER:
16294 case PM_TOKEN_CONSTANT:
16295 case PM_TOKEN_INSTANCE_VARIABLE:
16296 case PM_TOKEN_METHOD_NAME:
16297 case PM_TOKEN_CLASS_VARIABLE:
16298 case PM_TOKEN_GLOBAL_VARIABLE:
16299 case PM_TOKEN_NUMBERED_REFERENCE:
16300 case PM_TOKEN_BACK_REFERENCE:
16301 case PM_CASE_KEYWORD:
16302 parser_lex(parser);
16303 break;
16304 default:
16305 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16306 break;
16307 }
16308
16309 pm_token_t closing = not_provided(parser);
16310 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16311
16312 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16313 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16314
16315 return (pm_node_t *) symbol;
16316 }
16317
16318 if (lex_mode->as.string.interpolation) {
16319 // If we have the end of the symbol, then we can return an empty symbol.
16320 if (match1(parser, PM_TOKEN_STRING_END)) {
16321 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16322 parser_lex(parser);
16323
16324 pm_token_t content = not_provided(parser);
16325 pm_token_t closing = parser->previous;
16326 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16327 }
16328
16329 // Now we can parse the first part of the symbol.
16330 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16331
16332 // If we got a string part, then it's possible that we could transform
16333 // what looks like an interpolated symbol into a regular symbol.
16334 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16335 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16336 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16337
16338 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16339 }
16340
16341 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16342 if (part) pm_interpolated_symbol_node_append(symbol, part);
16343
16344 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16345 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16346 pm_interpolated_symbol_node_append(symbol, part);
16347 }
16348 }
16349
16350 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16351 if (match1(parser, PM_TOKEN_EOF)) {
16352 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16353 } else {
16354 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16355 }
16356
16357 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16358 return (pm_node_t *) symbol;
16359 }
16360
16361 pm_token_t content;
16362 pm_string_t unescaped;
16363
16364 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16365 content = parser->current;
16366 unescaped = parser->current_string;
16367 parser_lex(parser);
16368
16369 // If we have two string contents in a row, then the content of this
16370 // symbol is split because of heredoc contents. This looks like:
16371 //
16372 // <<A; :'a
16373 // A
16374 // b'
16375 //
16376 // In this case, the best way we have to represent this is as an
16377 // interpolated string node, so that's what we'll do here.
16378 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16379 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16380 pm_token_t bounds = not_provided(parser);
16381
16382 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16383 pm_interpolated_symbol_node_append(symbol, part);
16384
16385 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16386 pm_interpolated_symbol_node_append(symbol, part);
16387
16388 if (next_state != PM_LEX_STATE_NONE) {
16389 lex_state_set(parser, next_state);
16390 }
16391
16392 parser_lex(parser);
16393 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16394
16395 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16396 return (pm_node_t *) symbol;
16397 }
16398 } else {
16399 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16400 pm_string_shared_init(&unescaped, content.start, content.end);
16401 }
16402
16403 if (next_state != PM_LEX_STATE_NONE) {
16404 lex_state_set(parser, next_state);
16405 }
16406
16407 if (match1(parser, PM_TOKEN_EOF)) {
16408 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16409 } else {
16410 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16411 }
16412
16413 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16414}
16415
16420static inline pm_node_t *
16421parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16422 switch (parser->current.type) {
16423 case PM_CASE_OPERATOR: {
16424 const pm_token_t opening = not_provided(parser);
16425 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16426 }
16427 case PM_CASE_KEYWORD:
16428 case PM_TOKEN_CONSTANT:
16429 case PM_TOKEN_IDENTIFIER:
16430 case PM_TOKEN_METHOD_NAME: {
16431 parser_lex(parser);
16432
16433 pm_token_t opening = not_provided(parser);
16434 pm_token_t closing = not_provided(parser);
16435 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16436
16437 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16438 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16439
16440 return (pm_node_t *) symbol;
16441 }
16442 case PM_TOKEN_SYMBOL_BEGIN: {
16443 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16444 parser_lex(parser);
16445
16446 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16447 }
16448 default:
16449 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16450 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16451 }
16452}
16453
16460static inline pm_node_t *
16461parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16462 switch (parser->current.type) {
16463 case PM_CASE_OPERATOR: {
16464 const pm_token_t opening = not_provided(parser);
16465 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16466 }
16467 case PM_CASE_KEYWORD:
16468 case PM_TOKEN_CONSTANT:
16469 case PM_TOKEN_IDENTIFIER:
16470 case PM_TOKEN_METHOD_NAME: {
16471 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16472 parser_lex(parser);
16473
16474 pm_token_t opening = not_provided(parser);
16475 pm_token_t closing = not_provided(parser);
16476 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16477
16478 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16479 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16480
16481 return (pm_node_t *) symbol;
16482 }
16483 case PM_TOKEN_SYMBOL_BEGIN: {
16484 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16485 parser_lex(parser);
16486
16487 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16488 }
16489 case PM_TOKEN_BACK_REFERENCE:
16490 parser_lex(parser);
16491 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16492 case PM_TOKEN_NUMBERED_REFERENCE:
16493 parser_lex(parser);
16494 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16495 case PM_TOKEN_GLOBAL_VARIABLE:
16496 parser_lex(parser);
16497 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16498 default:
16499 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16500 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16501 }
16502}
16503
16508static pm_node_t *
16509parse_variable(pm_parser_t *parser) {
16510 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16511 int depth;
16512 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16513
16514 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16515 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16516 }
16517
16518 pm_scope_t *current_scope = parser->current_scope;
16519 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16520 if (is_numbered_param) {
16521 // When you use a numbered parameter, it implies the existence of
16522 // all of the locals that exist before it. For example, referencing
16523 // _2 means that _1 must exist. Therefore here we loop through all
16524 // of the possibilities and add them into the constant pool.
16525 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16526 for (uint8_t number = 1; number <= maximum; number++) {
16527 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16528 }
16529
16530 if (!match1(parser, PM_TOKEN_EQUAL)) {
16531 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16532 }
16533
16534 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16535 pm_node_list_append(&current_scope->implicit_parameters, node);
16536
16537 return node;
16538 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16539 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16540 pm_node_list_append(&current_scope->implicit_parameters, node);
16541
16542 return node;
16543 }
16544 }
16545
16546 return NULL;
16547}
16548
16552static pm_node_t *
16553parse_variable_call(pm_parser_t *parser) {
16554 pm_node_flags_t flags = 0;
16555
16556 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16557 pm_node_t *node = parse_variable(parser);
16558 if (node != NULL) return node;
16559 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
16560 }
16561
16562 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16563 pm_node_flag_set((pm_node_t *)node, flags);
16564
16565 return (pm_node_t *) node;
16566}
16567
16573static inline pm_token_t
16574parse_method_definition_name(pm_parser_t *parser) {
16575 switch (parser->current.type) {
16576 case PM_CASE_KEYWORD:
16577 case PM_TOKEN_CONSTANT:
16578 case PM_TOKEN_METHOD_NAME:
16579 parser_lex(parser);
16580 return parser->previous;
16581 case PM_TOKEN_IDENTIFIER:
16582 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16583 parser_lex(parser);
16584 return parser->previous;
16585 case PM_CASE_OPERATOR:
16586 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16587 parser_lex(parser);
16588 return parser->previous;
16589 default:
16590 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16591 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16592 }
16593}
16594
16595static void
16596parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16597 // Get a reference to the string struct that is being held by the string
16598 // node. This is the value we're going to actually manipulate.
16599 pm_string_ensure_owned(string);
16600
16601 // Now get the bounds of the existing string. We'll use this as a
16602 // destination to move bytes into. We'll also use it for bounds checking
16603 // since we don't require that these strings be null terminated.
16604 size_t dest_length = pm_string_length(string);
16605 const uint8_t *source_cursor = (uint8_t *) string->source;
16606 const uint8_t *source_end = source_cursor + dest_length;
16607
16608 // We're going to move bytes backward in the string when we get leading
16609 // whitespace, so we'll maintain a pointer to the current position in the
16610 // string that we're writing to.
16611 size_t trimmed_whitespace = 0;
16612
16613 // While we haven't reached the amount of common whitespace that we need to
16614 // trim and we haven't reached the end of the string, we'll keep trimming
16615 // whitespace. Trimming in this context means skipping over these bytes such
16616 // that they aren't copied into the new string.
16617 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16618 if (*source_cursor == '\t') {
16619 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16620 if (trimmed_whitespace > common_whitespace) break;
16621 } else {
16622 trimmed_whitespace++;
16623 }
16624
16625 source_cursor++;
16626 dest_length--;
16627 }
16628
16629 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16630 string->length = dest_length;
16631}
16632
16636static void
16637parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16638 // The next node should be dedented if it's the first node in the list or if
16639 // it follows a string node.
16640 bool dedent_next = true;
16641
16642 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16643 // keep around two indices: a read and a write. If we end up trimming all of
16644 // the whitespace from a node, then we'll drop it from the list entirely.
16645 size_t write_index = 0;
16646
16647 pm_node_t *node;
16648 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16649 // We're not manipulating child nodes that aren't strings. In this case
16650 // we'll skip past it and indicate that the subsequent node should not
16651 // be dedented.
16652 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16653 nodes->nodes[write_index++] = node;
16654 dedent_next = false;
16655 continue;
16656 }
16657
16658 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16659 if (dedent_next) {
16660 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16661 }
16662
16663 if (string_node->unescaped.length == 0) {
16664 pm_node_destroy(parser, node);
16665 } else {
16666 nodes->nodes[write_index++] = node;
16667 }
16668
16669 // We always dedent the next node if it follows a string node.
16670 dedent_next = true;
16671 }
16672
16673 nodes->size = write_index;
16674}
16675
16679static pm_token_t
16680parse_strings_empty_content(const uint8_t *location) {
16681 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16682}
16683
16687static inline pm_node_t *
16688parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16689 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16690 bool concating = false;
16691
16692 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16693 pm_node_t *node = NULL;
16694
16695 // Here we have found a string literal. We'll parse it and add it to
16696 // the list of strings.
16697 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16698 assert(lex_mode->mode == PM_LEX_STRING);
16699 bool lex_interpolation = lex_mode->as.string.interpolation;
16700 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16701
16702 pm_token_t opening = parser->current;
16703 parser_lex(parser);
16704
16705 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16706 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16707 // If we get here, then we have an end immediately after a
16708 // start. In that case we'll create an empty content token and
16709 // return an uninterpolated string.
16710 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16711 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16712
16713 pm_string_shared_init(&string->unescaped, content.start, content.end);
16714 node = (pm_node_t *) string;
16715 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16716 // If we get here, then we have an end of a label immediately
16717 // after a start. In that case we'll create an empty symbol
16718 // node.
16719 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16720 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16721
16722 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16723 node = (pm_node_t *) symbol;
16724
16725 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16726 } else if (!lex_interpolation) {
16727 // If we don't accept interpolation then we expect the string to
16728 // start with a single string content node.
16729 pm_string_t unescaped;
16730 pm_token_t content;
16731
16732 if (match1(parser, PM_TOKEN_EOF)) {
16733 unescaped = PM_STRING_EMPTY;
16734 content = not_provided(parser);
16735 } else {
16736 unescaped = parser->current_string;
16737 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16738 content = parser->previous;
16739 }
16740
16741 // It is unfortunately possible to have multiple string content
16742 // nodes in a row in the case that there's heredoc content in
16743 // the middle of the string, like this cursed example:
16744 //
16745 // <<-END+'b
16746 // a
16747 // END
16748 // c'+'d'
16749 //
16750 // In that case we need to switch to an interpolated string to
16751 // be able to contain all of the parts.
16752 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16753 pm_node_list_t parts = { 0 };
16754
16755 pm_token_t delimiters = not_provided(parser);
16756 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16757 pm_node_list_append(&parts, part);
16758
16759 do {
16760 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16761 pm_node_list_append(&parts, part);
16762 parser_lex(parser);
16763 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16764
16765 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16766 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16767
16768 pm_node_list_free(&parts);
16769 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16770 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16771 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16772 } else if (match1(parser, PM_TOKEN_EOF)) {
16773 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16774 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16775 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16776 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16777 } else {
16778 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16779 parser->previous.start = parser->previous.end;
16780 parser->previous.type = PM_TOKEN_MISSING;
16781 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16782 }
16783 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16784 // In this case we've hit string content so we know the string
16785 // at least has something in it. We'll need to check if the
16786 // following token is the end (in which case we can return a
16787 // plain string) or if it's not then it has interpolation.
16788 pm_token_t content = parser->current;
16789 pm_string_t unescaped = parser->current_string;
16790 parser_lex(parser);
16791
16792 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16793 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16794 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16795
16796 // Kind of odd behavior, but basically if we have an
16797 // unterminated string and it ends in a newline, we back up one
16798 // character so that the error message is on the last line of
16799 // content in the string.
16800 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16801 const uint8_t *location = parser->previous.end;
16802 if (location > parser->start && location[-1] == '\n') location--;
16803 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16804
16805 parser->previous.start = parser->previous.end;
16806 parser->previous.type = PM_TOKEN_MISSING;
16807 }
16808 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16809 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16810 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16811 } else {
16812 // If we get here, then we have interpolation so we'll need
16813 // to create a string or symbol node with interpolation.
16814 pm_node_list_t parts = { 0 };
16815 pm_token_t string_opening = not_provided(parser);
16816 pm_token_t string_closing = not_provided(parser);
16817
16818 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16819 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16820 pm_node_list_append(&parts, part);
16821
16822 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16823 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16824 pm_node_list_append(&parts, part);
16825 }
16826 }
16827
16828 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16829 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16830 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16831 } else if (match1(parser, PM_TOKEN_EOF)) {
16832 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16833 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16834 } else {
16835 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16836 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16837 }
16838
16839 pm_node_list_free(&parts);
16840 }
16841 } else {
16842 // If we get here, then the first part of the string is not plain
16843 // string content, in which case we need to parse the string as an
16844 // interpolated string.
16845 pm_node_list_t parts = { 0 };
16846 pm_node_t *part;
16847
16848 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16849 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16850 pm_node_list_append(&parts, part);
16851 }
16852 }
16853
16854 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16855 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16856 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16857 } else if (match1(parser, PM_TOKEN_EOF)) {
16858 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16859 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16860 } else {
16861 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16862 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16863 }
16864
16865 pm_node_list_free(&parts);
16866 }
16867
16868 if (current == NULL) {
16869 // If the node we just parsed is a symbol node, then we can't
16870 // concatenate it with anything else, so we can now return that
16871 // node.
16872 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16873 return node;
16874 }
16875
16876 // If we don't already have a node, then it's fine and we can just
16877 // set the result to be the node we just parsed.
16878 current = node;
16879 } else {
16880 // Otherwise we need to check the type of the node we just parsed.
16881 // If it cannot be concatenated with the previous node, then we'll
16882 // need to add a syntax error.
16883 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16884 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16885 }
16886
16887 // If we haven't already created our container for concatenation,
16888 // we'll do that now.
16889 if (!concating) {
16890 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16891 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16892 }
16893
16894 concating = true;
16895 pm_token_t bounds = not_provided(parser);
16896
16897 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16898 pm_interpolated_string_node_append(container, current);
16899 current = (pm_node_t *) container;
16900 }
16901
16902 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16903 }
16904 }
16905
16906 return current;
16907}
16908
// Values for the `flags` argument of parse_pattern. Callers in this file pass
// either PM_PARSE_PATTERN_SINGLE on its own or a bitwise OR of
// PM_PARSE_PATTERN_TOP and PM_PARSE_PATTERN_MULTI.
// NOTE(review): the exact meaning of each flag is defined by parse_pattern,
// which is not part of this section; confirm there before relying on it.
#define PM_PARSE_PATTERN_SINGLE 0
#define PM_PARSE_PATTERN_TOP 1
#define PM_PARSE_PATTERN_MULTI 2

// Forward declaration, so that the pattern helpers below can call
// parse_pattern before its definition.
static pm_node_t *
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16915
16921static void
16922parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16923 // Skip this capture if it starts with an underscore.
16924 if (*location->start == '_') return;
16925
16926 if (pm_constant_id_list_includes(captures, capture)) {
16927 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16928 } else {
16929 pm_constant_id_list_append(captures, capture);
16930 }
16931}
16932
16936static pm_node_t *
16937parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16938 // Now, if there are any :: operators that follow, parse them as constant
16939 // path nodes.
16940 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16941 pm_token_t delimiter = parser->previous;
16942 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16943 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16944 }
16945
16946 // If there is a [ or ( that follows, then this is part of a larger pattern
16947 // expression. We'll parse the inner pattern here, then modify the returned
16948 // inner pattern with our constant path attached.
16949 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16950 return node;
16951 }
16952
16953 pm_token_t opening;
16954 pm_token_t closing;
16955 pm_node_t *inner = NULL;
16956
16957 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16958 opening = parser->previous;
16959 accept1(parser, PM_TOKEN_NEWLINE);
16960
16961 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16962 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16963 accept1(parser, PM_TOKEN_NEWLINE);
16964 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16965 }
16966
16967 closing = parser->previous;
16968 } else {
16969 parser_lex(parser);
16970 opening = parser->previous;
16971 accept1(parser, PM_TOKEN_NEWLINE);
16972
16973 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16974 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16975 accept1(parser, PM_TOKEN_NEWLINE);
16976 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16977 }
16978
16979 closing = parser->previous;
16980 }
16981
16982 if (!inner) {
16983 // If there was no inner pattern, then we have something like Foo() or
16984 // Foo[]. In that case we'll create an array pattern with no requireds.
16985 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16986 }
16987
16988 // Now that we have the inner pattern, check to see if it's an array, find,
16989 // or hash pattern. If it is, then we'll attach our constant path to it if
16990 // it doesn't already have a constant. If it's not one of those node types
16991 // or it does have a constant, then we'll create an array pattern.
16992 switch (PM_NODE_TYPE(inner)) {
16993 case PM_ARRAY_PATTERN_NODE: {
16994 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16995
16996 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16997 pattern_node->base.location.start = node->location.start;
16998 pattern_node->base.location.end = closing.end;
16999
17000 pattern_node->constant = node;
17001 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17002 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17003
17004 return (pm_node_t *) pattern_node;
17005 }
17006
17007 break;
17008 }
17009 case PM_FIND_PATTERN_NODE: {
17010 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17011
17012 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
17013 pattern_node->base.location.start = node->location.start;
17014 pattern_node->base.location.end = closing.end;
17015
17016 pattern_node->constant = node;
17017 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17018 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17019
17020 return (pm_node_t *) pattern_node;
17021 }
17022
17023 break;
17024 }
17025 case PM_HASH_PATTERN_NODE: {
17026 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
17027
17028 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
17029 pattern_node->base.location.start = node->location.start;
17030 pattern_node->base.location.end = closing.end;
17031
17032 pattern_node->constant = node;
17033 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17034 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17035
17036 return (pm_node_t *) pattern_node;
17037 }
17038
17039 break;
17040 }
17041 default:
17042 break;
17043 }
17044
17045 // If we got here, then we didn't return one of the inner patterns by
17046 // attaching its constant. In this case we'll create an array pattern and
17047 // attach our constant to it.
17048 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
17049 pm_array_pattern_node_requireds_append(pattern_node, inner);
17050 return (pm_node_t *) pattern_node;
17051}
17052
17056static pm_splat_node_t *
17057parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17058 assert(parser->previous.type == PM_TOKEN_USTAR);
17059 pm_token_t operator = parser->previous;
17060 pm_node_t *name = NULL;
17061
17062 // Rest patterns don't necessarily have a name associated with them. So we
17063 // will check for that here. If they do, then we'll add it to the local
17064 // table since this pattern will cause it to become a local variable.
17065 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17066 pm_token_t identifier = parser->previous;
17067 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
17068
17069 int depth;
17070 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17071 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
17072 }
17073
17074 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
17075 name = (pm_node_t *) pm_local_variable_target_node_create(
17076 parser,
17077 &PM_LOCATION_TOKEN_VALUE(&identifier),
17078 constant_id,
17079 (uint32_t) (depth == -1 ? 0 : depth)
17080 );
17081 }
17082
17083 // Finally we can return the created node.
17084 return pm_splat_node_create(parser, &operator, name);
17085}
17086
17090static pm_node_t *
17091parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17092 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17093 parser_lex(parser);
17094
17095 pm_token_t operator = parser->previous;
17096 pm_node_t *value = NULL;
17097
17098 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17099 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17100 }
17101
17102 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17103 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17104
17105 int depth;
17106 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17107 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17108 }
17109
17110 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17111 value = (pm_node_t *) pm_local_variable_target_node_create(
17112 parser,
17113 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17114 constant_id,
17115 (uint32_t) (depth == -1 ? 0 : depth)
17116 );
17117 }
17118
17119 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17120}
17121
17126static bool
17127pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17128 ptrdiff_t length = end - start;
17129 if (length == 0) return false;
17130
17131 // First ensure that it starts with a valid identifier starting character.
17132 size_t width = char_is_identifier_start(parser, start, end - start);
17133 if (width == 0) return false;
17134
17135 // Next, ensure that it's not an uppercase character.
17136 if (parser->encoding_changed) {
17137 if (parser->encoding->isupper_char(start, length)) return false;
17138 } else {
17139 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17140 }
17141
17142 // Next, iterate through all of the bytes of the string to ensure that they
17143 // are all valid identifier characters.
17144 const uint8_t *cursor = start + width;
17145 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
17146 return cursor == end;
17147}
17148
17153static pm_node_t *
17154parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17155 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17156
17157 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17158 int depth = -1;
17159
17160 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17161 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17162 } else {
17163 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17164
17165 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17166 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17167 }
17168 }
17169
17170 if (depth == -1) {
17171 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17172 }
17173
17174 parse_pattern_capture(parser, captures, constant_id, value_loc);
17175 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17176 parser,
17177 value_loc,
17178 constant_id,
17179 (uint32_t) (depth == -1 ? 0 : depth)
17180 );
17181
17182 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17183}
17184
17189static void
17190parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17191 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17192 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17193 }
17194}
17195
17200parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17201 pm_node_list_t assocs = { 0 };
17202 pm_static_literals_t keys = { 0 };
17203 pm_node_t *rest = NULL;
17204
17205 switch (PM_NODE_TYPE(first_node)) {
17206 case PM_ASSOC_SPLAT_NODE:
17207 case PM_NO_KEYWORDS_PARAMETER_NODE:
17208 rest = first_node;
17209 break;
17210 case PM_SYMBOL_NODE: {
17211 if (pm_symbol_node_label_p(first_node)) {
17212 parse_pattern_hash_key(parser, &keys, first_node);
17213 pm_node_t *value;
17214
17215 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17216 // Otherwise, we will create an implicit local variable
17217 // target for the value.
17218 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17219 } else {
17220 // Here we have a value for the first assoc in the list, so
17221 // we will parse it now.
17222 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17223 }
17224
17225 pm_token_t operator = not_provided(parser);
17226 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17227
17228 pm_node_list_append(&assocs, assoc);
17229 break;
17230 }
17231 }
17233 default: {
17234 // If we get anything else, then this is an error. For this we'll
17235 // create a missing node for the value and create an assoc node for
17236 // the first node in the list.
17237 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17238 pm_parser_err_node(parser, first_node, diag_id);
17239
17240 pm_token_t operator = not_provided(parser);
17241 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17242 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17243
17244 pm_node_list_append(&assocs, assoc);
17245 break;
17246 }
17247 }
17248
17249 // If there are any other assocs, then we'll parse them now.
17250 while (accept1(parser, PM_TOKEN_COMMA)) {
17251 // Here we need to break to support trailing commas.
17252 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17253 // Trailing commas are not allowed to follow a rest pattern.
17254 if (rest != NULL) {
17255 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17256 }
17257
17258 break;
17259 }
17260
17261 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17262 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17263
17264 if (rest == NULL) {
17265 rest = assoc;
17266 } else {
17267 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17268 pm_node_list_append(&assocs, assoc);
17269 }
17270 } else {
17271 pm_node_t *key;
17272
17273 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17274 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17275
17276 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
17277 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17278 } else if (!pm_symbol_node_label_p(key)) {
17279 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17280 }
17281 } else {
17282 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17283 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17284 }
17285
17286 parse_pattern_hash_key(parser, &keys, key);
17287 pm_node_t *value = NULL;
17288
17289 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17290 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17291 } else {
17292 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17293 }
17294
17295 pm_token_t operator = not_provided(parser);
17296 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17297
17298 if (rest != NULL) {
17299 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17300 }
17301
17302 pm_node_list_append(&assocs, assoc);
17303 }
17304 }
17305
17306 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17307 xfree(assocs.nodes);
17308
17309 pm_static_literals_free(&keys);
17310 return node;
17311}
17312
17316static pm_node_t *
17317parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17318 switch (parser->current.type) {
17319 case PM_TOKEN_IDENTIFIER:
17320 case PM_TOKEN_METHOD_NAME: {
17321 parser_lex(parser);
17322 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17323
17324 int depth;
17325 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17326 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17327 }
17328
17329 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17330 return (pm_node_t *) pm_local_variable_target_node_create(
17331 parser,
17332 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17333 constant_id,
17334 (uint32_t) (depth == -1 ? 0 : depth)
17335 );
17336 }
17337 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17338 pm_token_t opening = parser->current;
17339 parser_lex(parser);
17340
17341 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17342 // If we have an empty array pattern, then we'll just return a new
17343 // array pattern node.
17344 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17345 }
17346
17347 // Otherwise, we'll parse the inner pattern, then deal with it depending
17348 // on the type it returns.
17349 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17350
17351 accept1(parser, PM_TOKEN_NEWLINE);
17352 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17353 pm_token_t closing = parser->previous;
17354
17355 switch (PM_NODE_TYPE(inner)) {
17356 case PM_ARRAY_PATTERN_NODE: {
17357 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17358 if (pattern_node->opening_loc.start == NULL) {
17359 pattern_node->base.location.start = opening.start;
17360 pattern_node->base.location.end = closing.end;
17361
17362 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17363 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17364
17365 return (pm_node_t *) pattern_node;
17366 }
17367
17368 break;
17369 }
17370 case PM_FIND_PATTERN_NODE: {
17371 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17372 if (pattern_node->opening_loc.start == NULL) {
17373 pattern_node->base.location.start = opening.start;
17374 pattern_node->base.location.end = closing.end;
17375
17376 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17377 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17378
17379 return (pm_node_t *) pattern_node;
17380 }
17381
17382 break;
17383 }
17384 default:
17385 break;
17386 }
17387
17388 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17389 pm_array_pattern_node_requireds_append(node, inner);
17390 return (pm_node_t *) node;
17391 }
17392 case PM_TOKEN_BRACE_LEFT: {
17393 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17394 parser->pattern_matching_newlines = false;
17395
17397 pm_token_t opening = parser->current;
17398 parser_lex(parser);
17399
17400 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17401 // If we have an empty hash pattern, then we'll just return a new hash
17402 // pattern node.
17403 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17404 } else {
17405 pm_node_t *first_node;
17406
17407 switch (parser->current.type) {
17408 case PM_TOKEN_LABEL:
17409 parser_lex(parser);
17410 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17411 break;
17412 case PM_TOKEN_USTAR_STAR:
17413 first_node = parse_pattern_keyword_rest(parser, captures);
17414 break;
17415 case PM_TOKEN_STRING_BEGIN:
17416 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17417 break;
17418 default: {
17419 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17420 parser_lex(parser);
17421
17422 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17423 break;
17424 }
17425 }
17426
17427 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17428
17429 accept1(parser, PM_TOKEN_NEWLINE);
17430 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17431 pm_token_t closing = parser->previous;
17432
17433 node->base.location.start = opening.start;
17434 node->base.location.end = closing.end;
17435
17436 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17437 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17438 }
17439
17440 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17441 return (pm_node_t *) node;
17442 }
17443 case PM_TOKEN_UDOT_DOT:
17444 case PM_TOKEN_UDOT_DOT_DOT: {
17445 pm_token_t operator = parser->current;
17446 parser_lex(parser);
17447
17448 // Since we have a unary range operator, we need to parse the subsequent
17449 // expression as the right side of the range.
17450 switch (parser->current.type) {
17451 case PM_CASE_PRIMITIVE: {
17452 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17453 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17454 }
17455 default: {
17456 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17457 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17458 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17459 }
17460 }
17461 }
17462 case PM_CASE_PRIMITIVE: {
17463 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17464
17465 // If we found a label, we need to immediately return to the caller.
17466 if (pm_symbol_node_label_p(node)) return node;
17467
17468 // Call nodes (arithmetic operations) are not allowed in patterns
17469 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17470 pm_parser_err_node(parser, node, diag_id);
17471 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
17472 pm_node_destroy(parser, node);
17473 return (pm_node_t *) missing_node;
17474 }
17475
17476 // Now that we have a primitive, we need to check if it's part of a range.
17477 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17478 pm_token_t operator = parser->previous;
17479
17480 // Now that we have the operator, we need to check if this is followed
17481 // by another expression. If it is, then we will create a full range
17482 // node. Otherwise, we'll create an endless range.
17483 switch (parser->current.type) {
17484 case PM_CASE_PRIMITIVE: {
17485 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17486 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17487 }
17488 default:
17489 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17490 }
17491 }
17492
17493 return node;
17494 }
17495 case PM_TOKEN_CARET: {
17496 parser_lex(parser);
17497 pm_token_t operator = parser->previous;
17498
17499 // At this point we have a pin operator. We need to check the subsequent
17500 // expression to determine if it's a variable or an expression.
17501 switch (parser->current.type) {
17502 case PM_TOKEN_IDENTIFIER: {
17503 parser_lex(parser);
17504 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17505
17506 if (variable == NULL) {
17507 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17508 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17509 }
17510
17511 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17512 }
17513 case PM_TOKEN_INSTANCE_VARIABLE: {
17514 parser_lex(parser);
17515 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17516
17517 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17518 }
17519 case PM_TOKEN_CLASS_VARIABLE: {
17520 parser_lex(parser);
17521 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17522
17523 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17524 }
17525 case PM_TOKEN_GLOBAL_VARIABLE: {
17526 parser_lex(parser);
17527 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17528
17529 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17530 }
17531 case PM_TOKEN_NUMBERED_REFERENCE: {
17532 parser_lex(parser);
17533 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17534
17535 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17536 }
17537 case PM_TOKEN_BACK_REFERENCE: {
17538 parser_lex(parser);
17539 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17540
17541 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17542 }
17543 case PM_TOKEN_PARENTHESIS_LEFT: {
17544 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17545 parser->pattern_matching_newlines = false;
17546
17547 pm_token_t lparen = parser->current;
17548 parser_lex(parser);
17549
17550 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17551 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17552
17553 accept1(parser, PM_TOKEN_NEWLINE);
17554 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17555 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17556 }
17557 default: {
17558 // If we get here, then we have a pin operator followed by something
17559 // not understood. We'll create a missing node and return that.
17560 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17561 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17562 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17563 }
17564 }
17565 }
17566 case PM_TOKEN_UCOLON_COLON: {
17567 pm_token_t delimiter = parser->current;
17568 parser_lex(parser);
17569
17570 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17571 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17572
17573 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17574 }
17575 case PM_TOKEN_CONSTANT: {
17576 pm_token_t constant = parser->current;
17577 parser_lex(parser);
17578
17579 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17580 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17581 }
17582 default:
17583 pm_parser_err_current(parser, diag_id);
17584 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17585 }
17586}
17587
17592static pm_node_t *
17593parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17594 pm_node_t *node = first_node;
17595
17596 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
17597 pm_token_t operator = parser->previous;
17598
17599 switch (parser->current.type) {
17600 case PM_TOKEN_IDENTIFIER:
17601 case PM_TOKEN_BRACKET_LEFT_ARRAY:
17602 case PM_TOKEN_BRACE_LEFT:
17603 case PM_TOKEN_CARET:
17604 case PM_TOKEN_CONSTANT:
17605 case PM_TOKEN_UCOLON_COLON:
17606 case PM_TOKEN_UDOT_DOT:
17607 case PM_TOKEN_UDOT_DOT_DOT:
17608 case PM_CASE_PRIMITIVE: {
17609 if (node == NULL) {
17610 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17611 } else {
17612 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17613 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17614 }
17615
17616 break;
17617 }
17618 case PM_TOKEN_PARENTHESIS_LEFT:
17619 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17620 pm_token_t opening = parser->current;
17621 parser_lex(parser);
17622
17623 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17624 accept1(parser, PM_TOKEN_NEWLINE);
17625 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17626 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
17627
17628 if (node == NULL) {
17629 node = right;
17630 } else {
17631 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17632 }
17633
17634 break;
17635 }
17636 default: {
17637 pm_parser_err_current(parser, diag_id);
17638 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17639
17640 if (node == NULL) {
17641 node = right;
17642 } else {
17643 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17644 }
17645
17646 break;
17647 }
17648 }
17649 }
17650
17651 // If we have an =>, then we are assigning this pattern to a variable.
17652 // In this case we should create an assignment node.
17653 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17654 pm_token_t operator = parser->previous;
17655 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17656
17657 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17658 int depth;
17659
17660 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17661 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17662 }
17663
17664 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17665 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17666 parser,
17667 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17668 constant_id,
17669 (uint32_t) (depth == -1 ? 0 : depth)
17670 );
17671
17672 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17673 }
17674
17675 return node;
17676}
17677
17681static pm_node_t *
17682parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17683 pm_node_t *node = NULL;
17684
17685 bool leading_rest = false;
17686 bool trailing_rest = false;
17687
17688 switch (parser->current.type) {
17689 case PM_TOKEN_LABEL: {
17690 parser_lex(parser);
17691 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17692 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17693
17694 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17695 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17696 }
17697
17698 return node;
17699 }
17700 case PM_TOKEN_USTAR_STAR: {
17701 node = parse_pattern_keyword_rest(parser, captures);
17702 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17703
17704 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17705 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17706 }
17707
17708 return node;
17709 }
17710 case PM_TOKEN_STRING_BEGIN: {
17711 // We need special handling for string beginnings because they could
17712 // be dynamic symbols leading to hash patterns.
17713 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17714
17715 if (pm_symbol_node_label_p(node)) {
17716 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17717
17718 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17719 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17720 }
17721
17722 return node;
17723 }
17724
17725 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17726 break;
17727 }
17728 case PM_TOKEN_USTAR: {
17729 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17730 parser_lex(parser);
17731 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17732 leading_rest = true;
17733 break;
17734 }
17735 }
17737 default:
17738 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17739 break;
17740 }
17741
17742 // If we got a dynamic label symbol, then we need to treat it like the
17743 // beginning of a hash pattern.
17744 if (pm_symbol_node_label_p(node)) {
17745 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17746 }
17747
17748 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17749 // If we have a comma, then we are now parsing either an array pattern
17750 // or a find pattern. We need to parse all of the patterns, put them
17751 // into a big list, and then determine which type of node we have.
17752 pm_node_list_t nodes = { 0 };
17753 pm_node_list_append(&nodes, node);
17754
17755 // Gather up all of the patterns into the list.
17756 while (accept1(parser, PM_TOKEN_COMMA)) {
17757 // Break early here in case we have a trailing comma.
17758 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17759 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17760 pm_node_list_append(&nodes, node);
17761 trailing_rest = true;
17762 break;
17763 }
17764
17765 if (accept1(parser, PM_TOKEN_USTAR)) {
17766 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17767
17768 // If we have already parsed a splat pattern, then this is an
17769 // error. We will continue to parse the rest of the patterns,
17770 // but we will indicate it as an error.
17771 if (trailing_rest) {
17772 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17773 }
17774
17775 trailing_rest = true;
17776 } else {
17777 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17778 }
17779
17780 pm_node_list_append(&nodes, node);
17781 }
17782
17783 // If the first pattern and the last pattern are rest patterns, then we
17784 // will call this a find pattern, regardless of how many rest patterns
17785 // are in between because we know we already added the appropriate
17786 // errors. Otherwise we will create an array pattern.
17787 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17788 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17789
17790 if (nodes.size == 2) {
17791 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17792 }
17793 } else {
17794 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17795
17796 if (leading_rest && trailing_rest) {
17797 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17798 }
17799 }
17800
17801 xfree(nodes.nodes);
17802 } else if (leading_rest) {
17803 // Otherwise, if we parsed a single splat pattern, then we know we have
17804 // an array pattern, so we can go ahead and create that node.
17805 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17806 }
17807
17808 return node;
17809}
17810
17816static inline void
17817parse_negative_numeric(pm_node_t *node) {
17818 switch (PM_NODE_TYPE(node)) {
17819 case PM_INTEGER_NODE: {
17820 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17821 cast->base.location.start--;
17822 cast->value.negative = true;
17823 break;
17824 }
17825 case PM_FLOAT_NODE: {
17826 pm_float_node_t *cast = (pm_float_node_t *) node;
17827 cast->base.location.start--;
17828 cast->value = -cast->value;
17829 break;
17830 }
17831 case PM_RATIONAL_NODE: {
17832 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17833 cast->base.location.start--;
17834 cast->numerator.negative = true;
17835 break;
17836 }
17837 case PM_IMAGINARY_NODE:
17838 node->location.start--;
17839 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17840 break;
17841 default:
17842 assert(false && "unreachable");
17843 break;
17844 }
17845}
17846
17852static void
17853pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17854 switch (diag_id) {
17855 case PM_ERR_HASH_KEY: {
17856 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17857 break;
17858 }
17859 case PM_ERR_HASH_VALUE:
17860 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17861 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17862 break;
17863 }
17864 case PM_ERR_UNARY_RECEIVER: {
17865 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17866 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17867 break;
17868 }
17869 case PM_ERR_UNARY_DISALLOWED:
17870 case PM_ERR_EXPECT_ARGUMENT: {
17871 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17872 break;
17873 }
17874 default:
17875 pm_parser_err_previous(parser, diag_id);
17876 break;
17877 }
17878}
17879
// Validate a `retry` node against the stack of parsing contexts, walking from
// parser->current_context outwards through each context's `prev` link.
//
// - Reaching a context in the first ("good") group ends the walk silently.
// - Reaching a context in the second ("bad") group reports an error on the
//   node. Which error depends on whether the walk previously passed through
//   an else or an ensure context (tracked in `context`).
// - All other contexts are skipped over.
//
// NOTE(review): several `case` labels appear to be missing from this listing
// (for example, the two `context = ...` assignments below have no visible
// labels in front of them), so the text is kept exactly as found.
17883static void
17884parse_retry(pm_parser_t *parser, const pm_node_t *node) {
// Function-local constants describing what the walk has passed through so
// far. They are undefined again at the end of the function.
17885#define CONTEXT_NONE 0
17886#define CONTEXT_THROUGH_ENSURE 1
17887#define CONTEXT_THROUGH_ELSE 2
17888
17889 pm_context_node_t *context_node = parser->current_context;
17890 int context = CONTEXT_NONE;
17891
17892 while (context_node != NULL) {
17893 switch (context_node->context) {
17901 case PM_CONTEXT_DEFINED:
17903 // These are the good cases. We're allowed to have a retry here.
17904 return;
17905 case PM_CONTEXT_CLASS:
17906 case PM_CONTEXT_DEF:
17908 case PM_CONTEXT_MAIN:
17909 case PM_CONTEXT_MODULE:
17910 case PM_CONTEXT_PREEXE:
17911 case PM_CONTEXT_SCLASS:
17912 // These are the bad cases. We're not allowed to have a retry in
17913 // these contexts.
17914 if (context == CONTEXT_NONE) {
17915 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17916 } else if (context == CONTEXT_THROUGH_ENSURE) {
17917 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17918 } else if (context == CONTEXT_THROUGH_ELSE) {
17919 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17920 }
17921 return;
17929 // These are also bad cases, but with a more specific error
17930 // message indicating the else.
17931 context = CONTEXT_THROUGH_ELSE;
17932 break;
17940 // These are also bad cases, but with a more specific error
17941 // message indicating the ensure.
17942 context = CONTEXT_THROUGH_ENSURE;
17943 break;
17944 case PM_CONTEXT_NONE:
17945 // This case should never happen.
17946 assert(false && "unreachable");
17947 break;
17948 case PM_CONTEXT_BEGIN:
17951 case PM_CONTEXT_CASE_IN:
17954 case PM_CONTEXT_ELSE:
17955 case PM_CONTEXT_ELSIF:
17956 case PM_CONTEXT_EMBEXPR:
17958 case PM_CONTEXT_FOR:
17959 case PM_CONTEXT_IF:
17964 case PM_CONTEXT_PARENS:
17965 case PM_CONTEXT_POSTEXE:
17967 case PM_CONTEXT_TERNARY:
17968 case PM_CONTEXT_UNLESS:
17969 case PM_CONTEXT_UNTIL:
17970 case PM_CONTEXT_WHILE:
17971 // In these contexts we should continue walking up the list of
17972 // contexts.
17973 break;
17974 }
17975
17976 context_node = context_node->prev;
17977 }
17978
// Undefine exactly the names that were defined at the top of the function so
// that none of them stays visible in the rest of the file.
17979#undef CONTEXT_NONE
17980#undef CONTEXT_THROUGH_ENSURE
17981#undef CONTEXT_THROUGH_ELSE
17982}
17983
// Validate a `yield` node against the stack of parsing contexts, walking from
// parser->current_context outwards through each context's `prev` link.
//
// - Reaching a context in the first ("good") group ends the walk silently.
// - Reaching a context in the second ("bad") group reports
//   PM_ERR_INVALID_YIELD on the node and ends the walk.
// - All other contexts are skipped over.
//
// NOTE(review): the embedded line numbers skip values between several of the
// `case` labels below, so some labels appear to be missing from this listing.
17987static void
17988parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17989 pm_context_node_t *context_node = parser->current_context;
17990
17991 while (context_node != NULL) {
17992 switch (context_node->context) {
17993 case PM_CONTEXT_DEF:
17995 case PM_CONTEXT_DEFINED:
17999 // These are the good cases. We're allowed to have a yield in
18000 // these contexts.
18001 return;
18002 case PM_CONTEXT_CLASS:
18006 case PM_CONTEXT_MAIN:
18007 case PM_CONTEXT_MODULE:
18011 case PM_CONTEXT_SCLASS:
18015 // These are the bad cases. We're not allowed to have a yield in
18016 // these contexts.
18017 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
18018 return;
18019 case PM_CONTEXT_NONE:
18020 // This case should never happen.
18021 assert(false && "unreachable");
18022 break;
18023 case PM_CONTEXT_BEGIN:
18032 case PM_CONTEXT_CASE_IN:
18035 case PM_CONTEXT_ELSE:
18036 case PM_CONTEXT_ELSIF:
18037 case PM_CONTEXT_EMBEXPR:
18039 case PM_CONTEXT_FOR:
18040 case PM_CONTEXT_IF:
18048 case PM_CONTEXT_PARENS:
18049 case PM_CONTEXT_POSTEXE:
18051 case PM_CONTEXT_PREEXE:
18053 case PM_CONTEXT_TERNARY:
18054 case PM_CONTEXT_UNLESS:
18055 case PM_CONTEXT_UNTIL:
18056 case PM_CONTEXT_WHILE:
18057 // In these contexts we should continue walking up the list of
18058 // contexts.
18059 break;
18060 }
18061
18062 context_node = context_node->prev;
18063 }
18064}
18065
// Data handed to the regular expression error callback.
// NOTE(review): the closing line of this typedef and some of its fields are
// not present in this listing; the code that fills it in also sets `.parser`
// and `.shared` members — confirm against the full declaration.
18070typedef struct {
18073
// Start of the location used for a reported error when the callback cannot
// use the location it was given (set from the regular expression node's
// location start by the code that fills this struct in).
18075 const uint8_t *start;
18076
// End of that same fallback location (set from the node's location end).
18078 const uint8_t *end;
18079
18088
// Error callback passed to pm_regexp_parse. Reports a
// PM_ERR_REGEXP_PARSE_ERROR diagnostic, formatted with the given message, on
// the parser stored in the callback data.
//
// start/end: the location of the error as reported by the regexp parser.
// message:   the text inserted into the diagnostic.
// data:      the opaque callback data.
//
// NOTE(review): the line that declares `callback_data` is not present in this
// listing; it is presumably derived from `data` — confirm.
18093static void
18094parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
18096 pm_location_t location;
18097
// When the `shared` flag is set the reported start/end are used directly as
// the error location. Otherwise the location stored in the callback data is
// used instead.
18098 if (callback_data->shared) {
18099 location = (pm_location_t) { .start = start, .end = end };
18100 } else {
18101 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18102 }
18103
18104 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18105}
18106
// Run the regular expression parser over the unescaped source of the given
// regular expression node, with parse_regular_expression_error installed as
// the error callback, so that any problems it reports are added to the parser
// as diagnostics.
//
// NOTE(review): the line that opens the declaration of `error_data` is not
// present in this listing; only its designated initializers are visible.
18110static void
18111parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18112 const pm_string_t *unescaped = &node->unescaped;
// The callback data carries the parser, the node's whole location as a
// fallback error location, and whether the unescaped string is of the
// PM_STRING_SHARED type.
18114 .parser = parser,
18115 .start = node->base.location.start,
18116 .end = node->base.location.end,
18117 .shared = unescaped->type == PM_STRING_SHARED
18118 };
18119
// The extended-mode argument is taken from the node's flags; the two NULL
// arguments are left unset here.
18120 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18121}
18122
18126static inline pm_node_t *
18127parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18128 switch (parser->current.type) {
18129 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
18130 parser_lex(parser);
18131
18132 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18133 pm_accepts_block_stack_push(parser, true);
18134 bool parsed_bare_hash = false;
18135
18136 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18137 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18138
18139 // Handle the case where we don't have a comma and we have a
18140 // newline followed by a right bracket.
18141 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18142 break;
18143 }
18144
18145 // Ensure that we have a comma between elements in the array.
18146 if (array->elements.size > 0) {
18147 if (accept1(parser, PM_TOKEN_COMMA)) {
18148 // If there was a comma but we also accepts a newline,
18149 // then this is a syntax error.
18150 if (accepted_newline) {
18151 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18152 }
18153 } else {
18154 // If there was no comma, then we need to add a syntax
18155 // error.
18156 const uint8_t *location = parser->previous.end;
18157 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18158
18159 parser->previous.start = location;
18160 parser->previous.type = PM_TOKEN_MISSING;
18161 }
18162 }
18163
18164 // If we have a right bracket immediately following a comma,
18165 // this is allowed since it's a trailing comma. In this case we
18166 // can break out of the loop.
18167 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18168
18169 pm_node_t *element;
18170
18171 if (accept1(parser, PM_TOKEN_USTAR)) {
18172 pm_token_t operator = parser->previous;
18173 pm_node_t *expression = NULL;
18174
18175 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18176 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18177 } else {
18178 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18179 }
18180
18181 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18182 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18183 if (parsed_bare_hash) {
18184 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18185 }
18186
18187 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18188 pm_static_literals_t hash_keys = { 0 };
18189
18190 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
18191 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18192 }
18193
18194 pm_static_literals_free(&hash_keys);
18195 parsed_bare_hash = true;
18196 } else {
18197 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18198
18199 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18200 if (parsed_bare_hash) {
18201 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18202 }
18203
18204 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18205 pm_static_literals_t hash_keys = { 0 };
18206 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18207
18208 pm_token_t operator;
18209 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18210 operator = parser->previous;
18211 } else {
18212 operator = not_provided(parser);
18213 }
18214
18215 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18216 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18217 pm_keyword_hash_node_elements_append(hash, assoc);
18218
18219 element = (pm_node_t *) hash;
18220 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18221 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18222 }
18223
18224 pm_static_literals_free(&hash_keys);
18225 parsed_bare_hash = true;
18226 }
18227 }
18228
18229 pm_array_node_elements_append(array, element);
18230 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18231 }
18232
18233 accept1(parser, PM_TOKEN_NEWLINE);
18234
18235 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18236 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18237 parser->previous.start = parser->previous.end;
18238 parser->previous.type = PM_TOKEN_MISSING;
18239 }
18240
18241 pm_array_node_close_set(array, &parser->previous);
18242 pm_accepts_block_stack_pop(parser);
18243
18244 return (pm_node_t *) array;
18245 }
18246 case PM_TOKEN_PARENTHESIS_LEFT:
18247 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
18248 pm_token_t opening = parser->current;
18249 pm_node_flags_t flags = 0;
18250
18251 pm_node_list_t current_block_exits = { 0 };
18252 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18253
18254 parser_lex(parser);
18255 while (true) {
18256 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18257 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18258 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18259 break;
18260 }
18261 }
18262
18263 // If this is the end of the file or we match a right parenthesis, then
18264 // we have an empty parentheses node, and we can immediately return.
18265 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18266 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18267
18268 pop_block_exits(parser, previous_block_exits);
18269 pm_node_list_free(&current_block_exits);
18270
18271 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
18272 }
18273
18274 // Otherwise, we're going to parse the first statement in the list
18275 // of statements within the parentheses.
18276 pm_accepts_block_stack_push(parser, true);
18277 context_push(parser, PM_CONTEXT_PARENS);
18278 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18279 context_pop(parser);
18280
18281 // Determine if this statement is followed by a terminator. In the
18282 // case of a single statement, this is fine. But in the case of
18283 // multiple statements it's required.
18284 bool terminator_found = false;
18285
18286 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18287 terminator_found = true;
18288 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18289 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18290 terminator_found = true;
18291 }
18292
18293 if (terminator_found) {
18294 while (true) {
18295 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18296 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18297 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18298 break;
18299 }
18300 }
18301 }
18302
18303 // If we hit a right parenthesis, then we're done parsing the
18304 // parentheses node, and we can check which kind of node we should
18305 // return.
18306 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18307 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
18308 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18309 }
18310
18311 parser_lex(parser);
18312 pm_accepts_block_stack_pop(parser);
18313
18314 pop_block_exits(parser, previous_block_exits);
18315 pm_node_list_free(&current_block_exits);
18316
18317 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18318 // If we have a single statement and are ending on a right
18319 // parenthesis, then we need to check if this is possibly a
18320 // multiple target node.
18321 pm_multi_target_node_t *multi_target;
18322
18323 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18324 multi_target = (pm_multi_target_node_t *) statement;
18325 } else {
18326 multi_target = pm_multi_target_node_create(parser);
18327 pm_multi_target_node_targets_append(parser, multi_target, statement);
18328 }
18329
18330 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18331 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18332
18333 multi_target->lparen_loc = lparen_loc;
18334 multi_target->rparen_loc = rparen_loc;
18335 multi_target->base.location.start = lparen_loc.start;
18336 multi_target->base.location.end = rparen_loc.end;
18337
18338 pm_node_t *result;
18339 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18340 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18341 accept1(parser, PM_TOKEN_NEWLINE);
18342 } else {
18343 result = (pm_node_t *) multi_target;
18344 }
18345
18346 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18347 // All set, this is explicitly allowed by the parent
18348 // context.
18349 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18350 // All set, we're inside a for loop and we're parsing
18351 // multiple targets.
18352 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18353 // Multi targets are not allowed when it's not a
18354 // statement level.
18355 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18356 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18357 // Multi targets must be followed by an equal sign in
18358 // order to be valid (or a right parenthesis if they are
18359 // nested).
18360 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18361 }
18362
18363 return result;
18364 }
18365
18366 // If we have a single statement and are ending on a right parenthesis
18367 // and we didn't return a multiple assignment node, then we can return a
18368 // regular parentheses node now.
18369 pm_statements_node_t *statements = pm_statements_node_create(parser);
18370 pm_statements_node_body_append(parser, statements, statement, true);
18371
18372 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18373 }
18374
18375 // If we have more than one statement in the set of parentheses,
18376 // then we are going to parse all of them as a list of statements.
18377 // We'll do that here.
18378 context_push(parser, PM_CONTEXT_PARENS);
18379 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18380
18381 pm_statements_node_t *statements = pm_statements_node_create(parser);
18382 pm_statements_node_body_append(parser, statements, statement, true);
18383
18384 // If we didn't find a terminator and we didn't find a right
18385 // parenthesis, then this is a syntax error.
18386 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18387 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18388 }
18389
18390 // Parse each statement within the parentheses.
18391 while (true) {
18392 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18393 pm_statements_node_body_append(parser, statements, node, true);
18394
18395 // If we're recovering from a syntax error, then we need to stop
18396 // parsing the statements now.
18397 if (parser->recovering) {
18398 // If this is the level of context where the recovery has
18399 // happened, then we can mark the parser as done recovering.
18400 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18401 break;
18402 }
18403
18404 // If we couldn't parse an expression at all, then we need to
18405 // bail out of the loop.
18406 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18407
18408 // If we successfully parsed a statement, then we are going to
18409 // need a terminator to delimit them.
18410 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18411 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18412 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18413 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18414 break;
18415 } else if (!match1(parser, PM_TOKEN_EOF)) {
18416 // If we're at the end of the file, then we're going to add
18417 // an error after this for the ) anyway.
18418 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18419 }
18420 }
18421
18422 context_pop(parser);
18423 pm_accepts_block_stack_pop(parser);
18424 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18425
18426 // When we're parsing multi targets, we allow them to be followed by
18427 // a right parenthesis if they are at the statement level. This is
18428 // only possible if they are the final statement in a parentheses.
18429 // We need to explicitly reject that here.
18430 {
18431 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18432
18433 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18434 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18435 pm_multi_target_node_targets_append(parser, multi_target, statement);
18436
18437 statement = (pm_node_t *) multi_target;
18438 statements->body.nodes[statements->body.size - 1] = statement;
18439 }
18440
18441 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18442 const uint8_t *offset = statement->location.end;
18443 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18444 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18445
18446 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18447 statements->body.nodes[statements->body.size - 1] = statement;
18448
18449 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18450 }
18451 }
18452
18453 pop_block_exits(parser, previous_block_exits);
18454 pm_node_list_free(&current_block_exits);
18455
18456 pm_void_statements_check(parser, statements, true);
18457 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18458 }
18459 case PM_TOKEN_BRACE_LEFT: {
18460 // If we were passed a current_hash_keys via the parser, then that
18461 // means we're already parsing a hash and we want to share the set
18462 // of hash keys with this inner hash we're about to parse for the
18463 // sake of warnings. We'll set it to NULL after we grab it to make
18464 // sure subsequent expressions don't use it. Effectively this is a
18465 // way of getting around passing it to every call to
18466 // parse_expression.
18467 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18468 parser->current_hash_keys = NULL;
18469
18470 pm_accepts_block_stack_push(parser, true);
18471 parser_lex(parser);
18472
18473 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18474
18475 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18476 if (current_hash_keys != NULL) {
18477 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18478 } else {
18479 pm_static_literals_t hash_keys = { 0 };
18480 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18481 pm_static_literals_free(&hash_keys);
18482 }
18483
18484 accept1(parser, PM_TOKEN_NEWLINE);
18485 }
18486
18487 pm_accepts_block_stack_pop(parser);
18488 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18489 pm_hash_node_closing_loc_set(node, &parser->previous);
18490
18491 return (pm_node_t *) node;
18492 }
18493 case PM_TOKEN_CHARACTER_LITERAL: {
18494 pm_token_t closing = not_provided(parser);
18495 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(
18496 parser,
18497 &(pm_token_t) {
18498 .type = PM_TOKEN_STRING_BEGIN,
18499 .start = parser->current.start,
18500 .end = parser->current.start + 1
18501 },
18502 &(pm_token_t) {
18503 .type = PM_TOKEN_STRING_CONTENT,
18504 .start = parser->current.start + 1,
18505 .end = parser->current.end
18506 },
18507 &closing
18508 );
18509
18510 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18511
18512 // Skip past the character literal here, since now we have handled
18513 // parser->explicit_encoding correctly.
18514 parser_lex(parser);
18515
18516 // Characters can be followed by strings in which case they are
18517 // automatically concatenated.
18518 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18519 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18520 }
18521
18522 return node;
18523 }
18524 case PM_TOKEN_CLASS_VARIABLE: {
18525 parser_lex(parser);
18526 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18527
18528 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18529 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18530 }
18531
18532 return node;
18533 }
18534 case PM_TOKEN_CONSTANT: {
18535 parser_lex(parser);
18536 pm_token_t constant = parser->previous;
18537
18538 // If a constant is immediately followed by parentheses, then this is in
18539 // fact a method call, not a constant read.
18540 if (
18541 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18542 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18543 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18544 match1(parser, PM_TOKEN_BRACE_LEFT)
18545 ) {
18546 pm_arguments_t arguments = { 0 };
18547 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18548 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18549 }
18550
18551 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18552
18553 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18554 // If we get here, then we have a comma immediately following a
18555 // constant, so we're going to parse this as a multiple assignment.
18556 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18557 }
18558
18559 return node;
18560 }
18561 case PM_TOKEN_UCOLON_COLON: {
18562 parser_lex(parser);
18563 pm_token_t delimiter = parser->previous;
18564
18565 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18566 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18567
18568 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18569 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18570 }
18571
18572 return node;
18573 }
18574 case PM_TOKEN_UDOT_DOT:
18575 case PM_TOKEN_UDOT_DOT_DOT: {
18576 pm_token_t operator = parser->current;
18577 parser_lex(parser);
18578
18579 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18580
18581 // Unary .. and ... are special because these are non-associative
18582 // operators that can also be unary operators. In this case we need
18583 // to explicitly reject code that has a .. or ... that follows this
18584 // expression.
18585 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18586 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18587 }
18588
18589 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18590 }
18591 case PM_TOKEN_FLOAT:
18592 parser_lex(parser);
18593 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18594 case PM_TOKEN_FLOAT_IMAGINARY:
18595 parser_lex(parser);
18596 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18597 case PM_TOKEN_FLOAT_RATIONAL:
18598 parser_lex(parser);
18599 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18600 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
18601 parser_lex(parser);
18602 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18603 case PM_TOKEN_NUMBERED_REFERENCE: {
18604 parser_lex(parser);
18605 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18606
18607 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18608 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18609 }
18610
18611 return node;
18612 }
18613 case PM_TOKEN_GLOBAL_VARIABLE: {
18614 parser_lex(parser);
18615 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18616
18617 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18618 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18619 }
18620
18621 return node;
18622 }
18623 case PM_TOKEN_BACK_REFERENCE: {
18624 parser_lex(parser);
18625 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18626
18627 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18628 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18629 }
18630
18631 return node;
18632 }
18633 case PM_TOKEN_IDENTIFIER:
18634 case PM_TOKEN_METHOD_NAME: {
18635 parser_lex(parser);
18636 pm_token_t identifier = parser->previous;
18637 pm_node_t *node = parse_variable_call(parser);
18638
18639 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18640 // If parse_variable_call returned with a call node, then we
18641 // know the identifier is not in the local table. In that case
18642 // we need to check if there are arguments following the
18643 // identifier.
18644 pm_call_node_t *call = (pm_call_node_t *) node;
18645 pm_arguments_t arguments = { 0 };
18646
18647 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18648 // Since we found arguments, we need to turn off the
18649 // variable call bit in the flags.
18650 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18651
18652 call->opening_loc = arguments.opening_loc;
18653 call->arguments = arguments.arguments;
18654 call->closing_loc = arguments.closing_loc;
18655 call->block = arguments.block;
18656
18657 const uint8_t *end = pm_arguments_end(&arguments);
18658 if (!end) {
18659 end = call->message_loc.end;
18660 }
18661 call->base.location.end = end;
18662 }
18663 } else {
18664 // Otherwise, we know the identifier is in the local table. This
18665 // can still be a method call if it is followed by arguments or
18666 // a block, so we need to check for that here.
18667 if (
18668 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18669 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18670 match1(parser, PM_TOKEN_BRACE_LEFT)
18671 ) {
18672 pm_arguments_t arguments = { 0 };
18673 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18674 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18675
18676 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
18677 // If we're about to convert an 'it' implicit local
18678 // variable read into a method call, we need to remove
18679 // it from the list of implicit local variables.
18680 parse_target_implicit_parameter(parser, node);
18681 } else {
18682 // Otherwise, we're about to convert a regular local
18683 // variable read into a method call, in which case we
18684 // need to indicate that this was not a read for the
18685 // purposes of warnings.
18686 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
18687
18688 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18689 parse_target_implicit_parameter(parser, node);
18690 } else {
18692 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18693 }
18694 }
18695
18696 pm_node_destroy(parser, node);
18697 return (pm_node_t *) fcall;
18698 }
18699 }
18700
18701 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18702 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18703 }
18704
18705 return node;
18706 }
18707 case PM_TOKEN_HEREDOC_START: {
18708 // Here we have found a heredoc. We'll parse it and add it to the
18709 // list of strings.
18710 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18711 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18712
18713 size_t common_whitespace = (size_t) -1;
18714 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18715
18716 parser_lex(parser);
18717 pm_token_t opening = parser->previous;
18718
18719 pm_node_t *node;
18720 pm_node_t *part;
18721
18722 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18723 // If we get here, then we have an empty heredoc. We'll create
18724 // an empty content token and return an empty string node.
18725 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18726 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18727
18728 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18729 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18730 } else {
18731 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18732 }
18733
18734 node->location.end = opening.end;
18735 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18736 // If we get here, then we tried to find something in the
18737 // heredoc but couldn't actually parse anything, so we'll just
18738 // return a missing node.
18739 //
18740 // parse_string_part handles its own errors, so there is no need
18741 // for us to add one here.
18742 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18743 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18744 // If we get here, then the part that we parsed was plain string
18745 // content and we're at the end of the heredoc, so we can return
18746 // just a string node with the heredoc opening and closing as
18747 // its opening and closing.
18748 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18749 pm_string_node_t *cast = (pm_string_node_t *) part;
18750
18751 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18752 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18753 cast->base.location = cast->opening_loc;
18754
18755 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18756 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18757 cast->base.type = PM_X_STRING_NODE;
18758 }
18759
18760 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18761 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18762 }
18763
18764 node = (pm_node_t *) cast;
18765 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18766 } else {
18767 // If we get here, then we have multiple parts in the heredoc,
18768 // so we'll need to create an interpolated string node to hold
18769 // them all.
18770 pm_node_list_t parts = { 0 };
18771 pm_node_list_append(&parts, part);
18772
18773 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18774 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18775 pm_node_list_append(&parts, part);
18776 }
18777 }
18778
18779 // Now that we have all of the parts, create the correct type of
18780 // interpolated node.
18781 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18782 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18783 cast->parts = parts;
18784
18785 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18786 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18787
18788 cast->base.location = cast->opening_loc;
18789 node = (pm_node_t *) cast;
18790 } else {
18791 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18792 pm_node_list_free(&parts);
18793
18794 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18795 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18796
18797 cast->base.location = cast->opening_loc;
18798 node = (pm_node_t *) cast;
18799 }
18800
18801 // If this is a heredoc that is indented with a ~, then we need
18802 // to dedent each line by the common leading whitespace.
18803 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18804 pm_node_list_t *nodes;
18805 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18806 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18807 } else {
18808 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18809 }
18810
18811 parse_heredoc_dedent(parser, nodes, common_whitespace);
18812 }
18813 }
18814
18815 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18816 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18817 }
18818
18819 return node;
18820 }
18821 case PM_TOKEN_INSTANCE_VARIABLE: {
18822 parser_lex(parser);
18823 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18824
18825 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18826 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18827 }
18828
18829 return node;
18830 }
18831 case PM_TOKEN_INTEGER: {
18832 pm_node_flags_t base = parser->integer_base;
18833 parser_lex(parser);
18834 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18835 }
18836 case PM_TOKEN_INTEGER_IMAGINARY: {
18837 pm_node_flags_t base = parser->integer_base;
18838 parser_lex(parser);
18839 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18840 }
18841 case PM_TOKEN_INTEGER_RATIONAL: {
18842 pm_node_flags_t base = parser->integer_base;
18843 parser_lex(parser);
18844 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18845 }
18846 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18847 pm_node_flags_t base = parser->integer_base;
18848 parser_lex(parser);
18849 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18850 }
18851 case PM_TOKEN_KEYWORD___ENCODING__:
18852 parser_lex(parser);
18853 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18854 case PM_TOKEN_KEYWORD___FILE__:
18855 parser_lex(parser);
18856 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18857 case PM_TOKEN_KEYWORD___LINE__:
18858 parser_lex(parser);
18859 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18860 case PM_TOKEN_KEYWORD_ALIAS: {
18861 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18862 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18863 }
18864
18865 parser_lex(parser);
18866 pm_token_t keyword = parser->previous;
18867
18868 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18869 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18870
18871 switch (PM_NODE_TYPE(new_name)) {
18872 case PM_BACK_REFERENCE_READ_NODE:
18873 case PM_NUMBERED_REFERENCE_READ_NODE:
18874 case PM_GLOBAL_VARIABLE_READ_NODE: {
18875 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18876 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18877 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18878 }
18879 } else {
18880 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18881 }
18882
18883 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18884 }
18885 case PM_SYMBOL_NODE:
18886 case PM_INTERPOLATED_SYMBOL_NODE: {
18887 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18888 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18889 }
18890 }
18892 default:
18893 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18894 }
18895 }
18896 case PM_TOKEN_KEYWORD_CASE: {
18897 size_t opening_newline_index = token_newline_index(parser);
18898 parser_lex(parser);
18899
18900 pm_token_t case_keyword = parser->previous;
18901 pm_node_t *predicate = NULL;
18902
18903 pm_node_list_t current_block_exits = { 0 };
18904 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18905
18906 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18907 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18908 predicate = NULL;
18909 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18910 predicate = NULL;
18911 } else if (!token_begins_expression_p(parser->current.type)) {
18912 predicate = NULL;
18913 } else {
18914 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18915 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18916 }
18917
18918 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18919 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18920 parser_lex(parser);
18921
18922 pop_block_exits(parser, previous_block_exits);
18923 pm_node_list_free(&current_block_exits);
18924
18925 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18926 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18927 }
18928
18929 // At this point we can create a case node, though we don't yet know
18930 // if it is a case-in or case-when node.
18931 pm_token_t end_keyword = not_provided(parser);
18932 pm_node_t *node;
18933
18934 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18935 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18936 pm_static_literals_t literals = { 0 };
18937
18938 // At this point we've seen a when keyword, so we know this is a
18939 // case-when node. We will continue to parse the when nodes
18940 // until we hit the end of the list.
18941 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18942 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18943 parser_lex(parser);
18944
18945 pm_token_t when_keyword = parser->previous;
18946 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18947
18948 do {
18949 if (accept1(parser, PM_TOKEN_USTAR)) {
18950 pm_token_t operator = parser->previous;
18951 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18952
18953 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18954 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18955
18956 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18957 } else {
18958 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18959 pm_when_node_conditions_append(when_node, condition);
18960
18961 // If we found a missing node, then this is a syntax
18962 // error and we should stop looping.
18963 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18964
18965 // If this is a string node, then we need to mark it
18966 // as frozen because when clause strings are frozen.
18967 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18968 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18969 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18970 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18971 }
18972
18973 pm_when_clause_static_literals_add(parser, &literals, condition);
18974 }
18975 } while (accept1(parser, PM_TOKEN_COMMA));
18976
18977 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18978 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18979 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18980 }
18981 } else {
18982 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18983 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18984 }
18985
18986 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18987 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18988 if (statements != NULL) {
18989 pm_when_node_statements_set(when_node, statements);
18990 }
18991 }
18992
18993 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18994 }
18995
18996 // If we didn't parse any conditions (in or when) then we need
18997 // to indicate that we have an error.
18998 if (case_node->conditions.size == 0) {
18999 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19000 }
19001
19002 pm_static_literals_free(&literals);
19003 node = (pm_node_t *) case_node;
19004 } else {
19005 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
19006
19007 // If this is a case-match node (i.e., it is a pattern matching
19008 // case statement) then we must have a predicate.
19009 if (predicate == NULL) {
19010 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
19011 }
19012
19013 // At this point we expect that we're parsing a case-in node. We
19014 // will continue to parse the in nodes until we hit the end of
19015 // the list.
19016 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
19017 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
19018
19019 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
19020 parser->pattern_matching_newlines = true;
19021
19022 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
19023 parser->command_start = false;
19024 parser_lex(parser);
19025
19026 pm_token_t in_keyword = parser->previous;
19027
19028 pm_constant_id_list_t captures = { 0 };
19029 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
19030
19031 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
19032 pm_constant_id_list_free(&captures);
19033
19034 // Since we're in the top-level of the case-in node we need
19035 // to check for guard clauses in the form of `if` or
19036 // `unless` statements.
19037 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
19038 pm_token_t keyword = parser->previous;
19039 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
19040 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
19041 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
19042 pm_token_t keyword = parser->previous;
19043 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
19044 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
19045 }
19046
19047 // Now we need to check for the terminator of the in node's
19048 // pattern. It can be a newline or semicolon optionally
19049 // followed by a `then` keyword.
19050 pm_token_t then_keyword;
19051 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19052 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
19053 then_keyword = parser->previous;
19054 } else {
19055 then_keyword = not_provided(parser);
19056 }
19057 } else {
19058 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
19059 then_keyword = parser->previous;
19060 }
19061
19062 // Now we can actually parse the statements associated with
19063 // the in node.
19064 pm_statements_node_t *statements;
19065 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19066 statements = NULL;
19067 } else {
19068 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
19069 }
19070
19071 // Now that we have the full pattern and statements, we can
19072 // create the node and attach it to the case node.
19073 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
19074 pm_case_match_node_condition_append(case_node, condition);
19075 }
19076
19077 // If we didn't parse any conditions (in or when) then we need
19078 // to indicate that we have an error.
19079 if (case_node->conditions.size == 0) {
19080 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19081 }
19082
19083 node = (pm_node_t *) case_node;
19084 }
19085
19086 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19087 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
19088 pm_token_t else_keyword = parser->previous;
19089 pm_else_node_t *else_node;
19090
19091 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19092 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
19093 } else {
19094 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
19095 }
19096
19097 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19098 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
19099 } else {
19100 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
19101 }
19102 }
19103
19104 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
19105 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
19106
19107 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19108 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
19109 } else {
19110 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
19111 }
19112
19113 pop_block_exits(parser, previous_block_exits);
19114 pm_node_list_free(&current_block_exits);
19115
19116 return node;
19117 }
19118 case PM_TOKEN_KEYWORD_BEGIN: {
19119 size_t opening_newline_index = token_newline_index(parser);
19120 parser_lex(parser);
19121
19122 pm_token_t begin_keyword = parser->previous;
19123 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19124
19125 pm_node_list_t current_block_exits = { 0 };
19126 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19127 pm_statements_node_t *begin_statements = NULL;
19128
19129 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19130 pm_accepts_block_stack_push(parser, true);
19131 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19132 pm_accepts_block_stack_pop(parser);
19133 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19134 }
19135
19136 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19137 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19138 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19139
19140 begin_node->base.location.end = parser->previous.end;
19141 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19142
19143 pop_block_exits(parser, previous_block_exits);
19144 pm_node_list_free(&current_block_exits);
19145
19146 return (pm_node_t *) begin_node;
19147 }
19148 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
19149 pm_node_list_t current_block_exits = { 0 };
19150 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19151
19152 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19153 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19154 }
19155
19156 parser_lex(parser);
19157 pm_token_t keyword = parser->previous;
19158
19159 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19160 pm_token_t opening = parser->previous;
19161 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19162
19163 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19164 pm_context_t context = parser->current_context->context;
19165 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19166 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19167 }
19168
19169 flush_block_exits(parser, previous_block_exits);
19170 pm_node_list_free(&current_block_exits);
19171
19172 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19173 }
19174 case PM_TOKEN_KEYWORD_BREAK:
19175 case PM_TOKEN_KEYWORD_NEXT:
19176 case PM_TOKEN_KEYWORD_RETURN: {
19177 parser_lex(parser);
19178
19179 pm_token_t keyword = parser->previous;
19180 pm_arguments_t arguments = { 0 };
19181
19182 if (
19183 token_begins_expression_p(parser->current.type) ||
19184 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19185 ) {
19186 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19187
19188 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19189 pm_token_t next = parser->current;
19190 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19191
19192 // Reject `foo && return bar`.
19193 if (!accepts_command_call && arguments.arguments != NULL) {
19194 PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
19195 }
19196 }
19197 }
19198
19199 switch (keyword.type) {
19200 case PM_TOKEN_KEYWORD_BREAK: {
19201 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19202 if (!parser->partial_script) parse_block_exit(parser, node);
19203 return node;
19204 }
19205 case PM_TOKEN_KEYWORD_NEXT: {
19206 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19207 if (!parser->partial_script) parse_block_exit(parser, node);
19208 return node;
19209 }
19210 case PM_TOKEN_KEYWORD_RETURN: {
19211 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19212 parse_return(parser, node);
19213 return node;
19214 }
19215 default:
19216 assert(false && "unreachable");
19217 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19218 }
19219 }
19220 case PM_TOKEN_KEYWORD_SUPER: {
19221 parser_lex(parser);
19222
19223 pm_token_t keyword = parser->previous;
19224 pm_arguments_t arguments = { 0 };
19225 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19226
19227 if (
19228 arguments.opening_loc.start == NULL &&
19229 arguments.arguments == NULL &&
19230 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19231 ) {
19232 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19233 }
19234
19235 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19236 }
19237 case PM_TOKEN_KEYWORD_YIELD: {
19238 parser_lex(parser);
19239
19240 pm_token_t keyword = parser->previous;
19241 pm_arguments_t arguments = { 0 };
19242 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19243
19244 // It's possible that we've parsed a block argument through our
19245 // call to parse_arguments_list. If we found one, we should mark it
19246 // as invalid and destroy it, as we don't have a place for it on the
19247 // yield node.
19248 if (arguments.block != NULL) {
19249 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19250 pm_node_destroy(parser, arguments.block);
19251 arguments.block = NULL;
19252 }
19253
19254 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19255 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19256
19257 return node;
19258 }
19259 case PM_TOKEN_KEYWORD_CLASS: {
19260 size_t opening_newline_index = token_newline_index(parser);
19261 parser_lex(parser);
19262
19263 pm_token_t class_keyword = parser->previous;
19264 pm_do_loop_stack_push(parser, false);
19265
19266 pm_node_list_t current_block_exits = { 0 };
19267 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19268
19269 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19270 pm_token_t operator = parser->previous;
19271 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19272
19273 pm_parser_scope_push(parser, true);
19274 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19275 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19276 }
19277
19278 pm_node_t *statements = NULL;
19279 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19280 pm_accepts_block_stack_push(parser, true);
19281 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19282 pm_accepts_block_stack_pop(parser);
19283 }
19284
19285 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19286 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19287 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19288 } else {
19289 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19290 }
19291
19292 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19293
19294 pm_constant_id_list_t locals;
19295 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19296
19297 pm_parser_scope_pop(parser);
19298 pm_do_loop_stack_pop(parser);
19299
19300 flush_block_exits(parser, previous_block_exits);
19301 pm_node_list_free(&current_block_exits);
19302
19303 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19304 }
19305
19306 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19307 pm_token_t name = parser->previous;
19308 if (name.type != PM_TOKEN_CONSTANT) {
19309 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19310 }
19311
19312 pm_token_t inheritance_operator;
19313 pm_node_t *superclass;
19314
19315 if (match1(parser, PM_TOKEN_LESS)) {
19316 inheritance_operator = parser->current;
19317 lex_state_set(parser, PM_LEX_STATE_BEG);
19318
19319 parser->command_start = true;
19320 parser_lex(parser);
19321
19322 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19323 } else {
19324 inheritance_operator = not_provided(parser);
19325 superclass = NULL;
19326 }
19327
19328 pm_parser_scope_push(parser, true);
19329
19330 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19331 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19332 } else {
19333 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19334 }
19335 pm_node_t *statements = NULL;
19336
19337 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19338 pm_accepts_block_stack_push(parser, true);
19339 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19340 pm_accepts_block_stack_pop(parser);
19341 }
19342
19343 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19344 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19345 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19346 } else {
19347 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19348 }
19349
19350 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19351
19352 if (context_def_p(parser)) {
19353 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19354 }
19355
19356 pm_constant_id_list_t locals;
19357 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19358
19359 pm_parser_scope_pop(parser);
19360 pm_do_loop_stack_pop(parser);
19361
19362 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19363 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19364 }
19365
19366 pop_block_exits(parser, previous_block_exits);
19367 pm_node_list_free(&current_block_exits);
19368
19369 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19370 }
19371 case PM_TOKEN_KEYWORD_DEF: {
19372 pm_node_list_t current_block_exits = { 0 };
19373 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19374
19375 pm_token_t def_keyword = parser->current;
19376 size_t opening_newline_index = token_newline_index(parser);
19377
19378 pm_node_t *receiver = NULL;
19379 pm_token_t operator = not_provided(parser);
19380 pm_token_t name;
19381
19382 // This context is necessary for lexing `...` correctly in bare
19383 // params. It must be pushed before lexing the first param, so it
19384 // is pushed here.
19385 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19386 parser_lex(parser);
19387
19388 // This will be false if the method name is not a valid identifier
19389 // but could be followed by an operator.
19390 bool valid_name = true;
19391
19392 switch (parser->current.type) {
19393 case PM_CASE_OPERATOR:
19394 pm_parser_scope_push(parser, true);
19395 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19396 parser_lex(parser);
19397
19398 name = parser->previous;
19399 break;
19400 case PM_TOKEN_IDENTIFIER: {
19401 parser_lex(parser);
19402
19403 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19404 receiver = parse_variable_call(parser);
19405
19406 pm_parser_scope_push(parser, true);
19407 lex_state_set(parser, PM_LEX_STATE_FNAME);
19408 parser_lex(parser);
19409
19410 operator = parser->previous;
19411 name = parse_method_definition_name(parser);
19412 } else {
19413 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19414 pm_parser_scope_push(parser, true);
19415
19416 name = parser->previous;
19417 }
19418
19419 break;
19420 }
19421 case PM_TOKEN_INSTANCE_VARIABLE:
19422 case PM_TOKEN_CLASS_VARIABLE:
19423 case PM_TOKEN_GLOBAL_VARIABLE:
19424 valid_name = false;
19426 case PM_TOKEN_CONSTANT:
19427 case PM_TOKEN_KEYWORD_NIL:
19428 case PM_TOKEN_KEYWORD_SELF:
19429 case PM_TOKEN_KEYWORD_TRUE:
19430 case PM_TOKEN_KEYWORD_FALSE:
19431 case PM_TOKEN_KEYWORD___FILE__:
19432 case PM_TOKEN_KEYWORD___LINE__:
19433 case PM_TOKEN_KEYWORD___ENCODING__: {
19434 pm_parser_scope_push(parser, true);
19435 parser_lex(parser);
19436
19437 pm_token_t identifier = parser->previous;
19438
19439 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19440 lex_state_set(parser, PM_LEX_STATE_FNAME);
19441 parser_lex(parser);
19442 operator = parser->previous;
19443
19444 switch (identifier.type) {
19445 case PM_TOKEN_CONSTANT:
19446 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19447 break;
19448 case PM_TOKEN_INSTANCE_VARIABLE:
19449 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19450 break;
19451 case PM_TOKEN_CLASS_VARIABLE:
19452 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19453 break;
19454 case PM_TOKEN_GLOBAL_VARIABLE:
19455 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19456 break;
19457 case PM_TOKEN_KEYWORD_NIL:
19458 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19459 break;
19460 case PM_TOKEN_KEYWORD_SELF:
19461 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19462 break;
19463 case PM_TOKEN_KEYWORD_TRUE:
19464 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19465 break;
19466 case PM_TOKEN_KEYWORD_FALSE:
19467 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19468 break;
19469 case PM_TOKEN_KEYWORD___FILE__:
19470 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19471 break;
19472 case PM_TOKEN_KEYWORD___LINE__:
19473 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19474 break;
19475 case PM_TOKEN_KEYWORD___ENCODING__:
19476 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19477 break;
19478 default:
19479 break;
19480 }
19481
19482 name = parse_method_definition_name(parser);
19483 } else {
19484 if (!valid_name) {
19485 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19486 }
19487
19488 name = identifier;
19489 }
19490 break;
19491 }
19492 case PM_TOKEN_PARENTHESIS_LEFT: {
19493 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19494 // the inner expression of this parenthesis should not be
19495 // processed under this context. Thus, the context is popped
19496 // here.
19497 context_pop(parser);
19498 parser_lex(parser);
19499
19500 pm_token_t lparen = parser->previous;
19501 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19502
19503 accept1(parser, PM_TOKEN_NEWLINE);
19504 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19505 pm_token_t rparen = parser->previous;
19506
19507 lex_state_set(parser, PM_LEX_STATE_FNAME);
19508 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19509
19510 operator = parser->previous;
19511 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
19512
19513 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19514 // reason as described above.
19515 pm_parser_scope_push(parser, true);
19516 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19517 name = parse_method_definition_name(parser);
19518 break;
19519 }
19520 default:
19521 pm_parser_scope_push(parser, true);
19522 name = parse_method_definition_name(parser);
19523 break;
19524 }
19525
19526 pm_token_t lparen;
19527 pm_token_t rparen;
19528 pm_parameters_node_t *params;
19529
19530 switch (parser->current.type) {
19531 case PM_TOKEN_PARENTHESIS_LEFT: {
19532 parser_lex(parser);
19533 lparen = parser->previous;
19534
19535 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19536 params = NULL;
19537 } else {
19538 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19539 }
19540
19541 lex_state_set(parser, PM_LEX_STATE_BEG);
19542 parser->command_start = true;
19543
19544 context_pop(parser);
19545 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19546 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19547 parser->previous.start = parser->previous.end;
19548 parser->previous.type = PM_TOKEN_MISSING;
19549 }
19550
19551 rparen = parser->previous;
19552 break;
19553 }
19554 case PM_CASE_PARAMETER: {
19555 // If we're about to lex a label, we need to add the label
19556 // state to make sure the next newline is ignored.
19557 if (parser->current.type == PM_TOKEN_LABEL) {
19558 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19559 }
19560
19561 lparen = not_provided(parser);
19562 rparen = not_provided(parser);
19563 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19564
19565 context_pop(parser);
19566 break;
19567 }
19568 default: {
19569 lparen = not_provided(parser);
19570 rparen = not_provided(parser);
19571 params = NULL;
19572
19573 context_pop(parser);
19574 break;
19575 }
19576 }
19577
19578 pm_node_t *statements = NULL;
19579 pm_token_t equal;
19580 pm_token_t end_keyword;
19581
19582 if (accept1(parser, PM_TOKEN_EQUAL)) {
19583 if (token_is_setter_name(&name)) {
19584 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19585 }
19586 equal = parser->previous;
19587
19588 context_push(parser, PM_CONTEXT_DEF);
19589 pm_do_loop_stack_push(parser, false);
19590 statements = (pm_node_t *) pm_statements_node_create(parser);
19591
19592 bool allow_command_call;
19593 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
19594 allow_command_call = accepts_command_call;
19595 } else {
19596 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
19597 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
19598 }
19599
19600 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19601
19602 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19603 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19604
19605 pm_token_t rescue_keyword = parser->previous;
19606 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19607 context_pop(parser);
19608
19609 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19610 }
19611
19612 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19613 pm_do_loop_stack_pop(parser);
19614 context_pop(parser);
19615 end_keyword = not_provided(parser);
19616 } else {
19617 equal = not_provided(parser);
19618
19619 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19620 lex_state_set(parser, PM_LEX_STATE_BEG);
19621 parser->command_start = true;
19622 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19623 } else {
19624 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19625 }
19626
19627 pm_accepts_block_stack_push(parser, true);
19628 pm_do_loop_stack_push(parser, false);
19629
19630 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19631 pm_accepts_block_stack_push(parser, true);
19632 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19633 pm_accepts_block_stack_pop(parser);
19634 }
19635
19636 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19637 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19638 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19639 } else {
19640 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19641 }
19642
19643 pm_accepts_block_stack_pop(parser);
19644 pm_do_loop_stack_pop(parser);
19645
19646 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19647 end_keyword = parser->previous;
19648 }
19649
19650 pm_constant_id_list_t locals;
19651 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19652 pm_parser_scope_pop(parser);
19653
19659 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19660
19661 flush_block_exits(parser, previous_block_exits);
19662 pm_node_list_free(&current_block_exits);
19663
19664 return (pm_node_t *) pm_def_node_create(
19665 parser,
19666 name_id,
19667 &name,
19668 receiver,
19669 params,
19670 statements,
19671 &locals,
19672 &def_keyword,
19673 &operator,
19674 &lparen,
19675 &rparen,
19676 &equal,
19677 &end_keyword
19678 );
19679 }
19680 case PM_TOKEN_KEYWORD_DEFINED: {
19681 parser_lex(parser);
19682 pm_token_t keyword = parser->previous;
19683
19684 pm_token_t lparen;
19685 pm_token_t rparen;
19686 pm_node_t *expression;
19687
19688 context_push(parser, PM_CONTEXT_DEFINED);
19689 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19690
19691 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19692 lparen = parser->previous;
19693
19694 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19695 expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19696 lparen = not_provided(parser);
19697 rparen = not_provided(parser);
19698 } else {
19699 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19700
19701 if (parser->recovering) {
19702 rparen = not_provided(parser);
19703 } else {
19704 accept1(parser, PM_TOKEN_NEWLINE);
19705 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19706 rparen = parser->previous;
19707 }
19708 }
19709 } else {
19710 lparen = not_provided(parser);
19711 rparen = not_provided(parser);
19712 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19713 }
19714
19715 context_pop(parser);
19716 return (pm_node_t *) pm_defined_node_create(
19717 parser,
19718 &lparen,
19719 expression,
19720 &rparen,
19721 &PM_LOCATION_TOKEN_VALUE(&keyword)
19722 );
19723 }
19724 case PM_TOKEN_KEYWORD_END_UPCASE: {
19725 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19726 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19727 }
19728
19729 parser_lex(parser);
19730 pm_token_t keyword = parser->previous;
19731
19732 if (context_def_p(parser)) {
19733 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19734 }
19735
19736 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19737 pm_token_t opening = parser->previous;
19738 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19739
19740 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19741 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19742 }
19743 case PM_TOKEN_KEYWORD_FALSE:
19744 parser_lex(parser);
19745 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19746 case PM_TOKEN_KEYWORD_FOR: {
19747 size_t opening_newline_index = token_newline_index(parser);
19748 parser_lex(parser);
19749
19750 pm_token_t for_keyword = parser->previous;
19751 pm_node_t *index;
19752
19753 context_push(parser, PM_CONTEXT_FOR_INDEX);
19754
19755 // First, parse out the first index expression.
19756 if (accept1(parser, PM_TOKEN_USTAR)) {
19757 pm_token_t star_operator = parser->previous;
19758 pm_node_t *name = NULL;
19759
19760 if (token_begins_expression_p(parser->current.type)) {
19761 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19762 }
19763
19764 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19765 } else if (token_begins_expression_p(parser->current.type)) {
19766 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19767 } else {
19768 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19769 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19770 }
19771
19772 // Now, if there are multiple index expressions, parse them out.
19773 if (match1(parser, PM_TOKEN_COMMA)) {
19774 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19775 } else {
19776 index = parse_target(parser, index, false, false);
19777 }
19778
19779 context_pop(parser);
19780 pm_do_loop_stack_push(parser, true);
19781
19782 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19783 pm_token_t in_keyword = parser->previous;
19784
19785 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19786 pm_do_loop_stack_pop(parser);
19787
19788 pm_token_t do_keyword;
19789 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19790 do_keyword = parser->previous;
19791 } else {
19792 do_keyword = not_provided(parser);
19793 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19794 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19795 }
19796 }
19797
19798 pm_statements_node_t *statements = NULL;
19799 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19800 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19801 }
19802
19803 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19804 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19805
19806 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19807 }
19808 case PM_TOKEN_KEYWORD_IF:
19809 if (parser_end_of_line_p(parser)) {
19810 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19811 }
19812
19813 size_t opening_newline_index = token_newline_index(parser);
19814 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19815 parser_lex(parser);
19816
19817 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19818 case PM_TOKEN_KEYWORD_UNDEF: {
19819 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19820 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19821 }
19822
19823 parser_lex(parser);
19824 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19825 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19826
19827 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19828 pm_node_destroy(parser, name);
19829 } else {
19830 pm_undef_node_append(undef, name);
19831
19832 while (match1(parser, PM_TOKEN_COMMA)) {
19833 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19834 parser_lex(parser);
19835 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19836
19837 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19838 pm_node_destroy(parser, name);
19839 break;
19840 }
19841
19842 pm_undef_node_append(undef, name);
19843 }
19844 }
19845
19846 return (pm_node_t *) undef;
19847 }
19848 case PM_TOKEN_KEYWORD_NOT: {
19849 parser_lex(parser);
19850
19851 pm_token_t message = parser->previous;
19852 pm_arguments_t arguments = { 0 };
19853 pm_node_t *receiver = NULL;
19854
19855 // If we do not accept a command call, then we also do not accept a
19856 // not without parentheses. In this case we need to reject this
19857 // syntax.
19858 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19859 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19860 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19861 } else {
19862 accept1(parser, PM_TOKEN_NEWLINE);
19863 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19864 }
19865
19866 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
19867 }
19868
19869 accept1(parser, PM_TOKEN_NEWLINE);
19870
19871 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19872 pm_token_t lparen = parser->previous;
19873
19874 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19875 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19876 } else {
19877 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19878 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19879
19880 if (!parser->recovering) {
19881 accept1(parser, PM_TOKEN_NEWLINE);
19882 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19883 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19884 }
19885 }
19886 } else {
19887 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19888 }
19889
19890 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19891 }
19892 case PM_TOKEN_KEYWORD_UNLESS: {
19893 size_t opening_newline_index = token_newline_index(parser);
19894 parser_lex(parser);
19895
19896 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19897 }
19898 case PM_TOKEN_KEYWORD_MODULE: {
19899 pm_node_list_t current_block_exits = { 0 };
19900 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19901
19902 size_t opening_newline_index = token_newline_index(parser);
19903 parser_lex(parser);
19904 pm_token_t module_keyword = parser->previous;
19905
19906 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19907 pm_token_t name;
19908
19909 // If we can recover from a syntax error that occurred while parsing
19910 // the name of the module, then we'll handle that here.
19911 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19912 pop_block_exits(parser, previous_block_exits);
19913 pm_node_list_free(&current_block_exits);
19914
19915 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19916 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19917 }
19918
19919 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19920 pm_token_t double_colon = parser->previous;
19921
19922 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19923 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19924 }
19925
19926 // Here we retrieve the name of the module. If it wasn't a constant,
19927 // then it's possible that `module foo` was passed, which is a
19928 // syntax error. We handle that here as well.
19929 name = parser->previous;
19930 if (name.type != PM_TOKEN_CONSTANT) {
19931 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19932 }
19933
19934 pm_parser_scope_push(parser, true);
19935 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19936 pm_node_t *statements = NULL;
19937
19938 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19939 pm_accepts_block_stack_push(parser, true);
19940 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19941 pm_accepts_block_stack_pop(parser);
19942 }
19943
19944 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19945 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19946 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19947 } else {
19948 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19949 }
19950
19951 pm_constant_id_list_t locals;
19952 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19953
19954 pm_parser_scope_pop(parser);
19955 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19956
19957 if (context_def_p(parser)) {
19958 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19959 }
19960
19961 pop_block_exits(parser, previous_block_exits);
19962 pm_node_list_free(&current_block_exits);
19963
19964 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19965 }
19966 case PM_TOKEN_KEYWORD_NIL:
19967 parser_lex(parser);
19968 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19969 case PM_TOKEN_KEYWORD_REDO: {
19970 parser_lex(parser);
19971
19972 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19973 if (!parser->partial_script) parse_block_exit(parser, node);
19974
19975 return node;
19976 }
19977 case PM_TOKEN_KEYWORD_RETRY: {
19978 parser_lex(parser);
19979
19980 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19981 parse_retry(parser, node);
19982
19983 return node;
19984 }
19985 case PM_TOKEN_KEYWORD_SELF:
19986 parser_lex(parser);
19987 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19988 case PM_TOKEN_KEYWORD_TRUE:
19989 parser_lex(parser);
19990 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19991 case PM_TOKEN_KEYWORD_UNTIL: {
19992 size_t opening_newline_index = token_newline_index(parser);
19993
19994 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19995 pm_do_loop_stack_push(parser, true);
19996
19997 parser_lex(parser);
19998 pm_token_t keyword = parser->previous;
19999 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
20000
20001 pm_do_loop_stack_pop(parser);
20002 context_pop(parser);
20003
20004 pm_token_t do_keyword;
20005 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20006 do_keyword = parser->previous;
20007 } else {
20008 do_keyword = not_provided(parser);
20009 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
20010 }
20011
20012 pm_statements_node_t *statements = NULL;
20013 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20014 pm_accepts_block_stack_push(parser, true);
20015 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
20016 pm_accepts_block_stack_pop(parser);
20017 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20018 }
20019
20020 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20021 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
20022
20023 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
20024 }
20025 case PM_TOKEN_KEYWORD_WHILE: {
20026 size_t opening_newline_index = token_newline_index(parser);
20027
20028 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20029 pm_do_loop_stack_push(parser, true);
20030
20031 parser_lex(parser);
20032 pm_token_t keyword = parser->previous;
20033 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
20034
20035 pm_do_loop_stack_pop(parser);
20036 context_pop(parser);
20037
20038 pm_token_t do_keyword;
20039 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20040 do_keyword = parser->previous;
20041 } else {
20042 do_keyword = not_provided(parser);
20043 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
20044 }
20045
20046 pm_statements_node_t *statements = NULL;
20047 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20048 pm_accepts_block_stack_push(parser, true);
20049 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
20050 pm_accepts_block_stack_pop(parser);
20051 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20052 }
20053
20054 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20055 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
20056
20057 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
20058 }
20059 case PM_TOKEN_PERCENT_LOWER_I: {
20060 parser_lex(parser);
20061 pm_token_t opening = parser->previous;
20062 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20063
20064 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20065 accept1(parser, PM_TOKEN_WORDS_SEP);
20066 if (match1(parser, PM_TOKEN_STRING_END)) break;
20067
20068 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20069 pm_token_t opening = not_provided(parser);
20070 pm_token_t closing = not_provided(parser);
20071 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
20072 }
20073
20074 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
20075 }
20076
20077 pm_token_t closing = parser->current;
20078 if (match1(parser, PM_TOKEN_EOF)) {
20079 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
20080 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20081 } else {
20082 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
20083 }
20084 pm_array_node_close_set(array, &closing);
20085
20086 return (pm_node_t *) array;
20087 }
20088 case PM_TOKEN_PERCENT_UPPER_I: {
20089 parser_lex(parser);
20090 pm_token_t opening = parser->previous;
20091 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20092
20093 // This is the current node that we are parsing that will be added to the
20094 // list of elements.
20095 pm_node_t *current = NULL;
20096
20097 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20098 switch (parser->current.type) {
20099 case PM_TOKEN_WORDS_SEP: {
20100 if (current == NULL) {
20101 // If we hit a separator before we have any content, then we don't
20102 // need to do anything.
20103 } else {
20104 // If we hit a separator after we've hit content, then we need to
20105 // append that content to the list and reset the current node.
20106 pm_array_node_elements_append(array, current);
20107 current = NULL;
20108 }
20109
20110 parser_lex(parser);
20111 break;
20112 }
20113 case PM_TOKEN_STRING_CONTENT: {
20114 pm_token_t opening = not_provided(parser);
20115 pm_token_t closing = not_provided(parser);
20116
20117 if (current == NULL) {
20118 // If we hit content and the current node is NULL, then this is
20119 // the first string content we've seen. In that case we're going
20120 // to create a new symbol node and set that to the current.
20121 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
20122 parser_lex(parser);
20123 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20124 // If we hit string content and the current node is an
20125 // interpolated symbol, then we need to append the string content
20126 // to the list of child nodes.
20127 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20128 parser_lex(parser);
20129
20130 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
20131 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20132 // If we hit string content and the current node is a symbol node,
20133 // then we need to convert the current node into an interpolated
20134 // symbol and add the string content to the list of child nodes.
20135 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20136 pm_token_t bounds = not_provided(parser);
20137
20138 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
20139 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
20140 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
20141 parser_lex(parser);
20142
20143 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20144 pm_interpolated_symbol_node_append(interpolated, first_string);
20145 pm_interpolated_symbol_node_append(interpolated, second_string);
20146
20147 xfree(current);
20148 current = (pm_node_t *) interpolated;
20149 } else {
20150 assert(false && "unreachable");
20151 }
20152
20153 break;
20154 }
20155 case PM_TOKEN_EMBVAR: {
20156 bool start_location_set = false;
20157 if (current == NULL) {
20158 // If we hit an embedded variable and the current node is NULL,
20159 // then this is the start of a new string. We'll set the current
20160 // node to a new interpolated symbol.
20161 pm_token_t opening = not_provided(parser);
20162 pm_token_t closing = not_provided(parser);
20163 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20164 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20165 // If we hit an embedded variable and the current node is a symbol
20166 // node, then we'll create an interpolated symbol and add the
20167 // symbol (converted to a string node) as its first part.
20168 pm_token_t opening = not_provided(parser);
20169 pm_token_t closing = not_provided(parser);
20170 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20171
20172 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20173 pm_interpolated_symbol_node_append(interpolated, current);
20174 interpolated->base.location.start = current->location.start;
20175 start_location_set = true;
20176 current = (pm_node_t *) interpolated;
20177 } else {
20178 // If we hit an embedded variable and the current node is an
20179 // interpolated symbol, then we'll just add the embedded variable.
20180 }
20181
20182 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20183 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20184 if (!start_location_set) {
20185 current->location.start = part->location.start;
20186 }
20187 break;
20188 }
20189 case PM_TOKEN_EMBEXPR_BEGIN: {
20190 bool start_location_set = false;
20191 if (current == NULL) {
20192 // If we hit an embedded expression and the current node is NULL,
20193 // then this is the start of a new string. We'll set the current
20194 // node to a new interpolated symbol.
20195 pm_token_t opening = not_provided(parser);
20196 pm_token_t closing = not_provided(parser);
20197 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20198 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20199 // If we hit an embedded expression and the current node is a
20200 // symbol node, then we'll create an interpolated symbol and add
20201 // the symbol (converted to a string node) as its first
20202 // part.
20203 pm_token_t opening = not_provided(parser);
20204 pm_token_t closing = not_provided(parser);
20205 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20206
20207 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20208 pm_interpolated_symbol_node_append(interpolated, current);
20209 interpolated->base.location.start = current->location.start;
20210 start_location_set = true;
20211 current = (pm_node_t *) interpolated;
20212 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20213 // If we hit an embedded expression and the current node is an
20214 // interpolated symbol, then we'll just continue on.
20215 } else {
20216 assert(false && "unreachable");
20217 }
20218
20219 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20220 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20221 if (!start_location_set) {
20222 current->location.start = part->location.start;
20223 }
20224 break;
20225 }
20226 default:
20227 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20228 parser_lex(parser);
20229 break;
20230 }
20231 }
20232
20233 // If we have a current node, then we need to append it to the list.
20234 if (current) {
20235 pm_array_node_elements_append(array, current);
20236 }
20237
20238 pm_token_t closing = parser->current;
20239 if (match1(parser, PM_TOKEN_EOF)) {
20240 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20241 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20242 } else {
20243 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20244 }
20245 pm_array_node_close_set(array, &closing);
20246
20247 return (pm_node_t *) array;
20248 }
20249 case PM_TOKEN_PERCENT_LOWER_W: {
20250 parser_lex(parser);
20251 pm_token_t opening = parser->previous;
20252 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20253
20254 // skip all leading whitespaces
20255 accept1(parser, PM_TOKEN_WORDS_SEP);
20256
20257 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20258 accept1(parser, PM_TOKEN_WORDS_SEP);
20259 if (match1(parser, PM_TOKEN_STRING_END)) break;
20260
20261 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20262 pm_token_t opening = not_provided(parser);
20263 pm_token_t closing = not_provided(parser);
20264
20265 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20266 pm_array_node_elements_append(array, string);
20267 }
20268
20269 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20270 }
20271
20272 pm_token_t closing = parser->current;
20273 if (match1(parser, PM_TOKEN_EOF)) {
20274 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20275 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20276 } else {
20277 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20278 }
20279
20280 pm_array_node_close_set(array, &closing);
20281 return (pm_node_t *) array;
20282 }
20283 case PM_TOKEN_PERCENT_UPPER_W: {
20284 parser_lex(parser);
20285 pm_token_t opening = parser->previous;
20286 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20287
20288 // This is the current node that we are parsing that will be added
20289 // to the list of elements.
20290 pm_node_t *current = NULL;
20291
20292 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20293 switch (parser->current.type) {
20294 case PM_TOKEN_WORDS_SEP: {
20295 // Reset the explicit encoding if we hit a separator
20296 // since each element can have its own encoding.
20297 parser->explicit_encoding = NULL;
20298
20299 if (current == NULL) {
20300 // If we hit a separator before we have any content,
20301 // then we don't need to do anything.
20302 } else {
20303 // If we hit a separator after we've hit content,
20304 // then we need to append that content to the list
20305 // and reset the current node.
20306 pm_array_node_elements_append(array, current);
20307 current = NULL;
20308 }
20309
20310 parser_lex(parser);
20311 break;
20312 }
20313 case PM_TOKEN_STRING_CONTENT: {
20314 pm_token_t opening = not_provided(parser);
20315 pm_token_t closing = not_provided(parser);
20316
20317 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20318 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20319 parser_lex(parser);
20320
20321 if (current == NULL) {
20322 // If we hit content and the current node is NULL,
20323 // then this is the first string content we've seen.
20324 // In that case we're going to create a new string
20325 // node and set that to the current.
20326 current = string;
20327 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20328 // If we hit string content and the current node is
20329 // an interpolated string, then we need to append
20330 // the string content to the list of child nodes.
20331 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20332 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20333 // If we hit string content and the current node is
20334 // a string node, then we need to convert the
20335 // current node into an interpolated string and add
20336 // the string content to the list of child nodes.
20337 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20338 pm_interpolated_string_node_append(interpolated, current);
20339 pm_interpolated_string_node_append(interpolated, string);
20340 current = (pm_node_t *) interpolated;
20341 } else {
20342 assert(false && "unreachable");
20343 }
20344
20345 break;
20346 }
20347 case PM_TOKEN_EMBVAR: {
20348 if (current == NULL) {
20349 // If we hit an embedded variable and the current
20350 // node is NULL, then this is the start of a new
20351 // string. We'll set the current node to a new
20352 // interpolated string.
20353 pm_token_t opening = not_provided(parser);
20354 pm_token_t closing = not_provided(parser);
20355 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20356 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20357 // If we hit an embedded variable and the current
20358 // node is a string node, then we'll convert the
20359 // current into an interpolated string and add the
20360 // string node to the list of parts.
20361 pm_token_t opening = not_provided(parser);
20362 pm_token_t closing = not_provided(parser);
20363 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20364 pm_interpolated_string_node_append(interpolated, current);
20365 current = (pm_node_t *) interpolated;
20366 } else {
20367 // If we hit an embedded variable and the current
20368 // node is an interpolated string, then we'll just
20369 // add the embedded variable.
20370 }
20371
20372 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20373 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20374 break;
20375 }
20376 case PM_TOKEN_EMBEXPR_BEGIN: {
20377 if (current == NULL) {
20378 // If we hit an embedded expression and the current
20379 // node is NULL, then this is the start of a new
20380 // string. We'll set the current node to a new
20381 // interpolated string.
20382 pm_token_t opening = not_provided(parser);
20383 pm_token_t closing = not_provided(parser);
20384 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20385 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20386 // If we hit an embedded expression and the current
20387 // node is a string node, then we'll convert the
20388 // current into an interpolated string and add the
20389 // string node to the list of parts.
20390 pm_token_t opening = not_provided(parser);
20391 pm_token_t closing = not_provided(parser);
20392 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20393 pm_interpolated_string_node_append(interpolated, current);
20394 current = (pm_node_t *) interpolated;
20395 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20396 // If we hit an embedded expression and the current
20397 // node is an interpolated string, then we'll just
20398 // continue on.
20399 } else {
20400 assert(false && "unreachable");
20401 }
20402
20403 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20404 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20405 break;
20406 }
20407 default:
20408 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20409 parser_lex(parser);
20410 break;
20411 }
20412 }
20413
20414 // If we have a current node, then we need to append it to the list.
20415 if (current) {
20416 pm_array_node_elements_append(array, current);
20417 }
20418
20419 pm_token_t closing = parser->current;
20420 if (match1(parser, PM_TOKEN_EOF)) {
20421 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20422 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20423 } else {
20424 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20425 }
20426
20427 pm_array_node_close_set(array, &closing);
20428 return (pm_node_t *) array;
20429 }
20430 case PM_TOKEN_REGEXP_BEGIN: {
20431 pm_token_t opening = parser->current;
20432 parser_lex(parser);
20433
20434 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20435 // If we get here, then we have an end immediately after a start. In
20436 // that case we'll create an empty content token and return an
20437 // uninterpolated regular expression.
20438 pm_token_t content = (pm_token_t) {
20439 .type = PM_TOKEN_STRING_CONTENT,
20440 .start = parser->previous.end,
20441 .end = parser->previous.end
20442 };
20443
20444 parser_lex(parser);
20445
20446 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20447 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
20448
20449 return node;
20450 }
20451
20453
20454 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20455 // In this case we've hit string content so we know the regular
20456 // expression at least has something in it. We'll need to check if the
20457 // following token is the end (in which case we can return a plain
20458 // regular expression) or if it's not then it has interpolation.
20459 pm_string_t unescaped = parser->current_string;
20460 pm_token_t content = parser->current;
20461 bool ascii_only = parser->current_regular_expression_ascii_only;
20462 parser_lex(parser);
20463
20464 // If we hit an end, then we can create a regular expression
20465 // node without interpolation, which can be represented more
20466 // succinctly and more easily compiled.
20467 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20468 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20469
20470 // If we're not immediately followed by a =~, then we want
20471 // to parse all of the errors at this point. If it is
20472 // followed by a =~, then it will get parsed higher up while
20473 // parsing the named captures as well.
20474 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20475 parse_regular_expression_errors(parser, node);
20476 }
20477
20478 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20479 return (pm_node_t *) node;
20480 }
20481
20482 // If we get here, then we have interpolation so we'll need to create
20483 // a regular expression node with interpolation.
20484 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20485
20486 pm_token_t opening = not_provided(parser);
20487 pm_token_t closing = not_provided(parser);
20488 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20489
20490 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20491 // This is extremely strange, but the first string part of a
20492 // regular expression will always be tagged as binary if we
20493 // are in a US-ASCII file, no matter its contents.
20494 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20495 }
20496
20497 pm_interpolated_regular_expression_node_append(interpolated, part);
20498 } else {
20499 // If the first part of the body of the regular expression is not a
20500 // string content, then we have interpolation and we need to create an
20501 // interpolated regular expression node.
20502 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20503 }
20504
20505 // Now that we're here and we have interpolation, we'll parse all of the
20506 // parts into the list.
20507 pm_node_t *part;
20508 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20509 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20510 pm_interpolated_regular_expression_node_append(interpolated, part);
20511 }
20512 }
20513
20514 pm_token_t closing = parser->current;
20515 if (match1(parser, PM_TOKEN_EOF)) {
20516 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20517 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20518 } else {
20519 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20520 }
20521
20522 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20523 return (pm_node_t *) interpolated;
20524 }
20525 case PM_TOKEN_BACKTICK:
20526 case PM_TOKEN_PERCENT_LOWER_X: {
20527 parser_lex(parser);
20528 pm_token_t opening = parser->previous;
20529
20530 // When we get here, we don't know if this string is going to have
20531 // interpolation or not, even though it is allowed. Still, we want to be
20532 // able to return a string node without interpolation if we can since
20533 // it'll be faster.
20534 if (match1(parser, PM_TOKEN_STRING_END)) {
20535 // If we get here, then we have an end immediately after a start. In
20536 // that case we'll create an empty content token and return an
20537 // uninterpolated string.
20538 pm_token_t content = (pm_token_t) {
20539 .type = PM_TOKEN_STRING_CONTENT,
20540 .start = parser->previous.end,
20541 .end = parser->previous.end
20542 };
20543
20544 parser_lex(parser);
20545 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20546 }
20547
20549
20550 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20551 // In this case we've hit string content so we know the string
20552 // at least has something in it. We'll need to check if the
20553 // following token is the end (in which case we can return a
20554 // plain string) or if it's not then it has interpolation.
20555 pm_string_t unescaped = parser->current_string;
20556 pm_token_t content = parser->current;
20557 parser_lex(parser);
20558
20559 if (match1(parser, PM_TOKEN_STRING_END)) {
20560 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20561 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20562 parser_lex(parser);
20563 return node;
20564 }
20565
20566 // If we get here, then we have interpolation so we'll need to
20567 // create a string node with interpolation.
20568 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20569
20570 pm_token_t opening = not_provided(parser);
20571 pm_token_t closing = not_provided(parser);
20572
20573 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20574 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20575
20576 pm_interpolated_xstring_node_append(node, part);
20577 } else {
20578 // If the first part of the body of the string is not a string
20579 // content, then we have interpolation and we need to create an
20580 // interpolated string node.
20581 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20582 }
20583
20584 pm_node_t *part;
20585 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20586 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20587 pm_interpolated_xstring_node_append(node, part);
20588 }
20589 }
20590
20591 pm_token_t closing = parser->current;
20592 if (match1(parser, PM_TOKEN_EOF)) {
20593 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20594 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20595 } else {
20596 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20597 }
20598 pm_interpolated_xstring_node_closing_set(node, &closing);
20599
20600 return (pm_node_t *) node;
20601 }
20602 case PM_TOKEN_USTAR: {
20603 parser_lex(parser);
20604
20605 // * operators at the beginning of expressions are only valid in the
20606 // context of a multiple assignment. We enforce that here. We'll
20607 // still lex past it though and create a missing node place.
20608 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20609 pm_parser_err_prefix(parser, diag_id);
20610 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20611 }
20612
20613 pm_token_t operator = parser->previous;
20614 pm_node_t *name = NULL;
20615
20616 if (token_begins_expression_p(parser->current.type)) {
20617 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20618 }
20619
20620 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20621
20622 if (match1(parser, PM_TOKEN_COMMA)) {
20623 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20624 } else {
20625 return parse_target_validate(parser, splat, true);
20626 }
20627 }
20628 case PM_TOKEN_BANG: {
20629 if (binding_power > PM_BINDING_POWER_UNARY) {
20630 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20631 }
20632
20633 parser_lex(parser);
20634
20635 pm_token_t operator = parser->previous;
20636 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20637 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20638
20639 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20640 return (pm_node_t *) node;
20641 }
20642 case PM_TOKEN_TILDE: {
20643 if (binding_power > PM_BINDING_POWER_UNARY) {
20644 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20645 }
20646 parser_lex(parser);
20647
20648 pm_token_t operator = parser->previous;
20649 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20650 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20651
20652 return (pm_node_t *) node;
20653 }
20654 case PM_TOKEN_UMINUS: {
20655 if (binding_power > PM_BINDING_POWER_UNARY) {
20656 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20657 }
20658 parser_lex(parser);
20659
20660 pm_token_t operator = parser->previous;
20661 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20662 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20663
20664 return (pm_node_t *) node;
20665 }
20666 case PM_TOKEN_UMINUS_NUM: {
20667 parser_lex(parser);
20668
20669 pm_token_t operator = parser->previous;
20670 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20671
20672 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20673 pm_token_t exponent_operator = parser->previous;
20674 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20675 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20676 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20677 } else {
20678 switch (PM_NODE_TYPE(node)) {
20679 case PM_INTEGER_NODE:
20680 case PM_FLOAT_NODE:
20681 case PM_RATIONAL_NODE:
20682 case PM_IMAGINARY_NODE:
20683 parse_negative_numeric(node);
20684 break;
20685 default:
20686 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20687 break;
20688 }
20689 }
20690
20691 return node;
20692 }
20693 case PM_TOKEN_MINUS_GREATER: {
20694 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20696
20697 size_t opening_newline_index = token_newline_index(parser);
20698 pm_accepts_block_stack_push(parser, true);
20699 parser_lex(parser);
20700
20701 pm_token_t operator = parser->previous;
20702 pm_parser_scope_push(parser, false);
20703
20704 pm_block_parameters_node_t *block_parameters;
20705
20706 switch (parser->current.type) {
20707 case PM_TOKEN_PARENTHESIS_LEFT: {
20708 pm_token_t opening = parser->current;
20709 parser_lex(parser);
20710
20711 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20712 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20713 } else {
20714 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20715 }
20716
20717 accept1(parser, PM_TOKEN_NEWLINE);
20718 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20719
20720 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20721 break;
20722 }
20723 case PM_CASE_PARAMETER: {
20724 pm_accepts_block_stack_push(parser, false);
20725 pm_token_t opening = not_provided(parser);
20726 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20727 pm_accepts_block_stack_pop(parser);
20728 break;
20729 }
20730 default: {
20731 block_parameters = NULL;
20732 break;
20733 }
20734 }
20735
20736 pm_token_t opening;
20737 pm_node_t *body = NULL;
20738 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20739
20740 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20741 opening = parser->previous;
20742
20743 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20744 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20745 }
20746
20747 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20748 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20749 } else {
20750 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20751 opening = parser->previous;
20752
20753 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20754 pm_accepts_block_stack_push(parser, true);
20755 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20756 pm_accepts_block_stack_pop(parser);
20757 }
20758
20759 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20760 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20761 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20762 } else {
20763 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20764 }
20765
20766 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20767 }
20768
20769 pm_constant_id_list_t locals;
20770 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20771 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20772
20773 pm_parser_scope_pop(parser);
20774 pm_accepts_block_stack_pop(parser);
20775
20776 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20777 }
20778 case PM_TOKEN_UPLUS: {
20779 if (binding_power > PM_BINDING_POWER_UNARY) {
20780 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20781 }
20782 parser_lex(parser);
20783
20784 pm_token_t operator = parser->previous;
20785 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20786 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20787
20788 return (pm_node_t *) node;
20789 }
20790 case PM_TOKEN_STRING_BEGIN:
20791 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20792 case PM_TOKEN_SYMBOL_BEGIN: {
20793 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20794 parser_lex(parser);
20795
20796 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20797 }
20798 default: {
20799 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20800
20801 if (recoverable != PM_CONTEXT_NONE) {
20802 parser->recovering = true;
20803
20804 // If the given error is not the generic one, then we'll add it
20805 // here because it will provide more context in addition to the
20806 // recoverable error that we will also add.
20807 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20808 pm_parser_err_prefix(parser, diag_id);
20809 }
20810
20811 // If we get here, then we are assuming this token is closing a
20812 // parent context, so we'll indicate that to the user so that
20813 // they know how we behaved.
20814 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20815 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20816 // We're going to make a special case here, because "cannot
20817 // parse expression" is pretty generic, and we know here that we
20818 // have an unexpected token.
20819 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20820 } else {
20821 pm_parser_err_prefix(parser, diag_id);
20822 }
20823
20824 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20825 }
20826 }
20827}
20828
20838static pm_node_t *
20839parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20840 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20841
20842 // Contradicting binding powers, the right-hand-side value of the assignment
20843 // allows the `rescue` modifier.
20844 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20845 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20846
20847 pm_token_t rescue = parser->current;
20848 parser_lex(parser);
20849
20850 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20851 context_pop(parser);
20852
20853 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20854 }
20855
20856 return value;
20857}
20858
20863static void
20864parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20865 switch (PM_NODE_TYPE(node)) {
20866 case PM_BEGIN_NODE: {
20867 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20868 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20869 break;
20870 }
20871 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20873 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20874 break;
20875 }
20876 case PM_PARENTHESES_NODE: {
20877 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20878 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20879 break;
20880 }
20881 case PM_STATEMENTS_NODE: {
20882 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20883 const pm_node_t *statement;
20884
20885 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20886 parse_assignment_value_local(parser, statement);
20887 }
20888 break;
20889 }
20890 default:
20891 break;
20892 }
20893}
20894
20907static pm_node_t *
20908parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20909 bool permitted = true;
20910 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20911
20912 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20913 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20914
20915 parse_assignment_value_local(parser, value);
20916 bool single_value = true;
20917
20918 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20919 single_value = false;
20920
20921 pm_token_t opening = not_provided(parser);
20922 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20923
20924 pm_array_node_elements_append(array, value);
20925 value = (pm_node_t *) array;
20926
20927 while (accept1(parser, PM_TOKEN_COMMA)) {
20928 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20929
20930 pm_array_node_elements_append(array, element);
20931 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20932
20933 parse_assignment_value_local(parser, element);
20934 }
20935 }
20936
20937 // Contradicting binding powers, the right-hand-side value of the assignment
20938 // allows the `rescue` modifier.
20939 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20940 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20941
20942 pm_token_t rescue = parser->current;
20943 parser_lex(parser);
20944
20945 bool accepts_command_call_inner = false;
20946
20947 // RHS can accept command call iff the value is a call with arguments
20948 // but without parenthesis.
20949 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20950 pm_call_node_t *call_node = (pm_call_node_t *) value;
20951 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20952 accepts_command_call_inner = true;
20953 }
20954 }
20955
20956 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20957 context_pop(parser);
20958
20959 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20960 }
20961
20962 return value;
20963}
20964
20972static void
20973parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20974 if (call_node->arguments != NULL) {
20975 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20976 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20977 call_node->arguments = NULL;
20978 }
20979
20980 if (call_node->block != NULL) {
20981 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20982 pm_node_destroy(parser, (pm_node_t *) call_node->block);
20983 call_node->block = NULL;
20984 }
20985}
20986
21011
21012static inline const uint8_t *
21013pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21014 cursor++;
21015
21016 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
21017 uint8_t value = escape_hexadecimal_digit(*cursor);
21018 cursor++;
21019
21020 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
21021 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
21022 cursor++;
21023 }
21024
21025 pm_buffer_append_byte(unescaped, value);
21026 } else {
21027 pm_buffer_append_string(unescaped, "\\x", 2);
21028 }
21029
21030 return cursor;
21031}
21032
21033static inline const uint8_t *
21034pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21035 uint8_t value = (uint8_t) (*cursor - '0');
21036 cursor++;
21037
21038 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
21039 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
21040 cursor++;
21041
21042 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
21043 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
21044 cursor++;
21045 }
21046 }
21047
21048 pm_buffer_append_byte(unescaped, value);
21049 return cursor;
21050}
21051
21052static inline const uint8_t *
21053pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21054 const uint8_t *start = cursor - 1;
21055 cursor++;
21056
21057 if (cursor >= end) {
21058 pm_buffer_append_string(unescaped, "\\u", 2);
21059 return cursor;
21060 }
21061
21062 if (*cursor != '{') {
21063 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
21064 uint32_t value = escape_unicode(parser, cursor, length);
21065
21066 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
21067 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
21068 }
21069
21070 return cursor + length;
21071 }
21072
21073 cursor++;
21074 for (;;) {
21075 while (cursor < end && *cursor == ' ') cursor++;
21076
21077 if (cursor >= end) break;
21078 if (*cursor == '}') {
21079 cursor++;
21080 break;
21081 }
21082
21083 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
21084 uint32_t value = escape_unicode(parser, cursor, length);
21085
21086 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
21087 cursor += length;
21088 }
21089
21090 return cursor;
21091}
21092
21093static void
21094pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
21095 const uint8_t *end = source + length;
21096 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
21097
21098 for (;;) {
21099 if (++cursor >= end) {
21100 pm_buffer_append_byte(unescaped, '\\');
21101 return;
21102 }
21103
21104 switch (*cursor) {
21105 case 'x':
21106 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
21107 break;
21108 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
21109 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
21110 break;
21111 case 'u':
21112 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
21113 break;
21114 default:
21115 pm_buffer_append_byte(unescaped, '\\');
21116 break;
21117 }
21118
21119 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
21120 if (next_cursor == NULL) break;
21121
21122 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
21123 cursor = next_cursor;
21124 }
21125
21126 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
21127}
21128
21133static void
21134parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
21136
21137 pm_parser_t *parser = callback_data->parser;
21138 pm_call_node_t *call = callback_data->call;
21139 pm_constant_id_list_t *names = &callback_data->names;
21140
21141 const uint8_t *source = pm_string_source(capture);
21142 size_t length = pm_string_length(capture);
21143 pm_buffer_t unescaped = { 0 };
21144
21145 // First, we need to handle escapes within the name of the capture group.
21146 // This is because regular expressions have three different representations
21147 // in prism. The first is the plain source code. The second is the
21148 // representation that will be sent to the regular expression engine, which
21149 // is the value of the "unescaped" field. This is poorly named, because it
21150 // actually still contains escapes, just a subset of them that the regular
21151 // expression engine knows how to handle. The third representation is fully
21152 // unescaped, which is what we need.
21153 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
21154 if (PRISM_UNLIKELY(cursor != NULL)) {
21155 pm_named_capture_escape(parser, &unescaped, source, length, cursor);
21156 source = (const uint8_t *) pm_buffer_value(&unescaped);
21157 length = pm_buffer_length(&unescaped);
21158 }
21159
21160 pm_location_t location;
21161 pm_constant_id_t name;
21162
21163 // If the name of the capture group isn't a valid identifier, we do
21164 // not add it to the local table.
21165 if (!pm_slice_is_valid_local(parser, source, source + length)) {
21166 pm_buffer_free(&unescaped);
21167 return;
21168 }
21169
21170 if (callback_data->shared) {
21171 // If the unescaped string is a slice of the source, then we can
21172 // copy the names directly. The pointers will line up.
21173 location = (pm_location_t) { .start = source, .end = source + length };
21174 name = pm_parser_constant_id_location(parser, location.start, location.end);
21175 } else {
21176 // Otherwise, the name is a slice of the malloc-ed owned string,
21177 // in which case we need to copy it out into a new string.
21178 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
21179
21180 void *memory = xmalloc(length);
21181 if (memory == NULL) abort();
21182
21183 memcpy(memory, source, length);
21184 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
21185 }
21186
21187 // Add this name to the list of constants if it is valid, not duplicated,
21188 // and not a keyword.
21189 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
21190 pm_constant_id_list_append(names, name);
21191
21192 int depth;
21193 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
21194 // If the local is not already a local but it is a keyword, then we
21195 // do not want to add a capture for this.
21196 if (pm_local_is_keyword((const char *) source, length)) {
21197 pm_buffer_free(&unescaped);
21198 return;
21199 }
21200
21201 // If the identifier is not already a local, then we will add it to
21202 // the local table.
21203 pm_parser_local_add(parser, name, location.start, location.end, 0);
21204 }
21205
21206 // Here we lazily create the MatchWriteNode since we know we're
21207 // about to add a target.
21208 if (callback_data->match == NULL) {
21209 callback_data->match = pm_match_write_node_create(parser, call);
21210 }
21211
21212 // Next, create the local variable target and add it to the list of
21213 // targets for the match.
21214 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
21215 pm_node_list_append(&callback_data->match->targets, target);
21216 }
21217
21218 pm_buffer_free(&unescaped);
21219}
21220
21225static pm_node_t *
21226parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
21228 .parser = parser,
21229 .call = call,
21230 .names = { 0 },
21231 .shared = content->type == PM_STRING_SHARED
21232 };
21233
21235 .parser = parser,
21236 .start = call->receiver->location.start,
21237 .end = call->receiver->location.end,
21238 .shared = content->type == PM_STRING_SHARED
21239 };
21240
21241 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
21242 pm_constant_id_list_free(&callback_data.names);
21243
21244 if (callback_data.match != NULL) {
21245 return (pm_node_t *) callback_data.match;
21246 } else {
21247 return (pm_node_t *) call;
21248 }
21249}
21250
21251static inline pm_node_t *
21252parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
21253 pm_token_t token = parser->current;
21254
21255 switch (token.type) {
21256 case PM_TOKEN_EQUAL: {
21257 switch (PM_NODE_TYPE(node)) {
21258 case PM_CALL_NODE: {
21259 // If we have no arguments to the call node and we need this
21260 // to be a target then this is either a method call or a
21261 // local variable write. This _must_ happen before the value
21262 // is parsed because it could be referenced in the value.
21263 pm_call_node_t *call_node = (pm_call_node_t *) node;
21264 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21265 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
21266 }
21267 }
21269 case PM_CASE_WRITABLE: {
21270 // When we have `it = value`, we need to add `it` as a local
21271 // variable before parsing the value, in case the value
21272 // references the variable.
21273 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
21274 pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
21275 }
21276
21277 parser_lex(parser);
21278 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21279
21280 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21281 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21282 }
21283
21284 return parse_write(parser, node, &token, value);
21285 }
21286 case PM_SPLAT_NODE: {
21287 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21288 pm_multi_target_node_targets_append(parser, multi_target, node);
21289
21290 parser_lex(parser);
21291 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21292 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
21293 }
21294 case PM_SOURCE_ENCODING_NODE:
21295 case PM_FALSE_NODE:
21296 case PM_SOURCE_FILE_NODE:
21297 case PM_SOURCE_LINE_NODE:
21298 case PM_NIL_NODE:
21299 case PM_SELF_NODE:
21300 case PM_TRUE_NODE: {
21301 // In these special cases, we have specific error messages
21302 // and we will replace them with local variable writes.
21303 parser_lex(parser);
21304 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21305 return parse_unwriteable_write(parser, node, &token, value);
21306 }
21307 default:
21308 // In this case we have an = sign, but we don't know what
21309 // it's for. We need to treat it as an error. We'll mark it
21310 // as an error and skip past it.
21311 parser_lex(parser);
21312 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21313 return node;
21314 }
21315 }
21316 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
21317 switch (PM_NODE_TYPE(node)) {
21318 case PM_BACK_REFERENCE_READ_NODE:
21319 case PM_NUMBERED_REFERENCE_READ_NODE:
21320 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21322 case PM_GLOBAL_VARIABLE_READ_NODE: {
21323 parser_lex(parser);
21324
21325 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21326 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21327
21328 pm_node_destroy(parser, node);
21329 return result;
21330 }
21331 case PM_CLASS_VARIABLE_READ_NODE: {
21332 parser_lex(parser);
21333
21334 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21335 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21336
21337 pm_node_destroy(parser, node);
21338 return result;
21339 }
21340 case PM_CONSTANT_PATH_NODE: {
21341 parser_lex(parser);
21342
21343 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21344 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21345
21346 return parse_shareable_constant_write(parser, write);
21347 }
21348 case PM_CONSTANT_READ_NODE: {
21349 parser_lex(parser);
21350
21351 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21352 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21353
21354 pm_node_destroy(parser, node);
21355 return parse_shareable_constant_write(parser, write);
21356 }
21357 case PM_INSTANCE_VARIABLE_READ_NODE: {
21358 parser_lex(parser);
21359
21360 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21361 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21362
21363 pm_node_destroy(parser, node);
21364 return result;
21365 }
21366 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21367 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21368 parser_lex(parser);
21369
21370 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21371 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21372
21373 parse_target_implicit_parameter(parser, node);
21374 pm_node_destroy(parser, node);
21375 return result;
21376 }
21377 case PM_LOCAL_VARIABLE_READ_NODE: {
21378 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21379 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21380 parse_target_implicit_parameter(parser, node);
21381 }
21382
21384 parser_lex(parser);
21385
21386 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21387 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21388
21389 pm_node_destroy(parser, node);
21390 return result;
21391 }
21392 case PM_CALL_NODE: {
21393 pm_call_node_t *cast = (pm_call_node_t *) node;
21394
21395 // If we have a vcall (a method with no arguments and no
21396 // receiver that could have been a local variable) then we
21397 // will transform it into a local variable write.
21398 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21399 pm_location_t *message_loc = &cast->message_loc;
21400 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21401
21402 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21403 parser_lex(parser);
21404
21405 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21406 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21407
21408 pm_node_destroy(parser, (pm_node_t *) cast);
21409 return result;
21410 }
21411
21412 // Move past the token here so that we have already added
21413 // the local variable by this point.
21414 parser_lex(parser);
21415
21416 // If there is no call operator and the message is "[]" then
21417 // this is an aref expression, and we can transform it into
21418 // an aset expression.
21419 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21420 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21421 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21422 }
21423
21424 // If this node cannot be writable, then we have an error.
21425 if (pm_call_node_writable_p(parser, cast)) {
21426 parse_write_name(parser, &cast->name);
21427 } else {
21428 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21429 }
21430
21431 parse_call_operator_write(parser, cast, &token);
21432 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21433 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21434 }
21435 case PM_MULTI_WRITE_NODE: {
21436 parser_lex(parser);
21437 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21438 return node;
21439 }
21440 default:
21441 parser_lex(parser);
21442
21443 // In this case we have an &&= sign, but we don't know what it's for.
21444 // We need to treat it as an error. For now, we'll mark it as an error
21445 // and just skip right past it.
21446 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21447 return node;
21448 }
21449 }
21450 case PM_TOKEN_PIPE_PIPE_EQUAL: {
21451 switch (PM_NODE_TYPE(node)) {
21452 case PM_BACK_REFERENCE_READ_NODE:
21453 case PM_NUMBERED_REFERENCE_READ_NODE:
21454 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21456 case PM_GLOBAL_VARIABLE_READ_NODE: {
21457 parser_lex(parser);
21458
21459 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21460 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21461
21462 pm_node_destroy(parser, node);
21463 return result;
21464 }
21465 case PM_CLASS_VARIABLE_READ_NODE: {
21466 parser_lex(parser);
21467
21468 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21469 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21470
21471 pm_node_destroy(parser, node);
21472 return result;
21473 }
21474 case PM_CONSTANT_PATH_NODE: {
21475 parser_lex(parser);
21476
21477 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21478 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21479
21480 return parse_shareable_constant_write(parser, write);
21481 }
21482 case PM_CONSTANT_READ_NODE: {
21483 parser_lex(parser);
21484
21485 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21486 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21487
21488 pm_node_destroy(parser, node);
21489 return parse_shareable_constant_write(parser, write);
21490 }
21491 case PM_INSTANCE_VARIABLE_READ_NODE: {
21492 parser_lex(parser);
21493
21494 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21495 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21496
21497 pm_node_destroy(parser, node);
21498 return result;
21499 }
21500 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21501 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21502 parser_lex(parser);
21503
21504 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21505 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21506
21507 parse_target_implicit_parameter(parser, node);
21508 pm_node_destroy(parser, node);
21509 return result;
21510 }
21511 case PM_LOCAL_VARIABLE_READ_NODE: {
21512 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21513 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21514 parse_target_implicit_parameter(parser, node);
21515 }
21516
21518 parser_lex(parser);
21519
21520 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21521 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21522
21523 pm_node_destroy(parser, node);
21524 return result;
21525 }
21526 case PM_CALL_NODE: {
21527 pm_call_node_t *cast = (pm_call_node_t *) node;
21528
21529 // If we have a vcall (a method with no arguments and no
21530 // receiver that could have been a local variable) then we
21531 // will transform it into a local variable write.
21532 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21533 pm_location_t *message_loc = &cast->message_loc;
21534 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21535
21536 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21537 parser_lex(parser);
21538
21539 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21540 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21541
21542 pm_node_destroy(parser, (pm_node_t *) cast);
21543 return result;
21544 }
21545
21546 // Move past the token here so that we have already added
21547 // the local variable by this point.
21548 parser_lex(parser);
21549
21550 // If there is no call operator and the message is "[]" then
21551 // this is an aref expression, and we can transform it into
21552 // an aset expression.
21553 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21554 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21555 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21556 }
21557
21558 // If this node cannot be writable, then we have an error.
21559 if (pm_call_node_writable_p(parser, cast)) {
21560 parse_write_name(parser, &cast->name);
21561 } else {
21562 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21563 }
21564
21565 parse_call_operator_write(parser, cast, &token);
21566 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21567 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21568 }
21569 case PM_MULTI_WRITE_NODE: {
21570 parser_lex(parser);
21571 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21572 return node;
21573 }
21574 default:
21575 parser_lex(parser);
21576
21577 // In this case we have an ||= sign, but we don't know what it's for.
21578 // We need to treat it as an error. For now, we'll mark it as an error
21579 // and just skip right past it.
21580 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21581 return node;
21582 }
21583 }
21584 case PM_TOKEN_AMPERSAND_EQUAL:
21585 case PM_TOKEN_CARET_EQUAL:
21586 case PM_TOKEN_GREATER_GREATER_EQUAL:
21587 case PM_TOKEN_LESS_LESS_EQUAL:
21588 case PM_TOKEN_MINUS_EQUAL:
21589 case PM_TOKEN_PERCENT_EQUAL:
21590 case PM_TOKEN_PIPE_EQUAL:
21591 case PM_TOKEN_PLUS_EQUAL:
21592 case PM_TOKEN_SLASH_EQUAL:
21593 case PM_TOKEN_STAR_EQUAL:
21594 case PM_TOKEN_STAR_STAR_EQUAL: {
21595 switch (PM_NODE_TYPE(node)) {
21596 case PM_BACK_REFERENCE_READ_NODE:
21597 case PM_NUMBERED_REFERENCE_READ_NODE:
21598 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21600 case PM_GLOBAL_VARIABLE_READ_NODE: {
21601 parser_lex(parser);
21602
21603 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21604 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21605
21606 pm_node_destroy(parser, node);
21607 return result;
21608 }
21609 case PM_CLASS_VARIABLE_READ_NODE: {
21610 parser_lex(parser);
21611
21612 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21613 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21614
21615 pm_node_destroy(parser, node);
21616 return result;
21617 }
21618 case PM_CONSTANT_PATH_NODE: {
21619 parser_lex(parser);
21620
21621 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21622 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21623
21624 return parse_shareable_constant_write(parser, write);
21625 }
21626 case PM_CONSTANT_READ_NODE: {
21627 parser_lex(parser);
21628
21629 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21630 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21631
21632 pm_node_destroy(parser, node);
21633 return parse_shareable_constant_write(parser, write);
21634 }
21635 case PM_INSTANCE_VARIABLE_READ_NODE: {
21636 parser_lex(parser);
21637
21638 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21639 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21640
21641 pm_node_destroy(parser, node);
21642 return result;
21643 }
21644 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21645 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21646 parser_lex(parser);
21647
21648 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21649 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21650
21651 parse_target_implicit_parameter(parser, node);
21652 pm_node_destroy(parser, node);
21653 return result;
21654 }
21655 case PM_LOCAL_VARIABLE_READ_NODE: {
21656 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21657 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21658 parse_target_implicit_parameter(parser, node);
21659 }
21660
21662 parser_lex(parser);
21663
21664 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21665 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21666
21667 pm_node_destroy(parser, node);
21668 return result;
21669 }
21670 case PM_CALL_NODE: {
21671 parser_lex(parser);
21672 pm_call_node_t *cast = (pm_call_node_t *) node;
21673
21674 // If we have a vcall (a method with no arguments and no
21675 // receiver that could have been a local variable) then we
21676 // will transform it into a local variable write.
21677 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21678 pm_location_t *message_loc = &cast->message_loc;
21679 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21680
21681 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21682 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21683 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21684
21685 pm_node_destroy(parser, (pm_node_t *) cast);
21686 return result;
21687 }
21688
21689 // If there is no call operator and the message is "[]" then
21690 // this is an aref expression, and we can transform it into
21691 // an aset expression.
21692 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21693 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21694 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21695 }
21696
21697 // If this node cannot be writable, then we have an error.
21698 if (pm_call_node_writable_p(parser, cast)) {
21699 parse_write_name(parser, &cast->name);
21700 } else {
21701 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21702 }
21703
21704 parse_call_operator_write(parser, cast, &token);
21705 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21706 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21707 }
21708 case PM_MULTI_WRITE_NODE: {
21709 parser_lex(parser);
21710 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21711 return node;
21712 }
21713 default:
21714 parser_lex(parser);
21715
21716 // In this case we have an operator but we don't know what it's for.
21717 // We need to treat it as an error. For now, we'll mark it as an error
21718 // and just skip right past it.
21719 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21720 return node;
21721 }
21722 }
21723 case PM_TOKEN_AMPERSAND_AMPERSAND:
21724 case PM_TOKEN_KEYWORD_AND: {
21725 parser_lex(parser);
21726
21727 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21728 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21729 }
21730 case PM_TOKEN_KEYWORD_OR:
21731 case PM_TOKEN_PIPE_PIPE: {
21732 parser_lex(parser);
21733
21734 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21735 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21736 }
21737 case PM_TOKEN_EQUAL_TILDE: {
21738 // Note that we _must_ parse the value before adding the local
21739 // variables in order to properly mirror the behavior of Ruby. For
21740 // example,
21741 //
21742 // /(?<foo>bar)/ =~ foo
21743 //
21744 // In this case, `foo` should be a method call and not a local yet.
21745 parser_lex(parser);
21746 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21747
21748 // By default, we're going to create a call node and then return it.
21749 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21750 pm_node_t *result = (pm_node_t *) call;
21751
21752 // If the receiver of this =~ is a regular expression node, then we
21753 // need to introduce local variables for it based on its named
21754 // capture groups.
21755 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21756 // It's possible to have an interpolated regular expression node
21757 // that only contains strings. This is because it can be split
21758 // up by a heredoc. In this case we need to concat the unescaped
21759 // strings together and then parse them as a regular expression.
21761
21762 bool interpolated = false;
21763 size_t total_length = 0;
21764
21765 pm_node_t *part;
21766 PM_NODE_LIST_FOREACH(parts, index, part) {
21767 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21768 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21769 } else {
21770 interpolated = true;
21771 break;
21772 }
21773 }
21774
21775 if (!interpolated && total_length > 0) {
21776 void *memory = xmalloc(total_length);
21777 if (!memory) abort();
21778
21779 uint8_t *cursor = memory;
21780 PM_NODE_LIST_FOREACH(parts, index, part) {
21781 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21782 size_t length = pm_string_length(unescaped);
21783
21784 memcpy(cursor, pm_string_source(unescaped), length);
21785 cursor += length;
21786 }
21787
21788 pm_string_t owned;
21789 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21790
21791 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21792 pm_string_free(&owned);
21793 }
21794 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21795 // If we have a regular expression node, then we can just parse
21796 // the named captures directly off the unescaped string.
21797 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21798 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21799 }
21800
21801 return result;
21802 }
21803 case PM_TOKEN_UAMPERSAND:
21804 case PM_TOKEN_USTAR:
21805 case PM_TOKEN_USTAR_STAR:
21806 // The only times this will occur are when we are in an error state,
21807 // but we'll put them in here so that errors can propagate.
21808 case PM_TOKEN_BANG_EQUAL:
21809 case PM_TOKEN_BANG_TILDE:
21810 case PM_TOKEN_EQUAL_EQUAL:
21811 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21812 case PM_TOKEN_LESS_EQUAL_GREATER:
21813 case PM_TOKEN_CARET:
21814 case PM_TOKEN_PIPE:
21815 case PM_TOKEN_AMPERSAND:
21816 case PM_TOKEN_GREATER_GREATER:
21817 case PM_TOKEN_LESS_LESS:
21818 case PM_TOKEN_MINUS:
21819 case PM_TOKEN_PLUS:
21820 case PM_TOKEN_PERCENT:
21821 case PM_TOKEN_SLASH:
21822 case PM_TOKEN_STAR:
21823 case PM_TOKEN_STAR_STAR: {
21824 parser_lex(parser);
21825 pm_token_t operator = parser->previous;
21826 switch (PM_NODE_TYPE(node)) {
21827 case PM_RESCUE_MODIFIER_NODE: {
21829 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21830 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21831 }
21832 break;
21833 }
21834 case PM_AND_NODE: {
21835 pm_and_node_t *cast = (pm_and_node_t *) node;
21836 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21837 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21838 }
21839 break;
21840 }
21841 case PM_OR_NODE: {
21842 pm_or_node_t *cast = (pm_or_node_t *) node;
21843 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21844 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21845 }
21846 break;
21847 }
21848 default:
21849 break;
21850 }
21851
21852 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21853 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21854 }
21855 case PM_TOKEN_GREATER:
21856 case PM_TOKEN_GREATER_EQUAL:
21857 case PM_TOKEN_LESS:
21858 case PM_TOKEN_LESS_EQUAL: {
21859 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21860 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21861 }
21862
21863 parser_lex(parser);
21864 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21865 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21866 }
21867 case PM_TOKEN_AMPERSAND_DOT:
21868 case PM_TOKEN_DOT: {
21869 parser_lex(parser);
21870 pm_token_t operator = parser->previous;
21871 pm_arguments_t arguments = { 0 };
21872
21873 // This if statement handles the foo.() syntax.
21874 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21875 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21876 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21877 }
21878
21879 switch (PM_NODE_TYPE(node)) {
21880 case PM_RESCUE_MODIFIER_NODE: {
21882 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21883 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21884 }
21885 break;
21886 }
21887 case PM_AND_NODE: {
21888 pm_and_node_t *cast = (pm_and_node_t *) node;
21889 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21890 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21891 }
21892 break;
21893 }
21894 case PM_OR_NODE: {
21895 pm_or_node_t *cast = (pm_or_node_t *) node;
21896 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21897 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21898 }
21899 break;
21900 }
21901 default:
21902 break;
21903 }
21904
21905 pm_token_t message;
21906
21907 switch (parser->current.type) {
21908 case PM_CASE_OPERATOR:
21909 case PM_CASE_KEYWORD:
21910 case PM_TOKEN_CONSTANT:
21911 case PM_TOKEN_IDENTIFIER:
21912 case PM_TOKEN_METHOD_NAME: {
21913 parser_lex(parser);
21914 message = parser->previous;
21915 break;
21916 }
21917 default: {
21918 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21919 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21920 }
21921 }
21922
21923 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21924 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21925
21926 if (
21927 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21928 arguments.arguments == NULL &&
21929 arguments.opening_loc.start == NULL &&
21930 match1(parser, PM_TOKEN_COMMA)
21931 ) {
21932 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21933 } else {
21934 return (pm_node_t *) call;
21935 }
21936 }
21937 case PM_TOKEN_DOT_DOT:
21938 case PM_TOKEN_DOT_DOT_DOT: {
21939 parser_lex(parser);
21940
21941 pm_node_t *right = NULL;
21942 if (token_begins_expression_p(parser->current.type)) {
21943 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21944 }
21945
21946 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21947 }
21948 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21949 pm_token_t keyword = parser->current;
21950 parser_lex(parser);
21951
21952 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21953 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21954 }
21955 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21956 pm_token_t keyword = parser->current;
21957 parser_lex(parser);
21958
21959 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21960 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21961 }
21962 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21963 parser_lex(parser);
21964 pm_statements_node_t *statements = pm_statements_node_create(parser);
21965 pm_statements_node_body_append(parser, statements, node, true);
21966
21967 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21968 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21969 }
21970 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21971 parser_lex(parser);
21972 pm_statements_node_t *statements = pm_statements_node_create(parser);
21973 pm_statements_node_body_append(parser, statements, node, true);
21974
21975 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21976 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21977 }
21978 case PM_TOKEN_QUESTION_MARK: {
21979 context_push(parser, PM_CONTEXT_TERNARY);
21980 pm_node_list_t current_block_exits = { 0 };
21981 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21982
21983 pm_token_t qmark = parser->current;
21984 parser_lex(parser);
21985
21986 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21987
21988 if (parser->recovering) {
21989 // If parsing the true expression of this ternary resulted in a syntax
21990 // error that we can recover from, then we're going to put missing nodes
21991 // and tokens into the remaining places. We want to be sure to do this
21992 // before the `expect` function call to make sure it doesn't
21993 // accidentally move past a ':' token that occurs after the syntax
21994 // error.
21995 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21996 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21997
21998 context_pop(parser);
21999 pop_block_exits(parser, previous_block_exits);
22000 pm_node_list_free(&current_block_exits);
22001
22002 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
22003 }
22004
22005 accept1(parser, PM_TOKEN_NEWLINE);
22006 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
22007
22008 pm_token_t colon = parser->previous;
22009 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
22010
22011 context_pop(parser);
22012 pop_block_exits(parser, previous_block_exits);
22013 pm_node_list_free(&current_block_exits);
22014
22015 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
22016 }
22017 case PM_TOKEN_COLON_COLON: {
22018 parser_lex(parser);
22019 pm_token_t delimiter = parser->previous;
22020
22021 switch (parser->current.type) {
22022 case PM_TOKEN_CONSTANT: {
22023 parser_lex(parser);
22024 pm_node_t *path;
22025
22026 if (
22027 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
22028 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
22029 ) {
22030 // If we have a constant immediately following a '::' operator, then
22031 // this can either be a constant path or a method call, depending on
22032 // what follows the constant.
22033 //
22034 // If we have parentheses, then this is a method call. That would
22035 // look like Foo::Bar().
22036 pm_token_t message = parser->previous;
22037 pm_arguments_t arguments = { 0 };
22038
22039 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
22040 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
22041 } else {
22042 // Otherwise, this is a constant path. That would look like Foo::Bar.
22043 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
22044 }
22045
22046 // If this is followed by a comma then it is a multiple assignment.
22047 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22048 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22049 }
22050
22051 return path;
22052 }
22053 case PM_CASE_OPERATOR:
22054 case PM_CASE_KEYWORD:
22055 case PM_TOKEN_IDENTIFIER:
22056 case PM_TOKEN_METHOD_NAME: {
22057 parser_lex(parser);
22058 pm_token_t message = parser->previous;
22059
22060 // If we have an identifier following a '::' operator, then it is for
22061 // sure a method call.
22062 pm_arguments_t arguments = { 0 };
22063 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
22064 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
22065
22066 // If this is followed by a comma then it is a multiple assignment.
22067 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22068 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22069 }
22070
22071 return (pm_node_t *) call;
22072 }
22073 case PM_TOKEN_PARENTHESIS_LEFT: {
22074 // If we have a parenthesis following a '::' operator, then it is the
22075 // method call shorthand. That would look like Foo::(bar).
22076 pm_arguments_t arguments = { 0 };
22077 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
22078
22079 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
22080 }
22081 default: {
22082 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
22083 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
22084 }
22085 }
22086 }
22087 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
22088 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
22089 parser_lex(parser);
22090 accept1(parser, PM_TOKEN_NEWLINE);
22091
22092 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
22093 context_pop(parser);
22094
22095 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
22096 }
22097 case PM_TOKEN_BRACKET_LEFT: {
22098 parser_lex(parser);
22099
22100 pm_arguments_t arguments = { 0 };
22101 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22102
22103 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
22104 pm_accepts_block_stack_push(parser, true);
22105 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
22106 pm_accepts_block_stack_pop(parser);
22107 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
22108 }
22109
22110 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22111
22112 // If we have a comma after the closing bracket then this is a multiple
22113 // assignment and we should parse the targets.
22114 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22115 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
22116 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22117 }
22118
22119 // If we're at the end of the arguments, we can now check if there is a
22120 // block node that starts with a {. If there is, then we can parse it and
22121 // add it to the arguments.
22122 pm_block_node_t *block = NULL;
22123 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
22124 block = parse_block(parser, (uint16_t) (depth + 1));
22125 pm_arguments_validate_block(parser, &arguments, block);
22126 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
22127 block = parse_block(parser, (uint16_t) (depth + 1));
22128 }
22129
22130 if (block != NULL) {
22131 if (arguments.block != NULL) {
22132 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
22133 if (arguments.arguments == NULL) {
22134 arguments.arguments = pm_arguments_node_create(parser);
22135 }
22136 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
22137 }
22138
22139 arguments.block = (pm_node_t *) block;
22140 }
22141
22142 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
22143 }
22144 case PM_TOKEN_KEYWORD_IN: {
22145 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22146 parser->pattern_matching_newlines = true;
22147
22148 pm_token_t operator = parser->current;
22149 parser->command_start = false;
22150 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22151 parser_lex(parser);
22152
22153 pm_constant_id_list_t captures = { 0 };
22154 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
22155
22156 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22157 pm_constant_id_list_free(&captures);
22158
22159 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
22160 }
22161 case PM_TOKEN_EQUAL_GREATER: {
22162 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22163 parser->pattern_matching_newlines = true;
22164
22165 pm_token_t operator = parser->current;
22166 parser->command_start = false;
22167 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22168 parser_lex(parser);
22169
22170 pm_constant_id_list_t captures = { 0 };
22171 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
22172
22173 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22174 pm_constant_id_list_free(&captures);
22175
22176 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
22177 }
22178 default:
22179 assert(false && "unreachable");
22180 return NULL;
22181 }
22182}
22183
// The pattern-parsing flag macros are no longer referenced past this point, so
// their names are released here.
#undef PM_PARSE_PATTERN_MULTI
#undef PM_PARSE_PATTERN_TOP
#undef PM_PARSE_PATTERN_SINGLE
22187
22192static inline bool
22193pm_call_node_command_p(const pm_call_node_t *node) {
22194 return (
22195 (node->opening_loc.start == NULL) &&
22196 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
22197 (node->arguments != NULL || node->block != NULL)
22198 );
22199}
22200
22209static pm_node_t *
22210parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
22211 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
22212 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
22213 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
22214 }
22215
22216 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
22217
22218 switch (PM_NODE_TYPE(node)) {
22219 case PM_MISSING_NODE:
22220 // If we found a syntax error, then the type of node returned by
22221 // parse_expression_prefix is going to be a missing node.
22222 return node;
22223 case PM_PRE_EXECUTION_NODE:
22224 case PM_POST_EXECUTION_NODE:
22225 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
22226 case PM_ALIAS_METHOD_NODE:
22227 case PM_MULTI_WRITE_NODE:
22228 case PM_UNDEF_NODE:
22229 // These expressions are statements, and cannot be followed by
22230 // operators (except modifiers).
22231 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22232 return node;
22233 }
22234 break;
22235 case PM_CALL_NODE:
22236 // If we have a call node, then we need to check if it looks like a
22237 // method call without parentheses that contains arguments. If it
22238 // does, then it has different rules for parsing infix operators,
22239 // namely that it only accepts composition (and/or) and modifiers
22240 // (if/unless/etc.).
22241 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
22242 return node;
22243 }
22244 break;
22245 case PM_SYMBOL_NODE:
22246 // If we have a symbol node that is being parsed as a label, then we
22247 // need to immediately return, because there should never be an
22248 // infix operator following this node.
22249 if (pm_symbol_node_label_p(node)) {
22250 return node;
22251 }
22252 break;
22253 default:
22254 break;
22255 }
22256
22257 // Otherwise we'll look and see if the next token can be parsed as an infix
22258 // operator. If it can, then we'll parse it using parse_expression_infix.
22259 pm_binding_powers_t current_binding_powers;
22260 pm_token_type_t current_token_type;
22261
22262 while (
22263 current_token_type = parser->current.type,
22264 current_binding_powers = pm_binding_powers[current_token_type],
22265 binding_power <= current_binding_powers.left &&
22266 current_binding_powers.binary
22267 ) {
22268 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
22269
22270 if (context_terminator(parser->current_context->context, &parser->current)) {
22271 // If this token terminates the current context, then we need to
22272 // stop parsing the expression, as it has become a statement.
22273 return node;
22274 }
22275
22276 switch (PM_NODE_TYPE(node)) {
22277 case PM_MULTI_WRITE_NODE:
22278 // Multi-write nodes are statements, and cannot be followed by
22279 // operators except modifiers.
22280 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22281 return node;
22282 }
22283 break;
22284 case PM_CLASS_VARIABLE_WRITE_NODE:
22285 case PM_CONSTANT_PATH_WRITE_NODE:
22286 case PM_CONSTANT_WRITE_NODE:
22287 case PM_GLOBAL_VARIABLE_WRITE_NODE:
22288 case PM_INSTANCE_VARIABLE_WRITE_NODE:
22289 case PM_LOCAL_VARIABLE_WRITE_NODE:
22290 // These expressions are statements, by virtue of the right-hand
22291 // side of their write being an implicit array.
22292 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22293 return node;
22294 }
22295 break;
22296 case PM_CALL_NODE:
22297 // These expressions are also statements, by virtue of the
22298 // right-hand side of the expression (i.e., the last argument to
22299 // the call node) being an implicit array.
22300 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22301 return node;
22302 }
22303 break;
22304 default:
22305 break;
22306 }
22307
22308 // If the operator is nonassoc and we should not be able to parse the
22309 // upcoming infix operator, break.
22310 if (current_binding_powers.nonassoc) {
22311 // If this is a non-assoc operator and we are about to parse the
22312 // exact same operator, then we need to add an error.
22313 if (match1(parser, current_token_type)) {
22314 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22315 break;
22316 }
22317
22318 // If this is an endless range, then we need to reject a couple of
22319 // additional operators because it violates the normal operator
22320 // precedence rules. Those patterns are:
22321 //
22322 // 1.. & 2
22323 // 1.. * 2
22324 //
22325 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
22326 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
22327 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22328 break;
22329 }
22330
22331 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22332 break;
22333 }
22334 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22335 break;
22336 }
22337 }
22338
22339 if (accepts_command_call) {
22340 // A command-style method call is only accepted on method chains.
22341 // Thus, we check whether the parsed node can continue method chains.
22342 // The method chain can continue if the parsed node is one of the following five kinds:
22343 // (1) index access: foo[1]
22344 // (2) attribute access: foo.bar
22345 // (3) method call with parenthesis: foo.bar(1)
22346 // (4) method call with a block: foo.bar do end
22347 // (5) constant path: foo::Bar
22348 switch (node->type) {
22349 case PM_CALL_NODE: {
22350 pm_call_node_t *cast = (pm_call_node_t *)node;
22351 if (
22352 // (1) foo[1]
22353 !(
22354 cast->call_operator_loc.start == NULL &&
22355 cast->message_loc.start != NULL &&
22356 cast->message_loc.start[0] == '[' &&
22357 cast->message_loc.end[-1] == ']'
22358 ) &&
22359 // (2) foo.bar
22360 !(
22361 cast->call_operator_loc.start != NULL &&
22362 cast->arguments == NULL &&
22363 cast->block == NULL &&
22364 cast->opening_loc.start == NULL
22365 ) &&
22366 // (3) foo.bar(1)
22367 !(
22368 cast->call_operator_loc.start != NULL &&
22369 cast->opening_loc.start != NULL
22370 ) &&
22371 // (4) foo.bar do end
22372 !(
22373 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22374 )
22375 ) {
22376 accepts_command_call = false;
22377 }
22378 break;
22379 }
22380 // (5) foo::Bar
22381 case PM_CONSTANT_PATH_NODE:
22382 break;
22383 default:
22384 accepts_command_call = false;
22385 break;
22386 }
22387 }
22388 }
22389
22390 return node;
22391}
22392
22397static pm_statements_node_t *
22398wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22399 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22400 if (statements == NULL) {
22401 statements = pm_statements_node_create(parser);
22402 }
22403
22404 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22405 pm_arguments_node_arguments_append(
22406 arguments,
22407 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22408 );
22409
22410 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22411 parser,
22412 arguments,
22413 pm_parser_constant_id_constant(parser, "print", 5)
22414 ), true);
22415 }
22416
22417 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22418 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22419 if (statements == NULL) {
22420 statements = pm_statements_node_create(parser);
22421 }
22422
22423 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22424 pm_arguments_node_arguments_append(
22425 arguments,
22426 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22427 );
22428
22429 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22430 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22431
22432 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22433 parser,
22434 pm_parser_constant_id_constant(parser, "$F", 2),
22435 (pm_node_t *) call
22436 );
22437
22438 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22439 }
22440
22441 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22442 pm_arguments_node_arguments_append(
22443 arguments,
22444 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22445 );
22446
22447 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22448 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22449 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22450 parser,
22451 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22452 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22453 (pm_node_t *) pm_true_node_synthesized_create(parser)
22454 ));
22455
22456 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22457 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22458 }
22459
22460 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22461 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22462 parser,
22463 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22464 statements
22465 ), true);
22466
22467 statements = wrapped_statements;
22468 }
22469
22470 return statements;
22471}
22472
22476static pm_node_t *
22477parse_program(pm_parser_t *parser) {
22478 // If the current scope is NULL, then we want to push a new top level scope.
22479 // The current scope could exist in the event that we are parsing an eval
22480 // and the user has passed into scopes that already exist.
22481 if (parser->current_scope == NULL) {
22482 pm_parser_scope_push(parser, true);
22483 }
22484
22485 pm_node_list_t current_block_exits = { 0 };
22486 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22487
22488 parser_lex(parser);
22489 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22490
22491 if (statements != NULL && !parser->parsing_eval) {
22492 // If we have statements, then the top-level statement should be
22493 // explicitly checked as well. We have to do this here because
22494 // everywhere else we check all but the last statement.
22495 assert(statements->body.size > 0);
22496 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22497 }
22498
22499 pm_constant_id_list_t locals;
22500 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22501 pm_parser_scope_pop(parser);
22502
22503 // At the top level, see if we need to wrap the statements in a program
22504 // node with a while loop based on the options.
22506 statements = wrap_statements(parser, statements);
22507 } else {
22508 flush_block_exits(parser, previous_block_exits);
22509 pm_node_list_free(&current_block_exits);
22510 }
22511
22512 // If this is an empty file, then we're still going to parse all of the
22513 // statements in order to gather up all of the comments and such. Here we'll
22514 // correct the location information.
22515 if (statements == NULL) {
22516 statements = pm_statements_node_create(parser);
22517 pm_statements_node_location_set(statements, parser->start, parser->start);
22518 }
22519
22520 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22521}
22522
22523/******************************************************************************/
22524/* External functions */
22525/******************************************************************************/
22526
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`.
 *
 * `big` is bounded by `big_length` rather than by a terminator, so no byte at
 * or beyond `big + big_length` is ever read, and a match is only reported when
 * it lies entirely within that region.
 *
 * @param big The start of the region to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the first match, or NULL if there is none.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // Only consider starting offsets from which `little` still fits inside the
    // bounded region. Because `offset < big_length` is checked first, the
    // subtraction below cannot wrap around.
    for (size_t offset = 0; offset < big_length && big_length - offset >= little_length; offset++) {
        const char *candidate = big + offset;
        if (*candidate == *little && memcmp(candidate, little, little_length) == 0) return candidate;
    }

    return NULL;
}
22546
22547#ifdef _WIN32
22548#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22549#else
22555static void
22556pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22557 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22558 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22559 }
22560}
22561#endif
22562
22567static void
22568pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22569 const char *switches = pm_strnstr(engine, " -", length);
22570 if (switches == NULL) return;
22571
22572 pm_options_t next_options = *options;
22573 options->shebang_callback(
22574 &next_options,
22575 (const uint8_t *) (switches + 1),
22576 length - ((size_t) (switches - engine)) - 1,
22577 options->shebang_callback_data
22578 );
22579
22580 size_t encoding_length;
22581 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22582 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22583 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22584 }
22585
22586 parser->command_line = next_options.command_line;
22587 parser->frozen_string_literal = next_options.frozen_string_literal;
22588}
22589
22594pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22595 assert(source != NULL);
22596
22597 *parser = (pm_parser_t) {
22598 .node_id = 0,
22599 .lex_state = PM_LEX_STATE_BEG,
22600 .enclosure_nesting = 0,
22601 .lambda_enclosure_nesting = -1,
22602 .brace_nesting = 0,
22603 .do_loop_stack = 0,
22604 .accepts_block_stack = 0,
22605 .lex_modes = {
22606 .index = 0,
22607 .stack = {{ .mode = PM_LEX_DEFAULT }},
22608 .current = &parser->lex_modes.stack[0],
22609 },
22610 .start = source,
22611 .end = source + size,
22612 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22613 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22614 .next_start = NULL,
22615 .heredoc_end = NULL,
22616 .data_loc = { .start = NULL, .end = NULL },
22617 .comment_list = { 0 },
22618 .magic_comment_list = { 0 },
22619 .warning_list = { 0 },
22620 .error_list = { 0 },
22621 .current_scope = NULL,
22622 .current_context = NULL,
22623 .encoding = PM_ENCODING_UTF_8_ENTRY,
22624 .encoding_changed_callback = NULL,
22625 .encoding_comment_start = source,
22626 .lex_callback = NULL,
22627 .filepath = { 0 },
22628 .constant_pool = { 0 },
22629 .newline_list = { 0 },
22630 .integer_base = 0,
22631 .current_string = PM_STRING_EMPTY,
22632 .start_line = 1,
22633 .explicit_encoding = NULL,
22634 .command_line = 0,
22635 .parsing_eval = false,
22636 .partial_script = false,
22637 .command_start = true,
22638 .recovering = false,
22639 .encoding_locked = false,
22640 .encoding_changed = false,
22641 .pattern_matching_newlines = false,
22642 .in_keyword_arg = false,
22643 .current_block_exits = NULL,
22644 .semantic_token_seen = false,
22645 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22646 .current_regular_expression_ascii_only = false,
22647 .warn_mismatched_indentation = true
22648 };
22649
22650 // Initialize the constant pool. We're going to completely guess as to the
22651 // number of constants that we'll need based on the size of the input. The
22652 // ratio we chose here is actually less arbitrary than you might think.
22653 //
22654 // We took ~50K Ruby files and measured the size of the file versus the
22655 // number of constants that were found in those files. Then we found the
22656 // average and standard deviation of the ratios of constants/bytesize. Then
22657 // we added 1.34 standard deviations to the average to get a ratio that
22658 // would fit 75% of the files (for a two-tailed distribution). This works
22659 // because there was about a 0.77 correlation and the distribution was
22660 // roughly normal.
22661 //
22662 // This ratio will need to change if we add more constants to the constant
22663 // pool for another node type.
22664 uint32_t constant_size = ((uint32_t) size) / 95;
22665 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22666
22667 // Initialize the newline list. Similar to the constant pool, we're going to
22668 // guess at the number of newlines that we'll need based on the size of the
22669 // input.
22670 size_t newline_size = size / 22;
22671 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22672
22673 // If options were provided to this parse, establish them here.
22674 if (options != NULL) {
22675 // filepath option
22676 parser->filepath = options->filepath;
22677
22678 // line option
22679 parser->start_line = options->line;
22680
22681 // encoding option
22682 size_t encoding_length = pm_string_length(&options->encoding);
22683 if (encoding_length > 0) {
22684 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22685 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22686 }
22687
22688 // encoding_locked option
22689 parser->encoding_locked = options->encoding_locked;
22690
22691 // frozen_string_literal option
22693
22694 // command_line option
22695 parser->command_line = options->command_line;
22696
22697 // version option
22698 parser->version = options->version;
22699
22700 // partial_script
22701 parser->partial_script = options->partial_script;
22702
22703 // scopes option
22704 parser->parsing_eval = options->scopes_count > 0;
22705 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22706
22707 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22708 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22709 pm_parser_scope_push(parser, scope_index == 0);
22710
22711 // Scopes given from the outside are not allowed to have numbered
22712 // parameters.
22713 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22714
22715 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22716 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22717
22718 const uint8_t *source = pm_string_source(local);
22719 size_t length = pm_string_length(local);
22720
22721 void *allocated = xmalloc(length);
22722 if (allocated == NULL) continue;
22723
22724 memcpy(allocated, source, length);
22725 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22726 }
22727 }
22728 }
22729
22730 // Now that we have established the user-provided options, check if
22731 // a version was given and parse as the latest version otherwise.
22732 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22734 }
22735
22736 pm_accepts_block_stack_push(parser, true);
22737
22738 // Skip past the UTF-8 BOM if it exists.
22739 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22740 parser->current.end += 3;
22741 parser->encoding_comment_start += 3;
22742
22743 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22745 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22746 }
22747 }
22748
22749 // If the -x command line flag is set, or the first shebang of the file does
22750 // not include "ruby", then we'll search for a shebang that does include
22751 // "ruby" and start parsing from there.
22752 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22753
22754 // If the first two bytes of the source are a shebang, then we will do a bit
22755 // of extra processing.
22756 //
22757 // First, we'll indicate that the encoding comment is at the end of the
22758 // shebang. This means that when a shebang is present the encoding comment
22759 // can begin on the second line.
22760 //
22761 // Second, we will check if the shebang includes "ruby". If it does, then we
22762 // will start parsing from there. We will also potentially warn the
22763 // user if there is a carriage return at the end of the shebang. We will
22764 // also potentially call the shebang callback if this is the main script to
22765 // allow the caller to parse the shebang and find any command-line options.
22766 // If the shebang does not include "ruby" and this is the main script being
22767 // parsed, then we will start searching the file for a shebang that does
22768 // contain "ruby" as if -x were passed on the command line.
22769 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22770 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22771
22772 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22773 const char *engine;
22774
22775 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22776 if (newline != NULL) {
22777 parser->encoding_comment_start = newline + 1;
22778
22779 if (options == NULL || options->main_script) {
22780 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22781 }
22782 }
22783
22784 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22785 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22786 }
22787
22788 search_shebang = false;
22789 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22790 search_shebang = true;
22791 }
22792 }
22793
22794 // Here we're going to find the first shebang that includes "ruby" and start
22795 // parsing from there.
22796 if (search_shebang) {
22797 // If a shebang that includes "ruby" is not found, then we're going to add
22798 // a load error to the list of errors on the parser.
22799 bool found_shebang = false;
22800
22801 // This is going to point to the start of each line as we check it.
22802 // We'll maintain a moving window looking at each line as they come.
22803 const uint8_t *cursor = parser->start;
22804
22805 // The newline pointer points to the end of the current line that we're
22806 // considering. If it is NULL, then we're at the end of the file.
22807 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22808
22809 while (newline != NULL) {
22810 pm_newline_list_append(&parser->newline_list, newline);
22811
22812 cursor = newline + 1;
22813 newline = next_newline(cursor, parser->end - cursor);
22814
22815 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22816 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22817 const char *engine;
22818 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22819 found_shebang = true;
22820
22821 if (newline != NULL) {
22822 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22823 parser->encoding_comment_start = newline + 1;
22824 }
22825
22826 if (options != NULL && options->shebang_callback != NULL) {
22827 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22828 }
22829
22830 break;
22831 }
22832 }
22833 }
22834
22835 if (found_shebang) {
22836 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22837 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22838 } else {
22839 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22840 pm_newline_list_clear(&parser->newline_list);
22841 }
22842 }
22843
22844 // The encoding comment can start after any amount of inline whitespace, so
22845 // here we'll advance it to the first non-inline-whitespace character so
22846 // that it is ready for future comparisons.
22847 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22848}
22849
22858
22862static inline void
22863pm_comment_list_free(pm_list_t *list) {
22864 pm_list_node_t *node, *next;
22865
22866 for (node = list->head; node != NULL; node = next) {
22867 next = node->next;
22868
22869 pm_comment_t *comment = (pm_comment_t *) node;
22870 xfree(comment);
22871 }
22872}
22873
22877static inline void
22878pm_magic_comment_list_free(pm_list_t *list) {
22879 pm_list_node_t *node, *next;
22880
22881 for (node = list->head; node != NULL; node = next) {
22882 next = node->next;
22883
22886 }
22887}
22888
22894 pm_string_free(&parser->filepath);
22895 pm_diagnostic_list_free(&parser->error_list);
22896 pm_diagnostic_list_free(&parser->warning_list);
22897 pm_comment_list_free(&parser->comment_list);
22898 pm_magic_comment_list_free(&parser->magic_comment_list);
22899 pm_constant_pool_free(&parser->constant_pool);
22900 pm_newline_list_free(&parser->newline_list);
22901
22902 while (parser->current_scope != NULL) {
22903 // Normally, popping the scope doesn't free the locals since it is
22904 // assumed that ownership has transferred to the AST. However if we have
22905 // scopes while we're freeing the parser, it's likely they came from
22906 // eval scopes and we need to free them explicitly here.
22907 pm_parser_scope_pop(parser);
22908 }
22909
22910 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22911 lex_mode_pop(parser);
22912 }
22913}
22914
22920 return parse_program(parser);
22921}
22922
22928static bool
22929pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22930#define LINE_SIZE 4096
22931 char line[LINE_SIZE];
22932
22933 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22934 size_t length = LINE_SIZE;
22935 while (length > 0 && line[length - 1] == '\n') length--;
22936
22937 if (length == LINE_SIZE) {
22938 // If we read a line that is the maximum size and it doesn't end
22939 // with a newline, then we'll just append it to the buffer and
22940 // continue reading.
22941 length--;
22942 pm_buffer_append_string(buffer, line, length);
22943 continue;
22944 }
22945
22946 // Append the line to the buffer.
22947 length--;
22948 pm_buffer_append_string(buffer, line, length);
22949
22950 // Check if the line matches the __END__ marker. If it does, then stop
22951 // reading and return false. In most circumstances, this means we should
22952 // stop reading from the stream so that the DATA constant can pick it
22953 // up.
22954 switch (length) {
22955 case 7:
22956 if (strncmp(line, "__END__", 7) == 0) return false;
22957 break;
22958 case 8:
22959 if (strncmp(line, "__END__\n", 8) == 0) return false;
22960 break;
22961 case 9:
22962 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22963 break;
22964 }
22965
22966 // All data should be read via gets. If the string returned by gets
22967 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22968 if (stream_feof(stream)) {
22969 break;
22970 }
22971 }
22972
22973 return true;
22974#undef LINE_SIZE
22975}
22976
22986static bool
22987pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22988 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22989
22990 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22991 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22992 return true;
22993 }
22994 }
22995
22996 return false;
22997}
22998
23006pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
23007 pm_buffer_init(buffer);
23008
23009 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
23010
23011 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
23012 pm_node_t *node = pm_parse(parser);
23013
23014 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
23015 pm_node_destroy(parser, node);
23016 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
23017
23018 pm_parser_free(parser);
23019 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
23020 node = pm_parse(parser);
23021 }
23022
23023 return node;
23024}
23025
23030pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
23031 pm_options_t options = { 0 };
23032 pm_options_read(&options, data);
23033
23034 pm_parser_t parser;
23035 pm_parser_init(&parser, source, size, &options);
23036
23037 pm_node_t *node = pm_parse(&parser);
23038 pm_node_destroy(&parser, node);
23039
23040 bool result = parser.error_list.size == 0;
23041 pm_parser_free(&parser);
23042 pm_options_free(&options);
23043
23044 return result;
23045}
23046
23047#undef PM_CASE_KEYWORD
23048#undef PM_CASE_OPERATOR
23049#undef PM_CASE_WRITABLE
23050#undef PM_STRING_EMPTY
23051#undef PM_LOCATION_NODE_BASE_VALUE
23052#undef PM_LOCATION_NODE_VALUE
23053#undef PM_LOCATION_NULL_VALUE
23054#undef PM_LOCATION_TOKEN_VALUE
23055
23056// We optionally support serializing to a binary string. For systems that don't
23057// want or need this functionality, it can be turned off with the
23058// PRISM_EXCLUDE_SERIALIZATION define.
23059#ifndef PRISM_EXCLUDE_SERIALIZATION
23060
23061static inline void
23062pm_serialize_header(pm_buffer_t *buffer) {
23063 pm_buffer_append_string(buffer, "PRISM", 5);
23064 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
23065 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
23066 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
23067 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
23068}
23069
23074pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
23075 pm_serialize_header(buffer);
23076 pm_serialize_content(parser, node, buffer);
23077 pm_buffer_append_byte(buffer, '\0');
23078}
23079
23085pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23086 pm_options_t options = { 0 };
23087 pm_options_read(&options, data);
23088
23089 pm_parser_t parser;
23090 pm_parser_init(&parser, source, size, &options);
23091
23092 pm_node_t *node = pm_parse(&parser);
23093
23094 pm_serialize_header(buffer);
23095 pm_serialize_content(&parser, node, buffer);
23096 pm_buffer_append_byte(buffer, '\0');
23097
23098 pm_node_destroy(&parser, node);
23099 pm_parser_free(&parser);
23100 pm_options_free(&options);
23101}
23102
23108pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
23109 pm_parser_t parser;
23110 pm_options_t options = { 0 };
23111 pm_options_read(&options, data);
23112
23113 pm_buffer_t parser_buffer;
23114 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
23115 pm_serialize_header(buffer);
23116 pm_serialize_content(&parser, node, buffer);
23117 pm_buffer_append_byte(buffer, '\0');
23118
23119 pm_node_destroy(&parser, node);
23120 pm_buffer_free(&parser_buffer);
23121 pm_parser_free(&parser);
23122 pm_options_free(&options);
23123}
23124
23129pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23130 pm_options_t options = { 0 };
23131 pm_options_read(&options, data);
23132
23133 pm_parser_t parser;
23134 pm_parser_init(&parser, source, size, &options);
23135
23136 pm_node_t *node = pm_parse(&parser);
23137 pm_serialize_header(buffer);
23138 pm_serialize_encoding(parser.encoding, buffer);
23139 pm_buffer_append_varsint(buffer, parser.start_line);
23140 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
23141
23142 pm_node_destroy(&parser, node);
23143 pm_parser_free(&parser);
23144 pm_options_free(&options);
23145}
23146
23147#endif
23148
23149/******************************************************************************/
23150/* Slice queries for the Ruby API */
23151/******************************************************************************/
23152
/** The category that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** The encoding name could not be resolved to an encoding. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice is not a valid identifier of any kind. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is an identifier whose first character is not uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is an identifier whose first character is uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
23170
23174pm_slice_type_t
23175pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
23176 // first, get the right encoding object
23177 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
23178 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
23179
23180 // check that there is at least one character
23181 if (length == 0) return PM_SLICE_TYPE_NONE;
23182
23183 size_t width;
23184 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
23185 // valid because alphabetical
23186 } else if (*source == '_') {
23187 // valid because underscore
23188 width = 1;
23189 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
23190 // valid because multibyte
23191 } else {
23192 // invalid because no match
23193 return PM_SLICE_TYPE_NONE;
23194 }
23195
23196 // determine the type of the slice based on the first character
23197 const uint8_t *end = source + length;
23198 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
23199
23200 // next, iterate through all of the bytes of the string to ensure that they
23201 // are all valid identifier characters
23202 source += width;
23203
23204 while (source < end) {
23205 if ((width = encoding->alnum_char(source, end - source)) != 0) {
23206 // valid because alphanumeric
23207 source += width;
23208 } else if (*source == '_') {
23209 // valid because underscore
23210 source++;
23211 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
23212 // valid because multibyte
23213 source += width;
23214 } else {
23215 // invalid because no match
23216 break;
23217 }
23218 }
23219
23220 // accept a ! or ? at the end of the slice as a method name
23221 if (*source == '!' || *source == '?' || *source == '=') {
23222 source++;
23223 result = PM_SLICE_TYPE_METHOD_NAME;
23224 }
23225
23226 // valid if we are at the end of the slice
23227 return source == end ? result : PM_SLICE_TYPE_NONE;
23228}
23229
23234pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
23235 switch (pm_slice_type(source, length, encoding_name)) {
23236 case PM_SLICE_TYPE_ERROR:
23237 return PM_STRING_QUERY_ERROR;
23238 case PM_SLICE_TYPE_NONE:
23239 case PM_SLICE_TYPE_CONSTANT:
23240 case PM_SLICE_TYPE_METHOD_NAME:
23241 return PM_STRING_QUERY_FALSE;
23242 case PM_SLICE_TYPE_LOCAL:
23243 return PM_STRING_QUERY_TRUE;
23244 }
23245
23246 assert(false && "unreachable");
23247 return PM_STRING_QUERY_FALSE;
23248}
23249
23254pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
23255 switch (pm_slice_type(source, length, encoding_name)) {
23256 case PM_SLICE_TYPE_ERROR:
23257 return PM_STRING_QUERY_ERROR;
23258 case PM_SLICE_TYPE_NONE:
23259 case PM_SLICE_TYPE_LOCAL:
23260 case PM_SLICE_TYPE_METHOD_NAME:
23261 return PM_STRING_QUERY_FALSE;
23262 case PM_SLICE_TYPE_CONSTANT:
23263 return PM_STRING_QUERY_TRUE;
23264 }
23265
23266 assert(false && "unreachable");
23267 return PM_STRING_QUERY_FALSE;
23268}
23269
23274pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
23275#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
23276#define C1(c) (*source == c)
23277#define C2(s) (memcmp(source, s, 2) == 0)
23278#define C3(s) (memcmp(source, s, 3) == 0)
23279
23280 switch (pm_slice_type(source, length, encoding_name)) {
23281 case PM_SLICE_TYPE_ERROR:
23282 return PM_STRING_QUERY_ERROR;
23283 case PM_SLICE_TYPE_NONE:
23284 break;
23285 case PM_SLICE_TYPE_LOCAL:
23286 // numbered parameters are not valid method names
23287 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
23288 case PM_SLICE_TYPE_CONSTANT:
23289 // all constants are valid method names
23290 case PM_SLICE_TYPE_METHOD_NAME:
23291 // all method names are valid method names
23292 return PM_STRING_QUERY_TRUE;
23293 }
23294
23295 switch (length) {
23296 case 1:
23297 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
23298 case 2:
23299 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
23300 case 3:
23301 return B(C3("===") || C3("<=>") || C3("[]="));
23302 default:
23303 return PM_STRING_QUERY_FALSE;
23304 }
23305
23306#undef B
23307#undef C1
23308#undef C2
23309#undef C3
23310}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:208
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:192
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:172
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:219
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:225
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:98
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:86
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_CRUBY_3_5
The vendored version of prism in CRuby 3.5.x.
Definition options.h:95
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statements
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statements
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:566
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:355
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:353
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:361
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:369
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22919
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22855
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22893
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:23006
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:22594
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:265
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:273
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:267
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:270
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2141
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:102
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2118
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:109
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2048
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:18070
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:18072
const uint8_t * start
The start of the regular expression.
Definition prism.c:18075
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:18086
const uint8_t * end
The end of the regular expression.
Definition prism.c:18078
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20991
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:21002
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20993
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20999
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20996
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:21009
AndNode.
Definition ast.h:1262
struct pm_node * left
AndNode::left.
Definition ast.h:1278
struct pm_node * right
AndNode::right.
Definition ast.h:1291
ArgumentsNode.
Definition ast.h:1323
pm_node_t base
The embedded base node.
Definition ast.h:1325
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1336
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
ArrayNode.
Definition ast.h:1354
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1364
ArrayPatternNode.
Definition ast.h:1415
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1434
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1474
pm_node_t base
The embedded base node.
Definition ast.h:1417
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1484
AssocNode.
Definition ast.h:1499
struct pm_node * value
AssocNode::value.
Definition ast.h:1531
struct pm_node * key
AssocNode::key.
Definition ast.h:1518
BeginNode.
Definition ast.h:1625
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1678
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1658
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1648
pm_node_t base
The embedded base node.
Definition ast.h:1627
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1668
This struct represents a set of binding powers used for a given token.
Definition prism.c:13024
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:13032
pm_binding_power_t left
The left binding power.
Definition prism.c:13026
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:13038
pm_binding_power_t right
The right binding power.
Definition prism.c:13029
BlockLocalVariableNode.
Definition ast.h:1744
BlockNode.
Definition ast.h:1772
BlockParameterNode.
Definition ast.h:1848
BlockParametersNode.
Definition ast.h:1902
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2129
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2190
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2210
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2148
pm_constant_id_t name
CallNode::name.
Definition ast.h:2171
pm_node_t base
The embedded base node.
Definition ast.h:2131
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2161
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2181
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2200
struct pm_node * block
CallNode::block.
Definition ast.h:2220
CaseMatchNode.
Definition ast.h:2555
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2578
CaseNode.
Definition ast.h:2625
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2648
ClassVariableReadNode.
Definition ast.h:2920
ClassVariableTargetNode.
Definition ast.h:2949
ClassVariableWriteNode.
Definition ast.h:2972
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:466
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition ast.h:3186
ConstantPathTargetNode.
Definition ast.h:3324
ConstantReadNode.
Definition ast.h:3419
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3448
ConstantWriteNode.
Definition ast.h:3471
This is a node in a linked list of contexts.
Definition parser.h:439
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:363
ElseNode.
Definition ast.h:3650
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3663
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3748
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3761
FindPatternNode.
Definition ast.h:3808
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3821
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3873
pm_node_t base
The embedded base node.
Definition ast.h:3810
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3886
FlipFlopNode.
Definition ast.h:3904
FloatNode.
Definition ast.h:3937
double value
FloatNode::value.
Definition ast.h:3947
pm_node_t base
The embedded base node.
Definition ast.h:3939
ForwardingParameterNode.
Definition ast.h:4073
GlobalVariableReadNode.
Definition ast.h:4233
GlobalVariableTargetNode.
Definition ast.h:4262
GlobalVariableWriteNode.
Definition ast.h:4285
HashNode.
Definition ast.h:4347
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4373
HashPatternNode.
Definition ast.h:4407
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4462
pm_node_t base
The embedded base node.
Definition ast.h:4409
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4475
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4423
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4496
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4556
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4575
ImaginaryNode.
Definition ast.h:4602
InstanceVariableReadNode.
Definition ast.h:5092
InstanceVariableTargetNode.
Definition ast.h:5121
InstanceVariableWriteNode.
Definition ast.h:5144
IntegerNode.
Definition ast.h:5212
pm_integer_t value
IntegerNode::value.
Definition ast.h:5222
pm_node_t base
The embedded base node.
Definition ast.h:5214
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5250
InterpolatedRegularExpressionNode.
Definition ast.h:5296
InterpolatedStringNode.
Definition ast.h:5333
pm_node_t base
The embedded base node.
Definition ast.h:5335
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5341
InterpolatedSymbolNode.
Definition ast.h:5366
pm_node_t base
The embedded base node.
Definition ast.h:5368
InterpolatedXStringNode.
Definition ast.h:5399
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5407
pm_node_t base
The embedded base node.
Definition ast.h:5401
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5412
KeywordHashNode.
Definition ast.h:5471
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
enum pm_lex_mode::@95 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
union pm_lex_mode::@96 as
The data associated with this type of lex mode.
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
LocalVariableReadNode.
Definition ast.h:5713
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5744
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5731
LocalVariableTargetNode.
Definition ast.h:5762
LocalVariableWriteNode.
Definition ast.h:5790
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5817
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5804
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
MatchLastLineNode.
Definition ast.h:5882
MatchWriteNode.
Definition ast.h:6040
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6053
MissingNode.
Definition ast.h:6065
MultiTargetNode.
Definition ast.h:6136
pm_node_t base
The embedded base node.
Definition ast.h:6138
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6194
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6154
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6204
MultiWriteNode.
Definition ast.h:6219
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:557
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1068
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1073
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1079
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1091
OptionalParameterNode.
Definition ast.h:6492
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:104
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:153
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:115
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:169
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:176
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:130
int32_t line
The line within the file that the parse starts on.
Definition options.h:124
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:109
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:162
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:186
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:135
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:118
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:150
OrNode.
Definition ast.h:6530
struct pm_node * left
OrNode::left.
Definition ast.h:6546
struct pm_node * right
OrNode::right.
Definition ast.h:6559
ParametersNode.
Definition ast.h:6585
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6603
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6623
pm_node_t base
The embedded base node.
Definition ast.h:6587
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6618
ParenthesesNode.
Definition ast.h:6641
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6649
This struct represents the overall parser.
Definition parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:909
struct pm_parser::@101 lex_modes
A stack of lex modes.
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:646
RangeNode.
Definition ast.h:6877
struct pm_node * right
RangeNode::right.
Definition ast.h:6907
struct pm_node * left
RangeNode::left.
Definition ast.h:6893
RationalNode.
Definition ast.h:6935
pm_node_t base
The embedded base node.
Definition ast.h:6937
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6947
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:10370
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10375
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10372
RegularExpressionNode.
Definition ast.h:7002
pm_node_t base
The embedded base node.
Definition ast.h:7004
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:7025
RequiredParameterNode.
Definition ast.h:7076
RescueModifierNode.
Definition ast.h:7099
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7117
RescueNode.
Definition ast.h:7137
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7175
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7165
pm_node_t base
The embedded base node.
Definition ast.h:7139
This struct represents a node in a linked list of scopes.
Definition parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
SplatNode.
Definition ast.h:7437
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7450
StatementsNode.
Definition ast.h:7465
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7473
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7500
pm_node_t base
The embedded base node.
Definition ast.h:7502
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7523
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7518
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7508
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@102 type
The type of the string.
SymbolNode.
Definition ast.h:7592
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7605
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7615
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:10344
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10349
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10355
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
UndefNode.
Definition ast.h:7648
UnlessNode.
Definition ast.h:7679
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7729
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7739
WhenNode.
Definition ast.h:7815
XStringNode.
Definition ast.h:7906