Ruby 3.5.0dev (2025-08-13 revision 40d07f268e63aa2cdbaf3b31b227cecc5ba7e9e0)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * Width constant associated with tab whitespace.
 * NOTE(review): presumably the number of columns a tab advances to when
 * measuring indentation -- confirm against the users of this macro.
 */
#define PM_TAB_WHITESPACE_SIZE 8
16
// Minimum and maximum of two values. Both macros evaluate their arguments more
// than once, so do not pass expressions that have side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
20
21/******************************************************************************/
22/* Lex mode manipulations */
23/******************************************************************************/
24
/**
 * Return the character that increases the nesting level for the given opening
 * delimiter. Only the bracket-like delimiters `(`, `[`, `{` and `<` nest, and
 * for those the delimiter itself is returned. Every other byte yields `\0`.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    const bool nests = (start == '(') || (start == '[') || (start == '{') || (start == '<');
    return nests ? start : '\0';
}
41
/**
 * Return the character that closes a literal opened with the given delimiter.
 * Bracket-like delimiters map to their matching closing bracket. Any other
 * delimiter closes itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * Result of lex_state_ignored_p: NONE, ALL, or PATTERN.
 */
typedef enum {
    PM_IGNORED_NEWLINE_NONE = 0,
    PM_IGNORED_NEWLINE_ALL = 1,
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
// Lex state debug logging is off unless the build defines PM_DEBUG_LOGGING to
// a non-zero value.
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323
324#if PM_DEBUG_LOGGING
325PRISM_ATTRIBUTE_UNUSED static void
326debug_state(pm_parser_t *parser) {
327 fprintf(stderr, "STATE: ");
328 bool first = true;
329
330 if (parser->lex_state == PM_LEX_STATE_NONE) {
331 fprintf(stderr, "NONE\n");
332 return;
333 }
334
335#define CHECK_STATE(state) \
336 if (parser->lex_state & state) { \
337 if (!first) fprintf(stderr, "|"); \
338 fprintf(stderr, "%s", #state); \
339 first = false; \
340 }
341
342 CHECK_STATE(PM_LEX_STATE_BEG)
343 CHECK_STATE(PM_LEX_STATE_END)
344 CHECK_STATE(PM_LEX_STATE_ENDARG)
345 CHECK_STATE(PM_LEX_STATE_ENDFN)
346 CHECK_STATE(PM_LEX_STATE_ARG)
347 CHECK_STATE(PM_LEX_STATE_CMDARG)
348 CHECK_STATE(PM_LEX_STATE_MID)
349 CHECK_STATE(PM_LEX_STATE_FNAME)
350 CHECK_STATE(PM_LEX_STATE_DOT)
351 CHECK_STATE(PM_LEX_STATE_CLASS)
352 CHECK_STATE(PM_LEX_STATE_LABEL)
353 CHECK_STATE(PM_LEX_STATE_LABELED)
354 CHECK_STATE(PM_LEX_STATE_FITEM)
355
356#undef CHECK_STATE
357
358 fprintf(stderr, "\n");
359}
360
361static void
362debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364 debug_state(parser);
365 lex_state_set(parser, state);
366 fprintf(stderr, "Now: ");
367 debug_state(parser);
368 fprintf(stderr, "\n");
369}
370
// From here on, route every lex_state_set call through the logging wrapper,
// recording the calling function and line.
#define lex_state_set(parser, state) \
    debug_lex_state_set(parser, state, __func__, __LINE__)
372#endif
373
374/******************************************************************************/
375/* Command-line macro helpers */
376/******************************************************************************/
377
/** Non-zero if the given option bit is set in the parser's command_line field. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) \
    ((parser)->command_line & (option))

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_A bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_E bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_L bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_N bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_P bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_X bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
399/******************************************************************************/
400/* Diagnostic-related functions */
401/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append an error with the given diagnostic id and source range to the
 * parser's error list, forwarding the variadic arguments as the values for the
 * diagnostic's format string. Macro parameters are parenthesized so that any
 * expression can be passed safely.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, (start), (end), (diag_id), __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error covering the range described by a pointer to a
 * location (anything with `start` and `end` members).
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(                                               \
        parser,                                                         \
        (location)->start,                                              \
        (location)->end,                                                \
        diag_id,                                                        \
        __VA_ARGS__                                                     \
    )
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/**
 * Append a formatted error located at the source range of the given node
 * pointer.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(                                       \
        parser,                                                 \
        (node)->location.start,                                 \
        (node)->location.end,                                   \
        diag_id,                                                \
        __VA_ARGS__                                             \
    )
448
/**
 * Append a formatted error located at the given node, passing the node's
 * source text as the format arguments: its length as an int followed by a
 * pointer to its first byte.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id)   \
    PM_PARSER_ERR_NODE_FORMAT(                                       \
        parser,                                                      \
        node,                                                        \
        diag_id,                                                     \
        (int) ((node)->location.end - (node)->location.start),       \
        (const char *) (node)->location.start                        \
    )
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error located at the given token. The token is accessed
 * with `.`, so it must be passed as a value or lvalue, not as a pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(                                         \
        parser,                                                   \
        (token).start,                                            \
        (token).end,                                              \
        diag_id,                                                  \
        __VA_ARGS__                                               \
    )
480
/**
 * Append a formatted error located at the given token, passing the token's
 * source text as the format arguments: its length as an int followed by a
 * pointer to its first byte.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT(                                      \
        parser,                                                      \
        token,                                                       \
        diag_id,                                                     \
        (int) ((token).end - (token).start),                         \
        (const char *) (token).start                                 \
    )
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a warning with the given diagnostic id and source range to the
 * parser's warning list, forwarding the variadic arguments as the values for
 * the diagnostic's format string. Macro parameters are parenthesized so that
 * any expression can be passed safely.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, (start), (end), (diag_id), __VA_ARGS__)
519
/**
 * Append a formatted warning located at the given token. The token is accessed
 * with `.`, so it must be passed as a value or lvalue, not as a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(                                         \
        parser,                                                    \
        (token).start,                                             \
        (token).end,                                               \
        diag_id,                                                   \
        __VA_ARGS__                                                \
    )
526
/**
 * Append a formatted warning located at the given token, passing the token's
 * source text as the format arguments: its length as an int followed by a
 * pointer to its first byte.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(                                      \
        parser,                                                       \
        token,                                                        \
        diag_id,                                                      \
        (int) ((token).end - (token).start),                          \
        (const char *) (token).start                                  \
    )
533
/**
 * Append a formatted warning located at the source range of the given node
 * pointer.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(                                       \
        parser,                                                  \
        (node)->location.start,                                  \
        (node)->location.end,                                    \
        diag_id,                                                 \
        __VA_ARGS__                                              \
    )
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
558/******************************************************************************/
559/* Scope-related functions */
560/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/**
 * Outcome of pm_parser_scope_forwarding_param_check: PASS, CONFLICT or FAIL.
 */
typedef enum {
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
729/******************************************************************************/
730/* Local variable-related functions */
731/******************************************************************************/
732
/**
 * Capacity at which a locals set switches representation: below this capacity
 * the locals are kept as a linear list, at or above it they are kept as an
 * open-addressed hash table.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
982/******************************************************************************/
983/* Node-related functions */
984/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
1043 case PM_MATCH_REQUIRED_NODE:
1044 return void_node != NULL ? void_node : node;
1045 case PM_MATCH_PREDICATE_NODE:
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
1148 node = cast->left;
1149 break;
1150 }
1151 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1153
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
1185 case PM_BACK_REFERENCE_READ_NODE:
1186 case PM_CLASS_VARIABLE_READ_NODE:
1187 case PM_GLOBAL_VARIABLE_READ_NODE:
1188 case PM_INSTANCE_VARIABLE_READ_NODE:
1189 case PM_LOCAL_VARIABLE_READ_NODE:
1190 case PM_NUMBERED_REFERENCE_READ_NODE:
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
1249 case PM_CONSTANT_PATH_NODE:
1250 type = "::";
1251 length = 2;
1252 break;
1253 case PM_CONSTANT_READ_NODE:
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
1268 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1269 case PM_INTERPOLATED_STRING_NODE:
1270 case PM_RATIONAL_NODE:
1271 case PM_REGULAR_EXPRESSION_NODE:
1272 case PM_SOURCE_ENCODING_NODE:
1273 case PM_SOURCE_FILE_NODE:
1274 case PM_SOURCE_LINE_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
1287 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The context in which pm_conditional_predicate is inspecting a node. It
 * selects the wording (or absence) of the literal-in-condition warnings.
 */
typedef enum {
    /** Warnings describe the node as being in a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** Used for the endpoints of a range; warnings say "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
1377 const pm_node_t *element = cast->elements.nodes[index];
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1392 case PM_REGULAR_EXPRESSION_NODE:
1393 case PM_SOURCE_ENCODING_NODE:
1394 case PM_SOURCE_FILE_NODE:
1395 case PM_SOURCE_LINE_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
1445
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
1477 case PM_REGULAR_EXPRESSION_NODE:
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1482 node->type = PM_MATCH_LAST_LINE_NODE;
1483
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
1489 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1494 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
1510 case PM_STRING_NODE:
1511 case PM_SOURCE_FILE_NODE:
1512 case PM_INTERPOLATED_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
1515 case PM_SYMBOL_NODE:
1516 case PM_INTERPOLATED_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
1519 case PM_SOURCE_LINE_NODE:
1520 case PM_SOURCE_ENCODING_NODE:
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
1526 case PM_CLASS_VARIABLE_WRITE_NODE:
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1529 case PM_CONSTANT_WRITE_NODE:
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1532 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1535 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1538 case PM_LOCAL_VARIABLE_WRITE_NODE:
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1541 case PM_MULTI_WRITE_NODE:
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
// An empty location anchored at the start of the parser's source.
#define PM_LOCATION_NULL_VALUE(parser) \
    ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })

// The location covered by a token (given as a pointer).
#define PM_LOCATION_TOKEN_VALUE(token) \
    ((pm_location_t) { .start = (token)->start, .end = (token)->end })

// A copy of the location of a node accessed through a pm_node_t pointer.
#define PM_LOCATION_NODE_VALUE(node) \
    ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })

// A copy of the location of a node accessed through a pointer to a concrete
// node struct, whose pm_node_t lives in its `base` member.
#define PM_LOCATION_NODE_BASE_VALUE(node) \
    ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })

// The value used for an optional location that is absent: both ends NULL.
#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE \
    ((pm_location_t) { .start = NULL, .end = NULL })

// The location of a token, or the absent value when the token's type is
// PM_TOKEN_NOT_PROVIDED.
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) \
    ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1653 if (n <= 0) return 0;
1654
1655 if (parser->encoding_changed) {
1656 size_t width;
1657
1658 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1659 return width;
1660 } else if (*b == '_') {
1661 return 1;
1662 } else if (*b >= 0x80) {
1663 return parser->encoding->char_width(b, n);
1664 } else {
1665 return 0;
1666 }
1667 } else if (*b < 0x80) {
1668 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1669 } else {
1670 return pm_encoding_utf_8_char_width(b, n);
1671 }
1672}
1673
1678static inline size_t
1679char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1680 if (n <= 0) {
1681 return 0;
1682 } else if (*b < 0x80) {
1683 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1684 } else {
1685 return pm_encoding_utf_8_char_width(b, n);
1686 }
1687}
1688
1694static inline size_t
1695char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1696 if (n <= 0) {
1697 return 0;
1698 } else if (parser->encoding_changed) {
1699 size_t width;
1700
1701 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1702 return width;
1703 } else if (*b == '_') {
1704 return 1;
1705 } else if (*b >= 0x80) {
1706 return parser->encoding->char_width(b, n);
1707 } else {
1708 return 0;
1709 }
1710 } else {
1711 return char_is_identifier_utf8(b, n);
1712 }
1713}
1714
// A membership bitmap for the punctuation characters that are allowed in
// global names, used to quickly check the character that follows a $. Word
// `idx` of the table covers the 32 character codes starting at (idx + 1) * 32,
// and a character's bit within its word is its code modulo 32.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('!', idx) | BIT('\"', idx) | BIT('$', idx) | BIT('&', idx) | \
    BIT('\'', idx) | BIT('*', idx) | BIT('+', idx) | BIT(',', idx) | \
    BIT('.', idx) | BIT('/', idx) | BIT('0', idx) | BIT(':', idx) | \
    BIT(';', idx) | BIT('<', idx) | BIT('=', idx) | BIT('>', idx) | \
    BIT('?', idx) | BIT('@', idx) | BIT('\\', idx) | BIT('`', idx) | \
    BIT('~', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes at or below 0x20 and above 0x7e
 * are rejected without consulting the table.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (const unsigned int) b;

    const bool in_table_range = (code > 0x20) && (code <= 0x7e);
    if (!in_table_range) return false;

    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return (word >> (code % 32)) & 1;
}
1739
1740static inline bool
1741token_is_setter_name(pm_token_t *token) {
1742 return (
1743 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1744 ((token->type == PM_TOKEN_IDENTIFIER) &&
1745 (token->end - token->start >= 2) &&
1746 (token->end[-1] == '='))
1747 );
1748}
1749
/**
 * Return true if the first `length` bytes of `source` spell one of the
 * keywords listed below. Only `length` bytes are ever read from `source`, so
 * it does not have to be NUL-terminated.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    static const struct {
        const char *text;
        size_t length;
    } keywords[] = {
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"), KEYWORD("return"),
        KEYWORD("unless"),
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t index = 0; index < count; index++) {
        // The length check comes first so that memcmp never reads more than
        // `length` bytes of either operand.
        if (keywords[index].length != length) continue;
        if (memcmp(source, keywords[index].text, length) == 0) return true;
    }

    return false;
}
1820
1821/******************************************************************************/
1822/* Node flag handling functions */
1823/******************************************************************************/
1824
1828static inline void
1829pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1830 node->flags |= flag;
1831}
1832
1836static inline void
1837pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1838 node->flags &= (pm_node_flags_t) ~flag;
1839}
1840
1844static inline void
1845pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1846 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1847 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1848 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1849 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1850 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1851 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1852 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1853 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1854
1855 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1856}
1857
1858/******************************************************************************/
1859/* Node creation functions */
1860/******************************************************************************/
1861
1867#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1868
1872static inline pm_node_flags_t
1873pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1874 pm_node_flags_t flags = 0;
1875
1876 if (closing->type == PM_TOKEN_REGEXP_END) {
1877 pm_buffer_t unknown_flags = { 0 };
1878
1879 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1880 switch (*flag) {
1881 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1882 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1883 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1884 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1885
1886 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1887 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1888 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1889 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1890
1891 default: pm_buffer_append_byte(&unknown_flags, *flag);
1892 }
1893 }
1894
1895 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1896 if (unknown_flags_length != 0) {
1897 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1898 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1899 }
1900 pm_buffer_free(&unknown_flags);
1901 }
1902
1903 return flags;
1904}
1905
1906#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1907
1908static pm_statements_node_t *
1909pm_statements_node_create(pm_parser_t *parser);
1910
1911static void
1912pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1913
1914static size_t
1915pm_statements_node_body_length(pm_statements_node_t *node);
1916
1921static inline void *
1922pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1923 void *memory = xcalloc(1, size);
1924 if (memory == NULL) {
1925 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1926 abort();
1927 }
1928 return memory;
1929}
1930
// Allocate zeroed memory for one node of the given struct type and yield it
// as a pointer to that type. The expansion is fully parenthesized so that it
// behaves as a single operand wherever it is used.
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

// Advance the parser's node counter and yield the new value, to be used as
// the identifier of a freshly created node. The argument is parenthesized so
// that any pointer expression can be passed, not just a plain identifier.
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1933
1937static pm_missing_node_t *
1938pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1939 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1940
1941 *node = (pm_missing_node_t) {{
1942 .type = PM_MISSING_NODE,
1943 .node_id = PM_NODE_IDENTIFY(parser),
1944 .location = { .start = start, .end = end }
1945 }};
1946
1947 return node;
1948}
1949
1953static pm_alias_global_variable_node_t *
1954pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1955 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1956 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1957
1958 *node = (pm_alias_global_variable_node_t) {
1959 {
1960 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1961 .node_id = PM_NODE_IDENTIFY(parser),
1962 .location = {
1963 .start = keyword->start,
1964 .end = old_name->location.end
1965 },
1966 },
1967 .new_name = new_name,
1968 .old_name = old_name,
1969 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1970 };
1971
1972 return node;
1973}
1974
1978static pm_alias_method_node_t *
1979pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1980 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1981 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1982
1983 *node = (pm_alias_method_node_t) {
1984 {
1985 .type = PM_ALIAS_METHOD_NODE,
1986 .node_id = PM_NODE_IDENTIFY(parser),
1987 .location = {
1988 .start = keyword->start,
1989 .end = old_name->location.end
1990 },
1991 },
1992 .new_name = new_name,
1993 .old_name = old_name,
1994 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1995 };
1996
1997 return node;
1998}
1999
2003static pm_alternation_pattern_node_t *
2004pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2005 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2006
2007 *node = (pm_alternation_pattern_node_t) {
2008 {
2009 .type = PM_ALTERNATION_PATTERN_NODE,
2010 .node_id = PM_NODE_IDENTIFY(parser),
2011 .location = {
2012 .start = left->location.start,
2013 .end = right->location.end
2014 },
2015 },
2016 .left = left,
2017 .right = right,
2018 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2019 };
2020
2021 return node;
2022}
2023
2027static pm_and_node_t *
2028pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2029 pm_assert_value_expression(parser, left);
2030
2031 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2032
2033 *node = (pm_and_node_t) {
2034 {
2035 .type = PM_AND_NODE,
2036 .node_id = PM_NODE_IDENTIFY(parser),
2037 .location = {
2038 .start = left->location.start,
2039 .end = right->location.end
2040 },
2041 },
2042 .left = left,
2043 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2044 .right = right
2045 };
2046
2047 return node;
2048}
2049
2053static pm_arguments_node_t *
2054pm_arguments_node_create(pm_parser_t *parser) {
2055 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2056
2057 *node = (pm_arguments_node_t) {
2058 {
2059 .type = PM_ARGUMENTS_NODE,
2060 .node_id = PM_NODE_IDENTIFY(parser),
2061 .location = PM_LOCATION_NULL_VALUE(parser)
2062 },
2063 .arguments = { 0 }
2064 };
2065
2066 return node;
2067}
2068
2072static size_t
2073pm_arguments_node_size(pm_arguments_node_t *node) {
2074 return node->arguments.size;
2075}
2076
2080static void
2081pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2082 if (pm_arguments_node_size(node) == 0) {
2083 node->base.location.start = argument->location.start;
2084 }
2085
2086 node->base.location.end = argument->location.end;
2087 pm_node_list_append(&node->arguments, argument);
2088
2089 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2090 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2091 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2092 } else {
2093 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2094 }
2095 }
2096}
2097
2101static pm_array_node_t *
2102pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2103 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2104
2105 *node = (pm_array_node_t) {
2106 {
2107 .type = PM_ARRAY_NODE,
2108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2109 .node_id = PM_NODE_IDENTIFY(parser),
2110 .location = PM_LOCATION_TOKEN_VALUE(opening)
2111 },
2112 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2113 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2114 .elements = { 0 }
2115 };
2116
2117 return node;
2118}
2119
2123static inline void
2124pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2125 if (!node->elements.size && !node->opening_loc.start) {
2126 node->base.location.start = element->location.start;
2127 }
2128
2129 pm_node_list_append(&node->elements, element);
2130 node->base.location.end = element->location.end;
2131
2132 // If the element is not a static literal, then the array is not a static
2133 // literal. Turn that flag off.
2134 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2135 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2136 }
2137
2138 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2139 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2140 }
2141}
2142
2146static void
2147pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2148 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2149 node->base.location.end = closing->end;
2150 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2151}
2152
2157static pm_array_pattern_node_t *
2158pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2159 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2160
2161 *node = (pm_array_pattern_node_t) {
2162 {
2163 .type = PM_ARRAY_PATTERN_NODE,
2164 .node_id = PM_NODE_IDENTIFY(parser),
2165 .location = {
2166 .start = nodes->nodes[0]->location.start,
2167 .end = nodes->nodes[nodes->size - 1]->location.end
2168 },
2169 },
2170 .constant = NULL,
2171 .rest = NULL,
2172 .requireds = { 0 },
2173 .posts = { 0 },
2174 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2175 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2176 };
2177
2178 // For now we're going to just copy over each pointer manually. This could be
2179 // much more efficient, as we could instead resize the node list.
2180 bool found_rest = false;
2181 pm_node_t *child;
2182
2183 PM_NODE_LIST_FOREACH(nodes, index, child) {
2184 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2185 node->rest = child;
2186 found_rest = true;
2187 } else if (found_rest) {
2188 pm_node_list_append(&node->posts, child);
2189 } else {
2190 pm_node_list_append(&node->requireds, child);
2191 }
2192 }
2193
2194 return node;
2195}
2196
2200static pm_array_pattern_node_t *
2201pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2202 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2203
2204 *node = (pm_array_pattern_node_t) {
2205 {
2206 .type = PM_ARRAY_PATTERN_NODE,
2207 .node_id = PM_NODE_IDENTIFY(parser),
2208 .location = rest->location,
2209 },
2210 .constant = NULL,
2211 .rest = rest,
2212 .requireds = { 0 },
2213 .posts = { 0 },
2214 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2215 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 {
2231 .type = PM_ARRAY_PATTERN_NODE,
2232 .node_id = PM_NODE_IDENTIFY(parser),
2233 .location = {
2234 .start = constant->location.start,
2235 .end = closing->end
2236 },
2237 },
2238 .constant = constant,
2239 .rest = NULL,
2240 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2241 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2242 .requireds = { 0 },
2243 .posts = { 0 }
2244 };
2245
2246 return node;
2247}
2248
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2255 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2256
2257 *node = (pm_array_pattern_node_t) {
2258 {
2259 .type = PM_ARRAY_PATTERN_NODE,
2260 .node_id = PM_NODE_IDENTIFY(parser),
2261 .location = {
2262 .start = opening->start,
2263 .end = closing->end
2264 },
2265 },
2266 .constant = NULL,
2267 .rest = NULL,
2268 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2269 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2270 .requireds = { 0 },
2271 .posts = { 0 }
2272 };
2273
2274 return node;
2275}
2276
2277static inline void
2278pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2279 pm_node_list_append(&node->requireds, inner);
2280}
2281
2285static pm_assoc_node_t *
2286pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2287 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2288 const uint8_t *end;
2289
2290 if (value != NULL && value->location.end > key->location.end) {
2291 end = value->location.end;
2292 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2293 end = operator->end;
2294 } else {
2295 end = key->location.end;
2296 }
2297
2298 // Hash string keys will be frozen, so we can mark them as frozen here so
2299 // that the compiler picks them up and also when we check for static literal
2300 // on the keys it gets factored in.
2301 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2302 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2303 }
2304
2305 // If the key and value of this assoc node are both static literals, then
2306 // we can mark this node as a static literal.
2307 pm_node_flags_t flags = 0;
2308 if (
2309 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2310 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2311 ) {
2312 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2313 }
2314
2315 *node = (pm_assoc_node_t) {
2316 {
2317 .type = PM_ASSOC_NODE,
2318 .flags = flags,
2319 .node_id = PM_NODE_IDENTIFY(parser),
2320 .location = {
2321 .start = key->location.start,
2322 .end = end
2323 },
2324 },
2325 .key = key,
2326 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2327 .value = value
2328 };
2329
2330 return node;
2331}
2332
2336static pm_assoc_splat_node_t *
2337pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2338 assert(operator->type == PM_TOKEN_USTAR_STAR);
2339 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2340
2341 *node = (pm_assoc_splat_node_t) {
2342 {
2343 .type = PM_ASSOC_SPLAT_NODE,
2344 .node_id = PM_NODE_IDENTIFY(parser),
2345 .location = {
2346 .start = operator->start,
2347 .end = value == NULL ? operator->end : value->location.end
2348 },
2349 },
2350 .value = value,
2351 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2352 };
2353
2354 return node;
2355}
2356
2360static pm_back_reference_read_node_t *
2361pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2362 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2363 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2364
2365 *node = (pm_back_reference_read_node_t) {
2366 {
2367 .type = PM_BACK_REFERENCE_READ_NODE,
2368 .node_id = PM_NODE_IDENTIFY(parser),
2369 .location = PM_LOCATION_TOKEN_VALUE(name),
2370 },
2371 .name = pm_parser_constant_id_token(parser, name)
2372 };
2373
2374 return node;
2375}
2376
2380static pm_begin_node_t *
2381pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2382 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2383
2384 *node = (pm_begin_node_t) {
2385 {
2386 .type = PM_BEGIN_NODE,
2387 .node_id = PM_NODE_IDENTIFY(parser),
2388 .location = {
2389 .start = begin_keyword->start,
2390 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2391 },
2392 },
2393 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2394 .statements = statements,
2395 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2396 };
2397
2398 return node;
2399}
2400
2404static void
2405pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2406 // If the begin keyword doesn't exist, we set the start on the begin_node
2407 if (!node->begin_keyword_loc.start) {
2408 node->base.location.start = rescue_clause->base.location.start;
2409 }
2410 node->base.location.end = rescue_clause->base.location.end;
2411 node->rescue_clause = rescue_clause;
2412}
2413
2417static void
2418pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2419 node->base.location.end = else_clause->base.location.end;
2420 node->else_clause = else_clause;
2421}
2422
2426static void
2427pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2428 node->base.location.end = ensure_clause->base.location.end;
2429 node->ensure_clause = ensure_clause;
2430}
2431
2435static void
2436pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2437 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2438
2439 node->base.location.end = end_keyword->end;
2440 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2441}
2442
2446static pm_block_argument_node_t *
2447pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2448 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2449
2450 *node = (pm_block_argument_node_t) {
2451 {
2452 .type = PM_BLOCK_ARGUMENT_NODE,
2453 .node_id = PM_NODE_IDENTIFY(parser),
2454 .location = {
2455 .start = operator->start,
2456 .end = expression == NULL ? operator->end : expression->location.end
2457 },
2458 },
2459 .expression = expression,
2460 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2461 };
2462
2463 return node;
2464}
2465
2469static pm_block_node_t *
2470pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2471 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2472
2473 *node = (pm_block_node_t) {
2474 {
2475 .type = PM_BLOCK_NODE,
2476 .node_id = PM_NODE_IDENTIFY(parser),
2477 .location = { .start = opening->start, .end = closing->end },
2478 },
2479 .locals = *locals,
2480 .parameters = parameters,
2481 .body = body,
2482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2484 };
2485
2486 return node;
2487}
2488
2492static pm_block_parameter_node_t *
2493pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2494 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2495 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2496
2497 *node = (pm_block_parameter_node_t) {
2498 {
2499 .type = PM_BLOCK_PARAMETER_NODE,
2500 .node_id = PM_NODE_IDENTIFY(parser),
2501 .location = {
2502 .start = operator->start,
2503 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2504 },
2505 },
2506 .name = pm_parser_optional_constant_id_token(parser, name),
2507 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2508 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2509 };
2510
2511 return node;
2512}
2513
2517static pm_block_parameters_node_t *
2518pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2519 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2520
2521 const uint8_t *start;
2522 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2523 start = opening->start;
2524 } else if (parameters != NULL) {
2525 start = parameters->base.location.start;
2526 } else {
2527 start = NULL;
2528 }
2529
2530 const uint8_t *end;
2531 if (parameters != NULL) {
2532 end = parameters->base.location.end;
2533 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2534 end = opening->end;
2535 } else {
2536 end = NULL;
2537 }
2538
2539 *node = (pm_block_parameters_node_t) {
2540 {
2541 .type = PM_BLOCK_PARAMETERS_NODE,
2542 .node_id = PM_NODE_IDENTIFY(parser),
2543 .location = {
2544 .start = start,
2545 .end = end
2546 }
2547 },
2548 .parameters = parameters,
2549 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2550 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2551 .locals = { 0 }
2552 };
2553
2554 return node;
2555}
2556
2560static void
2561pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2562 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2563
2564 node->base.location.end = closing->end;
2565 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2566}
2567
2571static pm_block_local_variable_node_t *
2572pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2573 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2574
2575 *node = (pm_block_local_variable_node_t) {
2576 {
2577 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2578 .node_id = PM_NODE_IDENTIFY(parser),
2579 .location = PM_LOCATION_TOKEN_VALUE(name),
2580 },
2581 .name = pm_parser_constant_id_token(parser, name)
2582 };
2583
2584 return node;
2585}
2586
2590static void
2591pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2592 pm_node_list_append(&node->locals, (pm_node_t *) local);
2593
2594 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2595 node->base.location.end = local->base.location.end;
2596}
2597
2601static pm_break_node_t *
2602pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2603 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2604 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2605
2606 *node = (pm_break_node_t) {
2607 {
2608 .type = PM_BREAK_NODE,
2609 .node_id = PM_NODE_IDENTIFY(parser),
2610 .location = {
2611 .start = keyword->start,
2612 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2613 },
2614 },
2615 .arguments = arguments,
2616 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2617 };
2618
2619 return node;
2620}
2621
2622// There are certain flags that we want to use internally but don't want to
2623// expose because they are not relevant beyond parsing. Therefore we'll define
2624// them here and not define them in config.yml/a header file.
2625static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2626static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2627static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2628static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2629
2635static pm_call_node_t *
2636pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2637 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2638
2639 *node = (pm_call_node_t) {
2640 {
2641 .type = PM_CALL_NODE,
2642 .flags = flags,
2643 .node_id = PM_NODE_IDENTIFY(parser),
2644 .location = PM_LOCATION_NULL_VALUE(parser),
2645 },
2646 .receiver = NULL,
2647 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2648 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2649 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2650 .arguments = NULL,
2651 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2652 .block = NULL,
2653 .name = 0
2654 };
2655
2656 return node;
2657}
2658
2663static inline pm_node_flags_t
2664pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2665 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2666}
2667
2672static pm_call_node_t *
2673pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2674 pm_assert_value_expression(parser, receiver);
2675
2676 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2677 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2678 flags |= PM_CALL_NODE_FLAGS_INDEX;
2679 }
2680
2681 pm_call_node_t *node = pm_call_node_create(parser, flags);
2682
2683 node->base.location.start = receiver->location.start;
2684 node->base.location.end = pm_arguments_end(arguments);
2685
2686 node->receiver = receiver;
2687 node->message_loc.start = arguments->opening_loc.start;
2688 node->message_loc.end = arguments->closing_loc.end;
2689
2690 node->opening_loc = arguments->opening_loc;
2691 node->arguments = arguments->arguments;
2692 node->closing_loc = arguments->closing_loc;
2693 node->block = arguments->block;
2694
2695 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2696 return node;
2697}
2698
2702static pm_call_node_t *
2703pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2704 pm_assert_value_expression(parser, receiver);
2705 pm_assert_value_expression(parser, argument);
2706
2707 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2708
2709 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2710 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2711
2712 node->receiver = receiver;
2713 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2714
2715 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2716 pm_arguments_node_arguments_append(arguments, argument);
2717 node->arguments = arguments;
2718
2719 node->name = pm_parser_constant_id_token(parser, operator);
2720 return node;
2721}
2722
2726static pm_call_node_t *
2727pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2728 pm_assert_value_expression(parser, receiver);
2729
2730 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2731
2732 node->base.location.start = receiver->location.start;
2733 const uint8_t *end = pm_arguments_end(arguments);
2734 if (end == NULL) {
2735 end = message->end;
2736 }
2737 node->base.location.end = end;
2738
2739 node->receiver = receiver;
2740 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2741 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2742 node->opening_loc = arguments->opening_loc;
2743 node->arguments = arguments->arguments;
2744 node->closing_loc = arguments->closing_loc;
2745 node->block = arguments->block;
2746
2747 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2748 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2749 }
2750
2751 node->name = pm_parser_constant_id_token(parser, message);
2752 return node;
2753}
2754
2758static pm_call_node_t *
2759pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2760 pm_call_node_t *node = pm_call_node_create(parser, 0);
2761 node->base.location.start = parser->start;
2762 node->base.location.end = parser->end;
2763
2764 node->receiver = receiver;
2765 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2766 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2767 node->arguments = arguments;
2768
2769 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2770 return node;
2771}
2772
2777static pm_call_node_t *
2778pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2779 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2780
2781 node->base.location.start = message->start;
2782 node->base.location.end = pm_arguments_end(arguments);
2783
2784 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2785 node->opening_loc = arguments->opening_loc;
2786 node->arguments = arguments->arguments;
2787 node->closing_loc = arguments->closing_loc;
2788 node->block = arguments->block;
2789
2790 node->name = pm_parser_constant_id_token(parser, message);
2791 return node;
2792}
2793
2798static pm_call_node_t *
2799pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2800 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2801
2802 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2803 node->arguments = arguments;
2804
2805 node->name = name;
2806 return node;
2807}
2808
2812static pm_call_node_t *
2813pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2814 pm_assert_value_expression(parser, receiver);
2815 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2816
2817 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2818
2819 node->base.location.start = message->start;
2820 if (arguments->closing_loc.start != NULL) {
2821 node->base.location.end = arguments->closing_loc.end;
2822 } else {
2823 assert(receiver != NULL);
2824 node->base.location.end = receiver->location.end;
2825 }
2826
2827 node->receiver = receiver;
2828 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2829 node->opening_loc = arguments->opening_loc;
2830 node->arguments = arguments->arguments;
2831 node->closing_loc = arguments->closing_loc;
2832
2833 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2834 return node;
2835}
2836
2840static pm_call_node_t *
2841pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2842 pm_assert_value_expression(parser, receiver);
2843
2844 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2845
2846 node->base.location.start = receiver->location.start;
2847 node->base.location.end = pm_arguments_end(arguments);
2848
2849 node->receiver = receiver;
2850 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2851 node->opening_loc = arguments->opening_loc;
2852 node->arguments = arguments->arguments;
2853 node->closing_loc = arguments->closing_loc;
2854 node->block = arguments->block;
2855
2856 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2857 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2858 }
2859
2860 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2861 return node;
2862}
2863
2867static pm_call_node_t *
2868pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2869 pm_assert_value_expression(parser, receiver);
2870
2871 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2872
2873 node->base.location.start = operator->start;
2874 node->base.location.end = receiver->location.end;
2875
2876 node->receiver = receiver;
2877 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2878
2879 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2880 return node;
2881}
2882
2887static pm_call_node_t *
2888pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2889 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2890
2891 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2892 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2893
2894 node->name = pm_parser_constant_id_token(parser, message);
2895 return node;
2896}
2897
2902static inline bool
2903pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2904 return (
2905 (node->message_loc.start != NULL) &&
2906 (node->message_loc.end[-1] != '!') &&
2907 (node->message_loc.end[-1] != '?') &&
2908 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2909 (node->opening_loc.start == NULL) &&
2910 (node->arguments == NULL) &&
2911 (node->block == NULL)
2912 );
2913}
2914
2918static void
2919pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2920 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2921
2922 if (write_constant->length > 0) {
2923 size_t length = write_constant->length - 1;
2924
2925 void *memory = xmalloc(length);
2926 memcpy(memory, write_constant->start, length);
2927
2928 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2929 } else {
2930 // We can get here if the message was missing because of a syntax error.
2931 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2932 }
2933}
2934
2938static pm_call_and_write_node_t *
2939pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2940 assert(target->block == NULL);
2941 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2942 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2943
2944 *node = (pm_call_and_write_node_t) {
2945 {
2946 .type = PM_CALL_AND_WRITE_NODE,
2947 .flags = target->base.flags,
2948 .node_id = PM_NODE_IDENTIFY(parser),
2949 .location = {
2950 .start = target->base.location.start,
2951 .end = value->location.end
2952 }
2953 },
2954 .receiver = target->receiver,
2955 .call_operator_loc = target->call_operator_loc,
2956 .message_loc = target->message_loc,
2957 .read_name = 0,
2958 .write_name = target->name,
2959 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2960 .value = value
2961 };
2962
2963 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2964
2965 // Here we're going to free the target, since it is no longer necessary.
2966 // However, we don't want to call `pm_node_destroy` because we want to keep
2967 // around all of its children since we just reused them.
2968 xfree(target);
2969
2970 return node;
2971}
2972
2977static void
2978pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2979 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2980 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2981 pm_node_t *node;
2982 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2983 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2984 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2985 break;
2986 }
2987 }
2988 }
2989
2990 if (block != NULL) {
2991 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2992 }
2993 }
2994}
2995
2999static pm_index_and_write_node_t *
3000pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3001 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3002 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
3003
3004 pm_index_arguments_check(parser, target->arguments, target->block);
3005
3006 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3007 *node = (pm_index_and_write_node_t) {
3008 {
3009 .type = PM_INDEX_AND_WRITE_NODE,
3010 .flags = target->base.flags,
3011 .node_id = PM_NODE_IDENTIFY(parser),
3012 .location = {
3013 .start = target->base.location.start,
3014 .end = value->location.end
3015 }
3016 },
3017 .receiver = target->receiver,
3018 .call_operator_loc = target->call_operator_loc,
3019 .opening_loc = target->opening_loc,
3020 .arguments = target->arguments,
3021 .closing_loc = target->closing_loc,
3022 .block = (pm_block_argument_node_t *) target->block,
3023 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3024 .value = value
3025 };
3026
3027 // Here we're going to free the target, since it is no longer necessary.
3028 // However, we don't want to call `pm_node_destroy` because we want to keep
3029 // around all of its children since we just reused them.
3030 xfree(target);
3031
3032 return node;
3033}
3034
3038static pm_call_operator_write_node_t *
3039pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3040 assert(target->block == NULL);
3041 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3042
3043 *node = (pm_call_operator_write_node_t) {
3044 {
3045 .type = PM_CALL_OPERATOR_WRITE_NODE,
3046 .flags = target->base.flags,
3047 .node_id = PM_NODE_IDENTIFY(parser),
3048 .location = {
3049 .start = target->base.location.start,
3050 .end = value->location.end
3051 }
3052 },
3053 .receiver = target->receiver,
3054 .call_operator_loc = target->call_operator_loc,
3055 .message_loc = target->message_loc,
3056 .read_name = 0,
3057 .write_name = target->name,
3058 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3059 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3060 .value = value
3061 };
3062
3063 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3064
3065 // Here we're going to free the target, since it is no longer necessary.
3066 // However, we don't want to call `pm_node_destroy` because we want to keep
3067 // around all of its children since we just reused them.
3068 xfree(target);
3069
3070 return node;
3071}
3072
3076static pm_index_operator_write_node_t *
3077pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3078 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3079
3080 pm_index_arguments_check(parser, target->arguments, target->block);
3081
3082 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3083 *node = (pm_index_operator_write_node_t) {
3084 {
3085 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3086 .flags = target->base.flags,
3087 .node_id = PM_NODE_IDENTIFY(parser),
3088 .location = {
3089 .start = target->base.location.start,
3090 .end = value->location.end
3091 }
3092 },
3093 .receiver = target->receiver,
3094 .call_operator_loc = target->call_operator_loc,
3095 .opening_loc = target->opening_loc,
3096 .arguments = target->arguments,
3097 .closing_loc = target->closing_loc,
3098 .block = (pm_block_argument_node_t *) target->block,
3099 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3100 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3101 .value = value
3102 };
3103
3104 // Here we're going to free the target, since it is no longer necessary.
3105 // However, we don't want to call `pm_node_destroy` because we want to keep
3106 // around all of its children since we just reused them.
3107 xfree(target);
3108
3109 return node;
3110}
3111
3115static pm_call_or_write_node_t *
3116pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3117 assert(target->block == NULL);
3118 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3119 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3120
3121 *node = (pm_call_or_write_node_t) {
3122 {
3123 .type = PM_CALL_OR_WRITE_NODE,
3124 .flags = target->base.flags,
3125 .node_id = PM_NODE_IDENTIFY(parser),
3126 .location = {
3127 .start = target->base.location.start,
3128 .end = value->location.end
3129 }
3130 },
3131 .receiver = target->receiver,
3132 .call_operator_loc = target->call_operator_loc,
3133 .message_loc = target->message_loc,
3134 .read_name = 0,
3135 .write_name = target->name,
3136 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3137 .value = value
3138 };
3139
3140 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3141
3142 // Here we're going to free the target, since it is no longer necessary.
3143 // However, we don't want to call `pm_node_destroy` because we want to keep
3144 // around all of its children since we just reused them.
3145 xfree(target);
3146
3147 return node;
3148}
3149
3153static pm_index_or_write_node_t *
3154pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3155 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3156 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3157
3158 pm_index_arguments_check(parser, target->arguments, target->block);
3159
3160 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3161 *node = (pm_index_or_write_node_t) {
3162 {
3163 .type = PM_INDEX_OR_WRITE_NODE,
3164 .flags = target->base.flags,
3165 .node_id = PM_NODE_IDENTIFY(parser),
3166 .location = {
3167 .start = target->base.location.start,
3168 .end = value->location.end
3169 }
3170 },
3171 .receiver = target->receiver,
3172 .call_operator_loc = target->call_operator_loc,
3173 .opening_loc = target->opening_loc,
3174 .arguments = target->arguments,
3175 .closing_loc = target->closing_loc,
3176 .block = (pm_block_argument_node_t *) target->block,
3177 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3178 .value = value
3179 };
3180
3181 // Here we're going to free the target, since it is no longer necessary.
3182 // However, we don't want to call `pm_node_destroy` because we want to keep
3183 // around all of its children since we just reused them.
3184 xfree(target);
3185
3186 return node;
3187}
3188
3193static pm_call_target_node_t *
3194pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3195 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3196
3197 *node = (pm_call_target_node_t) {
3198 {
3199 .type = PM_CALL_TARGET_NODE,
3200 .flags = target->base.flags,
3201 .node_id = PM_NODE_IDENTIFY(parser),
3202 .location = target->base.location
3203 },
3204 .receiver = target->receiver,
3205 .call_operator_loc = target->call_operator_loc,
3206 .name = target->name,
3207 .message_loc = target->message_loc
3208 };
3209
3210 // Here we're going to free the target, since it is no longer necessary.
3211 // However, we don't want to call `pm_node_destroy` because we want to keep
3212 // around all of its children since we just reused them.
3213 xfree(target);
3214
3215 return node;
3216}
3217
3222static pm_index_target_node_t *
3223pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3224 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3225 pm_node_flags_t flags = target->base.flags;
3226
3227 pm_index_arguments_check(parser, target->arguments, target->block);
3228
3229 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3230 *node = (pm_index_target_node_t) {
3231 {
3232 .type = PM_INDEX_TARGET_NODE,
3233 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3234 .node_id = PM_NODE_IDENTIFY(parser),
3235 .location = target->base.location
3236 },
3237 .receiver = target->receiver,
3238 .opening_loc = target->opening_loc,
3239 .arguments = target->arguments,
3240 .closing_loc = target->closing_loc,
3241 .block = (pm_block_argument_node_t *) target->block,
3242 };
3243
3244 // Here we're going to free the target, since it is no longer necessary.
3245 // However, we don't want to call `pm_node_destroy` because we want to keep
3246 // around all of its children since we just reused them.
3247 xfree(target);
3248
3249 return node;
3250}
3251
3255static pm_capture_pattern_node_t *
3256pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3257 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3258
3259 *node = (pm_capture_pattern_node_t) {
3260 {
3261 .type = PM_CAPTURE_PATTERN_NODE,
3262 .node_id = PM_NODE_IDENTIFY(parser),
3263 .location = {
3264 .start = value->location.start,
3265 .end = target->base.location.end
3266 },
3267 },
3268 .value = value,
3269 .target = target,
3270 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3271 };
3272
3273 return node;
3274}
3275
3279static pm_case_node_t *
3280pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3281 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3282
3283 *node = (pm_case_node_t) {
3284 {
3285 .type = PM_CASE_NODE,
3286 .node_id = PM_NODE_IDENTIFY(parser),
3287 .location = {
3288 .start = case_keyword->start,
3289 .end = end_keyword->end
3290 },
3291 },
3292 .predicate = predicate,
3293 .else_clause = NULL,
3294 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3295 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3296 .conditions = { 0 }
3297 };
3298
3299 return node;
3300}
3301
3305static void
3306pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3307 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3308
3309 pm_node_list_append(&node->conditions, condition);
3310 node->base.location.end = condition->location.end;
3311}
3312
3316static void
3317pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3318 node->else_clause = else_clause;
3319 node->base.location.end = else_clause->base.location.end;
3320}
3321
3325static void
3326pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3327 node->base.location.end = end_keyword->end;
3328 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3329}
3330
3334static pm_case_match_node_t *
3335pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3336 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3337
3338 *node = (pm_case_match_node_t) {
3339 {
3340 .type = PM_CASE_MATCH_NODE,
3341 .node_id = PM_NODE_IDENTIFY(parser),
3342 .location = {
3343 .start = case_keyword->start,
3344 .end = end_keyword->end
3345 },
3346 },
3347 .predicate = predicate,
3348 .else_clause = NULL,
3349 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3350 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3351 .conditions = { 0 }
3352 };
3353
3354 return node;
3355}
3356
3360static void
3361pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3362 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3363
3364 pm_node_list_append(&node->conditions, condition);
3365 node->base.location.end = condition->location.end;
3366}
3367
3371static void
3372pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3373 node->else_clause = else_clause;
3374 node->base.location.end = else_clause->base.location.end;
3375}
3376
3380static void
3381pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3382 node->base.location.end = end_keyword->end;
3383 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3384}
3385
3389static pm_class_node_t *
3390pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3391 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3392
3393 *node = (pm_class_node_t) {
3394 {
3395 .type = PM_CLASS_NODE,
3396 .node_id = PM_NODE_IDENTIFY(parser),
3397 .location = { .start = class_keyword->start, .end = end_keyword->end },
3398 },
3399 .locals = *locals,
3400 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3401 .constant_path = constant_path,
3402 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3403 .superclass = superclass,
3404 .body = body,
3405 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3406 .name = pm_parser_constant_id_token(parser, name)
3407 };
3408
3409 return node;
3410}
3411
3415static pm_class_variable_and_write_node_t *
3416pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3417 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3418 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3419
3420 *node = (pm_class_variable_and_write_node_t) {
3421 {
3422 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3423 .node_id = PM_NODE_IDENTIFY(parser),
3424 .location = {
3425 .start = target->base.location.start,
3426 .end = value->location.end
3427 }
3428 },
3429 .name = target->name,
3430 .name_loc = target->base.location,
3431 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3432 .value = value
3433 };
3434
3435 return node;
3436}
3437
3441static pm_class_variable_operator_write_node_t *
3442pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3443 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3444
3445 *node = (pm_class_variable_operator_write_node_t) {
3446 {
3447 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3448 .node_id = PM_NODE_IDENTIFY(parser),
3449 .location = {
3450 .start = target->base.location.start,
3451 .end = value->location.end
3452 }
3453 },
3454 .name = target->name,
3455 .name_loc = target->base.location,
3456 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3457 .value = value,
3458 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3459 };
3460
3461 return node;
3462}
3463
3467static pm_class_variable_or_write_node_t *
3468pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3469 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3470 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3471
3472 *node = (pm_class_variable_or_write_node_t) {
3473 {
3474 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3475 .node_id = PM_NODE_IDENTIFY(parser),
3476 .location = {
3477 .start = target->base.location.start,
3478 .end = value->location.end
3479 }
3480 },
3481 .name = target->name,
3482 .name_loc = target->base.location,
3483 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3484 .value = value
3485 };
3486
3487 return node;
3488}
3489
3493static pm_class_variable_read_node_t *
3494pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3495 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3496 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3497
3498 *node = (pm_class_variable_read_node_t) {
3499 {
3500 .type = PM_CLASS_VARIABLE_READ_NODE,
3501 .node_id = PM_NODE_IDENTIFY(parser),
3502 .location = PM_LOCATION_TOKEN_VALUE(token)
3503 },
3504 .name = pm_parser_constant_id_token(parser, token)
3505 };
3506
3507 return node;
3508}
3509
3516static inline pm_node_flags_t
3517pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3518 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3519 return flags;
3520 }
3521 return 0;
3522}
3523
3527static pm_class_variable_write_node_t *
3528pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3529 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3530
3531 *node = (pm_class_variable_write_node_t) {
3532 {
3533 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3534 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3535 .node_id = PM_NODE_IDENTIFY(parser),
3536 .location = {
3537 .start = read_node->base.location.start,
3538 .end = value->location.end
3539 },
3540 },
3541 .name = read_node->name,
3542 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3543 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3544 .value = value
3545 };
3546
3547 return node;
3548}
3549
3553static pm_constant_path_and_write_node_t *
3554pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3555 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3556 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3557
3558 *node = (pm_constant_path_and_write_node_t) {
3559 {
3560 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3561 .node_id = PM_NODE_IDENTIFY(parser),
3562 .location = {
3563 .start = target->base.location.start,
3564 .end = value->location.end
3565 }
3566 },
3567 .target = target,
3568 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3569 .value = value
3570 };
3571
3572 return node;
3573}
3574
3578static pm_constant_path_operator_write_node_t *
3579pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3580 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3581
3582 *node = (pm_constant_path_operator_write_node_t) {
3583 {
3584 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3585 .node_id = PM_NODE_IDENTIFY(parser),
3586 .location = {
3587 .start = target->base.location.start,
3588 .end = value->location.end
3589 }
3590 },
3591 .target = target,
3592 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3593 .value = value,
3594 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3595 };
3596
3597 return node;
3598}
3599
3603static pm_constant_path_or_write_node_t *
3604pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3605 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3606 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3607
3608 *node = (pm_constant_path_or_write_node_t) {
3609 {
3610 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3611 .node_id = PM_NODE_IDENTIFY(parser),
3612 .location = {
3613 .start = target->base.location.start,
3614 .end = value->location.end
3615 }
3616 },
3617 .target = target,
3618 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3619 .value = value
3620 };
3621
3622 return node;
3623}
3624
3628static pm_constant_path_node_t *
3629pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3630 pm_assert_value_expression(parser, parent);
3631 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3632
3633 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3634 if (name_token->type == PM_TOKEN_CONSTANT) {
3635 name = pm_parser_constant_id_token(parser, name_token);
3636 }
3637
3638 *node = (pm_constant_path_node_t) {
3639 {
3640 .type = PM_CONSTANT_PATH_NODE,
3641 .node_id = PM_NODE_IDENTIFY(parser),
3642 .location = {
3643 .start = parent == NULL ? delimiter->start : parent->location.start,
3644 .end = name_token->end
3645 },
3646 },
3647 .parent = parent,
3648 .name = name,
3649 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3650 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3651 };
3652
3653 return node;
3654}
3655
3659static pm_constant_path_write_node_t *
3660pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3661 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3662
3663 *node = (pm_constant_path_write_node_t) {
3664 {
3665 .type = PM_CONSTANT_PATH_WRITE_NODE,
3666 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3667 .node_id = PM_NODE_IDENTIFY(parser),
3668 .location = {
3669 .start = target->base.location.start,
3670 .end = value->location.end
3671 },
3672 },
3673 .target = target,
3674 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3675 .value = value
3676 };
3677
3678 return node;
3679}
3680
3684static pm_constant_and_write_node_t *
3685pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3686 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3687 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3688
3689 *node = (pm_constant_and_write_node_t) {
3690 {
3691 .type = PM_CONSTANT_AND_WRITE_NODE,
3692 .node_id = PM_NODE_IDENTIFY(parser),
3693 .location = {
3694 .start = target->base.location.start,
3695 .end = value->location.end
3696 }
3697 },
3698 .name = target->name,
3699 .name_loc = target->base.location,
3700 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3701 .value = value
3702 };
3703
3704 return node;
3705}
3706
3710static pm_constant_operator_write_node_t *
3711pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3712 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3713
3714 *node = (pm_constant_operator_write_node_t) {
3715 {
3716 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3717 .node_id = PM_NODE_IDENTIFY(parser),
3718 .location = {
3719 .start = target->base.location.start,
3720 .end = value->location.end
3721 }
3722 },
3723 .name = target->name,
3724 .name_loc = target->base.location,
3725 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3726 .value = value,
3727 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3728 };
3729
3730 return node;
3731}
3732
3736static pm_constant_or_write_node_t *
3737pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3738 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3739 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3740
3741 *node = (pm_constant_or_write_node_t) {
3742 {
3743 .type = PM_CONSTANT_OR_WRITE_NODE,
3744 .node_id = PM_NODE_IDENTIFY(parser),
3745 .location = {
3746 .start = target->base.location.start,
3747 .end = value->location.end
3748 }
3749 },
3750 .name = target->name,
3751 .name_loc = target->base.location,
3752 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3753 .value = value
3754 };
3755
3756 return node;
3757}
3758
3762static pm_constant_read_node_t *
3763pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3764 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3765 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3766
3767 *node = (pm_constant_read_node_t) {
3768 {
3769 .type = PM_CONSTANT_READ_NODE,
3770 .node_id = PM_NODE_IDENTIFY(parser),
3771 .location = PM_LOCATION_TOKEN_VALUE(name)
3772 },
3773 .name = pm_parser_constant_id_token(parser, name)
3774 };
3775
3776 return node;
3777}
3778
3782static pm_constant_write_node_t *
3783pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3784 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3785
3786 *node = (pm_constant_write_node_t) {
3787 {
3788 .type = PM_CONSTANT_WRITE_NODE,
3789 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3790 .node_id = PM_NODE_IDENTIFY(parser),
3791 .location = {
3792 .start = target->base.location.start,
3793 .end = value->location.end
3794 }
3795 },
3796 .name = target->name,
3797 .name_loc = target->base.location,
3798 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3799 .value = value
3800 };
3801
3802 return node;
3803}
3804
3808static void
3809pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3810 switch (PM_NODE_TYPE(node)) {
3811 case PM_BEGIN_NODE: {
3812 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3813 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3814 break;
3815 }
3816 case PM_PARENTHESES_NODE: {
3817 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3818 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3819 break;
3820 }
3821 case PM_STATEMENTS_NODE: {
3822 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3823 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3824 break;
3825 }
3826 case PM_ARRAY_NODE:
3827 case PM_FLOAT_NODE:
3828 case PM_IMAGINARY_NODE:
3829 case PM_INTEGER_NODE:
3830 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3831 case PM_INTERPOLATED_STRING_NODE:
3832 case PM_INTERPOLATED_SYMBOL_NODE:
3833 case PM_INTERPOLATED_X_STRING_NODE:
3834 case PM_RATIONAL_NODE:
3835 case PM_REGULAR_EXPRESSION_NODE:
3836 case PM_SOURCE_ENCODING_NODE:
3837 case PM_SOURCE_FILE_NODE:
3838 case PM_SOURCE_LINE_NODE:
3839 case PM_STRING_NODE:
3840 case PM_SYMBOL_NODE:
3841 case PM_X_STRING_NODE:
3842 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3843 break;
3844 default:
3845 break;
3846 }
3847}
3848
3852static pm_def_node_t *
3853pm_def_node_create(
3854 pm_parser_t *parser,
3855 pm_constant_id_t name,
3856 const pm_token_t *name_loc,
3857 pm_node_t *receiver,
3858 pm_parameters_node_t *parameters,
3859 pm_node_t *body,
3860 pm_constant_id_list_t *locals,
3861 const pm_token_t *def_keyword,
3862 const pm_token_t *operator,
3863 const pm_token_t *lparen,
3864 const pm_token_t *rparen,
3865 const pm_token_t *equal,
3866 const pm_token_t *end_keyword
3867) {
3868 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3869 const uint8_t *end;
3870
3871 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3872 end = body->location.end;
3873 } else {
3874 end = end_keyword->end;
3875 }
3876
3877 if (receiver != NULL) {
3878 pm_def_node_receiver_check(parser, receiver);
3879 }
3880
3881 *node = (pm_def_node_t) {
3882 {
3883 .type = PM_DEF_NODE,
3884 .node_id = PM_NODE_IDENTIFY(parser),
3885 .location = { .start = def_keyword->start, .end = end },
3886 },
3887 .name = name,
3888 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3889 .receiver = receiver,
3890 .parameters = parameters,
3891 .body = body,
3892 .locals = *locals,
3893 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3894 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3895 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3896 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3897 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3898 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3899 };
3900
3901 return node;
3902}
3903
3907static pm_defined_node_t *
3908pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3909 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3910
3911 *node = (pm_defined_node_t) {
3912 {
3913 .type = PM_DEFINED_NODE,
3914 .node_id = PM_NODE_IDENTIFY(parser),
3915 .location = {
3916 .start = keyword_loc->start,
3917 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3918 },
3919 },
3920 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3921 .value = value,
3922 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3923 .keyword_loc = *keyword_loc
3924 };
3925
3926 return node;
3927}
3928
3932static pm_else_node_t *
3933pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3934 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3935 const uint8_t *end = NULL;
3936 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3937 end = statements->base.location.end;
3938 } else {
3939 end = end_keyword->end;
3940 }
3941
3942 *node = (pm_else_node_t) {
3943 {
3944 .type = PM_ELSE_NODE,
3945 .node_id = PM_NODE_IDENTIFY(parser),
3946 .location = {
3947 .start = else_keyword->start,
3948 .end = end,
3949 },
3950 },
3951 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3952 .statements = statements,
3953 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3954 };
3955
3956 return node;
3957}
3958
3962static pm_embedded_statements_node_t *
3963pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3964 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3965
3966 *node = (pm_embedded_statements_node_t) {
3967 {
3968 .type = PM_EMBEDDED_STATEMENTS_NODE,
3969 .node_id = PM_NODE_IDENTIFY(parser),
3970 .location = {
3971 .start = opening->start,
3972 .end = closing->end
3973 }
3974 },
3975 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3976 .statements = statements,
3977 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3978 };
3979
3980 return node;
3981}
3982
3986static pm_embedded_variable_node_t *
3987pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3988 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3989
3990 *node = (pm_embedded_variable_node_t) {
3991 {
3992 .type = PM_EMBEDDED_VARIABLE_NODE,
3993 .node_id = PM_NODE_IDENTIFY(parser),
3994 .location = {
3995 .start = operator->start,
3996 .end = variable->location.end
3997 }
3998 },
3999 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4000 .variable = variable
4001 };
4002
4003 return node;
4004}
4005
4009static pm_ensure_node_t *
4010pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4011 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4012
4013 *node = (pm_ensure_node_t) {
4014 {
4015 .type = PM_ENSURE_NODE,
4016 .node_id = PM_NODE_IDENTIFY(parser),
4017 .location = {
4018 .start = ensure_keyword->start,
4019 .end = end_keyword->end
4020 },
4021 },
4022 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4023 .statements = statements,
4024 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4025 };
4026
4027 return node;
4028}
4029
4033static pm_false_node_t *
4034pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4035 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4036 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4037
4038 *node = (pm_false_node_t) {{
4039 .type = PM_FALSE_NODE,
4040 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4041 .node_id = PM_NODE_IDENTIFY(parser),
4042 .location = PM_LOCATION_TOKEN_VALUE(token)
4043 }};
4044
4045 return node;
4046}
4047
4052static pm_find_pattern_node_t *
4053pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4054 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4055
4056 pm_node_t *left = nodes->nodes[0];
4057 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4058 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4059
4060 pm_node_t *right;
4061
4062 if (nodes->size == 1) {
4063 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4064 } else {
4065 right = nodes->nodes[nodes->size - 1];
4066 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4067 }
4068
4069#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4070 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4071 // The resulting AST will anyway be ignored, but this file still needs to compile.
4072 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4073#else
4074 pm_node_t *right_splat_node = right;
4075#endif
4076 *node = (pm_find_pattern_node_t) {
4077 {
4078 .type = PM_FIND_PATTERN_NODE,
4079 .node_id = PM_NODE_IDENTIFY(parser),
4080 .location = {
4081 .start = left->location.start,
4082 .end = right->location.end,
4083 },
4084 },
4085 .constant = NULL,
4086 .left = left_splat_node,
4087 .right = right_splat_node,
4088 .requireds = { 0 },
4089 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4090 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4091 };
4092
4093 // For now we're going to just copy over each pointer manually. This could be
4094 // much more efficient, as we could instead resize the node list to only point
4095 // to 1...-1.
4096 for (size_t index = 1; index < nodes->size - 1; index++) {
4097 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4098 }
4099
4100 return node;
4101}
4102
4107static double
4108pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4109 ptrdiff_t diff = token->end - token->start;
4110 if (diff <= 0) return 0.0;
4111
4112 // First, get a buffer of the content.
4113 size_t length = (size_t) diff;
4114 char *buffer = xmalloc(sizeof(char) * (length + 1));
4115 memcpy((void *) buffer, token->start, length);
4116
4117 // Next, determine if we need to replace the decimal point because of
4118 // locale-specific options, and then normalize them if we have to.
4119 char decimal_point = *localeconv()->decimal_point;
4120 if (decimal_point != '.') {
4121 for (size_t index = 0; index < length; index++) {
4122 if (buffer[index] == '.') buffer[index] = decimal_point;
4123 }
4124 }
4125
4126 // Next, handle underscores by removing them from the buffer.
4127 for (size_t index = 0; index < length; index++) {
4128 if (buffer[index] == '_') {
4129 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4130 length--;
4131 }
4132 }
4133
4134 // Null-terminate the buffer so that strtod cannot read off the end.
4135 buffer[length] = '\0';
4136
4137 // Now, call strtod to parse the value. Note that CRuby has their own
4138 // version of strtod which avoids locales. We're okay using the locale-aware
4139 // version because we've already validated through the parser that the token
4140 // is in a valid format.
4141 errno = 0;
4142 char *eptr;
4143 double value = strtod(buffer, &eptr);
4144
4145 // This should never happen, because we've already checked that the token
4146 // is in a valid format. However it's good to be safe.
4147 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4148 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4149 xfree((void *) buffer);
4150 return 0.0;
4151 }
4152
4153 // If errno is set, then it should only be ERANGE. At this point we need to
4154 // check if it's infinity (it should be).
4155 if (errno == ERANGE && PRISM_ISINF(value)) {
4156 int warn_width;
4157 const char *ellipsis;
4158
4159 if (length > 20) {
4160 warn_width = 20;
4161 ellipsis = "...";
4162 } else {
4163 warn_width = (int) length;
4164 ellipsis = "";
4165 }
4166
4167 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4168 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4169 }
4170
4171 // Finally we can free the buffer and return the value.
4172 xfree((void *) buffer);
4173 return value;
4174}
4175
4179static pm_float_node_t *
4180pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4181 assert(token->type == PM_TOKEN_FLOAT);
4182 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4183
4184 *node = (pm_float_node_t) {
4185 {
4186 .type = PM_FLOAT_NODE,
4187 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4188 .node_id = PM_NODE_IDENTIFY(parser),
4189 .location = PM_LOCATION_TOKEN_VALUE(token)
4190 },
4191 .value = pm_double_parse(parser, token)
4192 };
4193
4194 return node;
4195}
4196
4200static pm_imaginary_node_t *
4201pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4202 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4203
4204 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4205 *node = (pm_imaginary_node_t) {
4206 {
4207 .type = PM_IMAGINARY_NODE,
4208 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4209 .node_id = PM_NODE_IDENTIFY(parser),
4210 .location = PM_LOCATION_TOKEN_VALUE(token)
4211 },
4212 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4213 .type = PM_TOKEN_FLOAT,
4214 .start = token->start,
4215 .end = token->end - 1
4216 }))
4217 };
4218
4219 return node;
4220}
4221
4225static pm_rational_node_t *
4226pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4227 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4228
4229 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4230 *node = (pm_rational_node_t) {
4231 {
4232 .type = PM_RATIONAL_NODE,
4233 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4234 .node_id = PM_NODE_IDENTIFY(parser),
4235 .location = PM_LOCATION_TOKEN_VALUE(token)
4236 },
4237 .numerator = { 0 },
4238 .denominator = { 0 }
4239 };
4240
4241 const uint8_t *start = token->start;
4242 const uint8_t *end = token->end - 1; // r
4243
4244 while (start < end && *start == '0') start++; // 0.1 -> .1
4245 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4246
4247 size_t length = (size_t) (end - start);
4248 if (length == 1) {
4249 node->denominator.value = 1;
4250 return node;
4251 }
4252
4253 const uint8_t *point = memchr(start, '.', length);
4254 assert(point && "should have a decimal point");
4255
4256 uint8_t *digits = xmalloc(length);
4257 if (digits == NULL) {
4258 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4259 abort();
4260 }
4261
4262 memcpy(digits, start, (unsigned long) (point - start));
4263 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4264 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4265
4266 digits[0] = '1';
4267 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4268 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4269 xfree(digits);
4270
4271 pm_integers_reduce(&node->numerator, &node->denominator);
4272 return node;
4273}
4274
4279static pm_imaginary_node_t *
4280pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4281 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4282
4283 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4284 *node = (pm_imaginary_node_t) {
4285 {
4286 .type = PM_IMAGINARY_NODE,
4287 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4288 .node_id = PM_NODE_IDENTIFY(parser),
4289 .location = PM_LOCATION_TOKEN_VALUE(token)
4290 },
4291 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4292 .type = PM_TOKEN_FLOAT_RATIONAL,
4293 .start = token->start,
4294 .end = token->end - 1
4295 }))
4296 };
4297
4298 return node;
4299}
4300
4304static pm_for_node_t *
4305pm_for_node_create(
4306 pm_parser_t *parser,
4307 pm_node_t *index,
4308 pm_node_t *collection,
4309 pm_statements_node_t *statements,
4310 const pm_token_t *for_keyword,
4311 const pm_token_t *in_keyword,
4312 const pm_token_t *do_keyword,
4313 const pm_token_t *end_keyword
4314) {
4315 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4316
4317 *node = (pm_for_node_t) {
4318 {
4319 .type = PM_FOR_NODE,
4320 .node_id = PM_NODE_IDENTIFY(parser),
4321 .location = {
4322 .start = for_keyword->start,
4323 .end = end_keyword->end
4324 },
4325 },
4326 .index = index,
4327 .collection = collection,
4328 .statements = statements,
4329 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4330 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4331 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4332 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4333 };
4334
4335 return node;
4336}
4337
4341static pm_forwarding_arguments_node_t *
4342pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4343 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4344 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4345
4346 *node = (pm_forwarding_arguments_node_t) {{
4347 .type = PM_FORWARDING_ARGUMENTS_NODE,
4348 .node_id = PM_NODE_IDENTIFY(parser),
4349 .location = PM_LOCATION_TOKEN_VALUE(token)
4350 }};
4351
4352 return node;
4353}
4354
4358static pm_forwarding_parameter_node_t *
4359pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4360 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4361 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4362
4363 *node = (pm_forwarding_parameter_node_t) {{
4364 .type = PM_FORWARDING_PARAMETER_NODE,
4365 .node_id = PM_NODE_IDENTIFY(parser),
4366 .location = PM_LOCATION_TOKEN_VALUE(token)
4367 }};
4368
4369 return node;
4370}
4371
4375static pm_forwarding_super_node_t *
4376pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4377 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4378 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4379 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4380
4381 pm_block_node_t *block = NULL;
4382 if (arguments->block != NULL) {
4383 block = (pm_block_node_t *) arguments->block;
4384 }
4385
4386 *node = (pm_forwarding_super_node_t) {
4387 {
4388 .type = PM_FORWARDING_SUPER_NODE,
4389 .node_id = PM_NODE_IDENTIFY(parser),
4390 .location = {
4391 .start = token->start,
4392 .end = block != NULL ? block->base.location.end : token->end
4393 },
4394 },
4395 .block = block
4396 };
4397
4398 return node;
4399}
4400
4405static pm_hash_pattern_node_t *
4406pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4407 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4408
4409 *node = (pm_hash_pattern_node_t) {
4410 {
4411 .type = PM_HASH_PATTERN_NODE,
4412 .node_id = PM_NODE_IDENTIFY(parser),
4413 .location = {
4414 .start = opening->start,
4415 .end = closing->end
4416 },
4417 },
4418 .constant = NULL,
4419 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4420 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4421 .elements = { 0 },
4422 .rest = NULL
4423 };
4424
4425 return node;
4426}
4427
4431static pm_hash_pattern_node_t *
4432pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4433 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4434
4435 const uint8_t *start;
4436 const uint8_t *end;
4437
4438 if (elements->size > 0) {
4439 if (rest) {
4440 start = elements->nodes[0]->location.start;
4441 end = rest->location.end;
4442 } else {
4443 start = elements->nodes[0]->location.start;
4444 end = elements->nodes[elements->size - 1]->location.end;
4445 }
4446 } else {
4447 assert(rest != NULL);
4448 start = rest->location.start;
4449 end = rest->location.end;
4450 }
4451
4452 *node = (pm_hash_pattern_node_t) {
4453 {
4454 .type = PM_HASH_PATTERN_NODE,
4455 .node_id = PM_NODE_IDENTIFY(parser),
4456 .location = {
4457 .start = start,
4458 .end = end
4459 },
4460 },
4461 .constant = NULL,
4462 .elements = { 0 },
4463 .rest = rest,
4464 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4465 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4466 };
4467
4468 pm_node_t *element;
4469 PM_NODE_LIST_FOREACH(elements, index, element) {
4470 pm_node_list_append(&node->elements, element);
4471 }
4472
4473 return node;
4474}
4475
4479static pm_constant_id_t
4480pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4481 switch (PM_NODE_TYPE(target)) {
4482 case PM_GLOBAL_VARIABLE_READ_NODE:
4483 return ((pm_global_variable_read_node_t *) target)->name;
4484 case PM_BACK_REFERENCE_READ_NODE:
4485 return ((pm_back_reference_read_node_t *) target)->name;
4486 case PM_NUMBERED_REFERENCE_READ_NODE:
4487 // This will only ever happen in the event of a syntax error, but we
4488 // still need to provide something for the node.
4489 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4490 default:
4491 assert(false && "unreachable");
4492 return (pm_constant_id_t) -1;
4493 }
4494}
4495
4499static pm_global_variable_and_write_node_t *
4500pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4501 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4502 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4503
4504 *node = (pm_global_variable_and_write_node_t) {
4505 {
4506 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4507 .node_id = PM_NODE_IDENTIFY(parser),
4508 .location = {
4509 .start = target->location.start,
4510 .end = value->location.end
4511 }
4512 },
4513 .name = pm_global_variable_write_name(parser, target),
4514 .name_loc = target->location,
4515 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4516 .value = value
4517 };
4518
4519 return node;
4520}
4521
4525static pm_global_variable_operator_write_node_t *
4526pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4527 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4528
4529 *node = (pm_global_variable_operator_write_node_t) {
4530 {
4531 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4532 .node_id = PM_NODE_IDENTIFY(parser),
4533 .location = {
4534 .start = target->location.start,
4535 .end = value->location.end
4536 }
4537 },
4538 .name = pm_global_variable_write_name(parser, target),
4539 .name_loc = target->location,
4540 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4541 .value = value,
4542 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4543 };
4544
4545 return node;
4546}
4547
4551static pm_global_variable_or_write_node_t *
4552pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4553 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4554 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4555
4556 *node = (pm_global_variable_or_write_node_t) {
4557 {
4558 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4559 .node_id = PM_NODE_IDENTIFY(parser),
4560 .location = {
4561 .start = target->location.start,
4562 .end = value->location.end
4563 }
4564 },
4565 .name = pm_global_variable_write_name(parser, target),
4566 .name_loc = target->location,
4567 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4568 .value = value
4569 };
4570
4571 return node;
4572}
4573
4577static pm_global_variable_read_node_t *
4578pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4579 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4580
4581 *node = (pm_global_variable_read_node_t) {
4582 {
4583 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4584 .node_id = PM_NODE_IDENTIFY(parser),
4585 .location = PM_LOCATION_TOKEN_VALUE(name),
4586 },
4587 .name = pm_parser_constant_id_token(parser, name)
4588 };
4589
4590 return node;
4591}
4592
4596static pm_global_variable_read_node_t *
4597pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4598 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4599
4600 *node = (pm_global_variable_read_node_t) {
4601 {
4602 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4603 .node_id = PM_NODE_IDENTIFY(parser),
4604 .location = PM_LOCATION_NULL_VALUE(parser)
4605 },
4606 .name = name
4607 };
4608
4609 return node;
4610}
4611
4615static pm_global_variable_write_node_t *
4616pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4617 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4618
4619 *node = (pm_global_variable_write_node_t) {
4620 {
4621 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4622 .node_id = PM_NODE_IDENTIFY(parser),
4623 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4624 .location = {
4625 .start = target->location.start,
4626 .end = value->location.end
4627 },
4628 },
4629 .name = pm_global_variable_write_name(parser, target),
4630 .name_loc = PM_LOCATION_NODE_VALUE(target),
4631 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4632 .value = value
4633 };
4634
4635 return node;
4636}
4637
4641static pm_global_variable_write_node_t *
4642pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4643 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4644
4645 *node = (pm_global_variable_write_node_t) {
4646 {
4647 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4648 .node_id = PM_NODE_IDENTIFY(parser),
4649 .location = PM_LOCATION_NULL_VALUE(parser)
4650 },
4651 .name = name,
4652 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4653 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4654 .value = value
4655 };
4656
4657 return node;
4658}
4659
4663static pm_hash_node_t *
4664pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4665 assert(opening != NULL);
4666 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4667
4668 *node = (pm_hash_node_t) {
4669 {
4670 .type = PM_HASH_NODE,
4671 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4672 .node_id = PM_NODE_IDENTIFY(parser),
4673 .location = PM_LOCATION_TOKEN_VALUE(opening)
4674 },
4675 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4676 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4677 .elements = { 0 }
4678 };
4679
4680 return node;
4681}
4682
4686static inline void
4687pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4688 pm_node_list_append(&hash->elements, element);
4689
4690 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4691 if (static_literal) {
4692 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4693 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4694 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4695 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4696 }
4697
4698 if (!static_literal) {
4699 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4700 }
4701}
4702
4703static inline void
4704pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4705 hash->base.location.end = token->end;
4706 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4707}
4708
4712static pm_if_node_t *
4713pm_if_node_create(pm_parser_t *parser,
4714 const pm_token_t *if_keyword,
4715 pm_node_t *predicate,
4716 const pm_token_t *then_keyword,
4717 pm_statements_node_t *statements,
4718 pm_node_t *subsequent,
4719 const pm_token_t *end_keyword
4720) {
4721 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4722 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4723
4724 const uint8_t *end;
4725 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4726 end = end_keyword->end;
4727 } else if (subsequent != NULL) {
4728 end = subsequent->location.end;
4729 } else if (pm_statements_node_body_length(statements) != 0) {
4730 end = statements->base.location.end;
4731 } else {
4732 end = predicate->location.end;
4733 }
4734
4735 *node = (pm_if_node_t) {
4736 {
4737 .type = PM_IF_NODE,
4738 .flags = PM_NODE_FLAG_NEWLINE,
4739 .node_id = PM_NODE_IDENTIFY(parser),
4740 .location = {
4741 .start = if_keyword->start,
4742 .end = end
4743 },
4744 },
4745 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4746 .predicate = predicate,
4747 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4748 .statements = statements,
4749 .subsequent = subsequent,
4750 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4751 };
4752
4753 return node;
4754}
4755
4759static pm_if_node_t *
4760pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4761 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4762 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4763
4764 pm_statements_node_t *statements = pm_statements_node_create(parser);
4765 pm_statements_node_body_append(parser, statements, statement, true);
4766
4767 *node = (pm_if_node_t) {
4768 {
4769 .type = PM_IF_NODE,
4770 .flags = PM_NODE_FLAG_NEWLINE,
4771 .node_id = PM_NODE_IDENTIFY(parser),
4772 .location = {
4773 .start = statement->location.start,
4774 .end = predicate->location.end
4775 },
4776 },
4777 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4778 .predicate = predicate,
4779 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4780 .statements = statements,
4781 .subsequent = NULL,
4782 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4783 };
4784
4785 return node;
4786}
4787
4791static pm_if_node_t *
4792pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4793 pm_assert_value_expression(parser, predicate);
4794 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4795
4796 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4797 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4798
4799 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4800 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4801
4802 pm_token_t end_keyword = not_provided(parser);
4803 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4804
4805 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4806
4807 *node = (pm_if_node_t) {
4808 {
4809 .type = PM_IF_NODE,
4810 .flags = PM_NODE_FLAG_NEWLINE,
4811 .node_id = PM_NODE_IDENTIFY(parser),
4812 .location = {
4813 .start = predicate->location.start,
4814 .end = false_expression->location.end,
4815 },
4816 },
4817 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4818 .predicate = predicate,
4819 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4820 .statements = if_statements,
4821 .subsequent = (pm_node_t *) else_node,
4822 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4823 };
4824
4825 return node;
4826
4827}
4828
4829static inline void
4830pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4831 node->base.location.end = keyword->end;
4832 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4833}
4834
4835static inline void
4836pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4837 node->base.location.end = keyword->end;
4838 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4839}
4840
4844static pm_implicit_node_t *
4845pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4846 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4847
4848 *node = (pm_implicit_node_t) {
4849 {
4850 .type = PM_IMPLICIT_NODE,
4851 .node_id = PM_NODE_IDENTIFY(parser),
4852 .location = value->location
4853 },
4854 .value = value
4855 };
4856
4857 return node;
4858}
4859
4863static pm_implicit_rest_node_t *
4864pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4865 assert(token->type == PM_TOKEN_COMMA);
4866
4867 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4868
4869 *node = (pm_implicit_rest_node_t) {
4870 {
4871 .type = PM_IMPLICIT_REST_NODE,
4872 .node_id = PM_NODE_IDENTIFY(parser),
4873 .location = PM_LOCATION_TOKEN_VALUE(token)
4874 }
4875 };
4876
4877 return node;
4878}
4879
4883static pm_integer_node_t *
4884pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4885 assert(token->type == PM_TOKEN_INTEGER);
4886 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4887
4888 *node = (pm_integer_node_t) {
4889 {
4890 .type = PM_INTEGER_NODE,
4891 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4892 .node_id = PM_NODE_IDENTIFY(parser),
4893 .location = PM_LOCATION_TOKEN_VALUE(token)
4894 },
4895 .value = { 0 }
4896 };
4897
4898 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4899 switch (base) {
4900 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4901 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4902 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4903 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4904 default: assert(false && "unreachable"); break;
4905 }
4906
4907 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4908 return node;
4909}
4910
4915static pm_imaginary_node_t *
4916pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4917 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4918
4919 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4920 *node = (pm_imaginary_node_t) {
4921 {
4922 .type = PM_IMAGINARY_NODE,
4923 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4924 .node_id = PM_NODE_IDENTIFY(parser),
4925 .location = PM_LOCATION_TOKEN_VALUE(token)
4926 },
4927 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4928 .type = PM_TOKEN_INTEGER,
4929 .start = token->start,
4930 .end = token->end - 1
4931 }))
4932 };
4933
4934 return node;
4935}
4936
4941static pm_rational_node_t *
4942pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4943 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4944
4945 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4946 *node = (pm_rational_node_t) {
4947 {
4948 .type = PM_RATIONAL_NODE,
4949 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4950 .node_id = PM_NODE_IDENTIFY(parser),
4951 .location = PM_LOCATION_TOKEN_VALUE(token)
4952 },
4953 .numerator = { 0 },
4954 .denominator = { .value = 1, 0 }
4955 };
4956
4957 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4958 switch (base) {
4959 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4960 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4961 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4962 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4963 default: assert(false && "unreachable"); break;
4964 }
4965
4966 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4967
4968 return node;
4969}
4970
4975static pm_imaginary_node_t *
4976pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4977 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4978
4979 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4980 *node = (pm_imaginary_node_t) {
4981 {
4982 .type = PM_IMAGINARY_NODE,
4983 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4984 .node_id = PM_NODE_IDENTIFY(parser),
4985 .location = PM_LOCATION_TOKEN_VALUE(token)
4986 },
4987 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4988 .type = PM_TOKEN_INTEGER_RATIONAL,
4989 .start = token->start,
4990 .end = token->end - 1
4991 }))
4992 };
4993
4994 return node;
4995}
4996
5000static pm_in_node_t *
5001pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
5002 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
5003
5004 const uint8_t *end;
5005 if (statements != NULL) {
5006 end = statements->base.location.end;
5007 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
5008 end = then_keyword->end;
5009 } else {
5010 end = pattern->location.end;
5011 }
5012
5013 *node = (pm_in_node_t) {
5014 {
5015 .type = PM_IN_NODE,
5016 .node_id = PM_NODE_IDENTIFY(parser),
5017 .location = {
5018 .start = in_keyword->start,
5019 .end = end
5020 },
5021 },
5022 .pattern = pattern,
5023 .statements = statements,
5024 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5025 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5026 };
5027
5028 return node;
5029}
5030
5034static pm_instance_variable_and_write_node_t *
5035pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5036 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5037 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5038
5039 *node = (pm_instance_variable_and_write_node_t) {
5040 {
5041 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5042 .node_id = PM_NODE_IDENTIFY(parser),
5043 .location = {
5044 .start = target->base.location.start,
5045 .end = value->location.end
5046 }
5047 },
5048 .name = target->name,
5049 .name_loc = target->base.location,
5050 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5051 .value = value
5052 };
5053
5054 return node;
5055}
5056
5060static pm_instance_variable_operator_write_node_t *
5061pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5062 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5063
5064 *node = (pm_instance_variable_operator_write_node_t) {
5065 {
5066 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5067 .node_id = PM_NODE_IDENTIFY(parser),
5068 .location = {
5069 .start = target->base.location.start,
5070 .end = value->location.end
5071 }
5072 },
5073 .name = target->name,
5074 .name_loc = target->base.location,
5075 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5076 .value = value,
5077 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5078 };
5079
5080 return node;
5081}
5082
5086static pm_instance_variable_or_write_node_t *
5087pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5088 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5089 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5090
5091 *node = (pm_instance_variable_or_write_node_t) {
5092 {
5093 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5094 .node_id = PM_NODE_IDENTIFY(parser),
5095 .location = {
5096 .start = target->base.location.start,
5097 .end = value->location.end
5098 }
5099 },
5100 .name = target->name,
5101 .name_loc = target->base.location,
5102 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5103 .value = value
5104 };
5105
5106 return node;
5107}
5108
5112static pm_instance_variable_read_node_t *
5113pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5114 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5115 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5116
5117 *node = (pm_instance_variable_read_node_t) {
5118 {
5119 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5120 .node_id = PM_NODE_IDENTIFY(parser),
5121 .location = PM_LOCATION_TOKEN_VALUE(token)
5122 },
5123 .name = pm_parser_constant_id_token(parser, token)
5124 };
5125
5126 return node;
5127}
5128
5133static pm_instance_variable_write_node_t *
5134pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5135 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5136 *node = (pm_instance_variable_write_node_t) {
5137 {
5138 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5139 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5140 .node_id = PM_NODE_IDENTIFY(parser),
5141 .location = {
5142 .start = read_node->base.location.start,
5143 .end = value->location.end
5144 }
5145 },
5146 .name = read_node->name,
5147 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5148 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5149 .value = value
5150 };
5151
5152 return node;
5153}
5154
5160static void
5161pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5162 switch (PM_NODE_TYPE(part)) {
5163 case PM_STRING_NODE:
5164 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5165 break;
5166 case PM_EMBEDDED_STATEMENTS_NODE: {
5167 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5168 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5169
5170 if (embedded == NULL) {
5171 // If there are no statements or more than one statement, then
5172 // we lose the static literal flag.
5173 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5174 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5175 // If the embedded statement is a string, then we can keep the
5176 // static literal flag and mark the string as frozen.
5177 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5178 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5179 // If the embedded statement is an interpolated string and it's
5180 // a static literal, then we can keep the static literal flag.
5181 } else {
5182 // Otherwise we lose the static literal flag.
5183 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5184 }
5185
5186 break;
5187 }
5188 case PM_EMBEDDED_VARIABLE_NODE:
5189 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5190 break;
5191 default:
5192 assert(false && "unexpected node type");
5193 break;
5194 }
5195
5196 pm_node_list_append(parts, part);
5197}
5198
5202static pm_interpolated_regular_expression_node_t *
5203pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5204 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5205
5206 *node = (pm_interpolated_regular_expression_node_t) {
5207 {
5208 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5209 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5210 .node_id = PM_NODE_IDENTIFY(parser),
5211 .location = {
5212 .start = opening->start,
5213 .end = NULL,
5214 },
5215 },
5216 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5217 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5218 .parts = { 0 }
5219 };
5220
5221 return node;
5222}
5223
5224static inline void
5225pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5226 if (node->base.location.start > part->location.start) {
5227 node->base.location.start = part->location.start;
5228 }
5229 if (node->base.location.end < part->location.end) {
5230 node->base.location.end = part->location.end;
5231 }
5232
5233 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5234}
5235
5236static inline void
5237pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5238 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5239 node->base.location.end = closing->end;
5240 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5241}
5242
5266static inline void
5267pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5268#define CLEAR_FLAGS(node) \
5269 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5270
5271#define MUTABLE_FLAGS(node) \
5272 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5273
5274 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5275 node->base.location.start = part->location.start;
5276 }
5277
5278 node->base.location.end = MAX(node->base.location.end, part->location.end);
5279
5280 switch (PM_NODE_TYPE(part)) {
5281 case PM_STRING_NODE:
5282 // If inner string is not frozen, clear flags for this string
5283 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
5284 CLEAR_FLAGS(node);
5285 }
5286 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5287 break;
5288 case PM_INTERPOLATED_STRING_NODE:
5289 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5290 // If the string that we're concatenating is a static literal,
5291 // then we can keep the static literal flag for this string.
5292 } else {
5293 // Otherwise, we lose the static literal flag here and we should
5294 // also clear the mutability flags.
5295 CLEAR_FLAGS(node);
5296 }
5297 break;
5298 case PM_EMBEDDED_STATEMENTS_NODE: {
5299 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5300 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5301
5302 if (embedded == NULL) {
5303 // If we're embedding multiple statements or no statements, then
5304 // the string is not longer a static literal.
5305 CLEAR_FLAGS(node);
5306 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5307 // If the embedded statement is a string, then we can make that
5308 // string as frozen and static literal, and not touch the static
5309 // literal status of this string.
5310 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5311
5312 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5313 MUTABLE_FLAGS(node);
5314 }
5315 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5316 // If the embedded statement is an interpolated string, but that
5317 // string is marked as static literal, then we can keep our
5318 // static literal status for this string.
5319 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5320 MUTABLE_FLAGS(node);
5321 }
5322 } else {
5323 // In all other cases, we lose the static literal flag here and
5324 // become mutable.
5325 CLEAR_FLAGS(node);
5326 }
5327
5328 break;
5329 }
5330 case PM_EMBEDDED_VARIABLE_NODE:
5331 // Embedded variables clear static literal, which means we also
5332 // should clear the mutability flags.
5333 CLEAR_FLAGS(node);
5334 break;
5335 case PM_X_STRING_NODE:
5336 case PM_INTERPOLATED_X_STRING_NODE:
5337 // If this is an x string, then this is a syntax error. But we want
5338 // to handle it here so that we don't fail the assertion.
5339 CLEAR_FLAGS(node);
5340 break;
5341 default:
5342 assert(false && "unexpected node type");
5343 break;
5344 }
5345
5346 pm_node_list_append(&node->parts, part);
5347
5348#undef CLEAR_FLAGS
5349#undef MUTABLE_FLAGS
5350}
5351
5355static pm_interpolated_string_node_t *
5356pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5357 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5358 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5359
5360 switch (parser->frozen_string_literal) {
5361 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5362 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5363 break;
5364 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5365 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5366 break;
5367 }
5368
5369 *node = (pm_interpolated_string_node_t) {
5370 {
5371 .type = PM_INTERPOLATED_STRING_NODE,
5372 .flags = flags,
5373 .node_id = PM_NODE_IDENTIFY(parser),
5374 .location = {
5375 .start = opening->start,
5376 .end = closing->end,
5377 },
5378 },
5379 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5380 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5381 .parts = { 0 }
5382 };
5383
5384 if (parts != NULL) {
5385 pm_node_t *part;
5386 PM_NODE_LIST_FOREACH(parts, index, part) {
5387 pm_interpolated_string_node_append(node, part);
5388 }
5389 }
5390
5391 return node;
5392}
5393
5397static void
5398pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5399 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5400 node->base.location.end = closing->end;
5401}
5402
5403static void
5404pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5405 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5406 node->base.location.start = part->location.start;
5407 }
5408
5409 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5410 node->base.location.end = MAX(node->base.location.end, part->location.end);
5411}
5412
5413static void
5414pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5415 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5416 node->base.location.end = closing->end;
5417}
5418
5422static pm_interpolated_symbol_node_t *
5423pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5424 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5425
5426 *node = (pm_interpolated_symbol_node_t) {
5427 {
5428 .type = PM_INTERPOLATED_SYMBOL_NODE,
5429 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5430 .node_id = PM_NODE_IDENTIFY(parser),
5431 .location = {
5432 .start = opening->start,
5433 .end = closing->end,
5434 },
5435 },
5436 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5437 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5438 .parts = { 0 }
5439 };
5440
5441 if (parts != NULL) {
5442 pm_node_t *part;
5443 PM_NODE_LIST_FOREACH(parts, index, part) {
5444 pm_interpolated_symbol_node_append(node, part);
5445 }
5446 }
5447
5448 return node;
5449}
5450
5454static pm_interpolated_x_string_node_t *
5455pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5456 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5457
5458 *node = (pm_interpolated_x_string_node_t) {
5459 {
5460 .type = PM_INTERPOLATED_X_STRING_NODE,
5461 .node_id = PM_NODE_IDENTIFY(parser),
5462 .location = {
5463 .start = opening->start,
5464 .end = closing->end
5465 },
5466 },
5467 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5468 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5469 .parts = { 0 }
5470 };
5471
5472 return node;
5473}
5474
5475static inline void
5476pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5477 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5478 node->base.location.end = part->location.end;
5479}
5480
5481static inline void
5482pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5483 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5484 node->base.location.end = closing->end;
5485}
5486
5490static pm_it_local_variable_read_node_t *
5491pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5492 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5493
5494 *node = (pm_it_local_variable_read_node_t) {
5495 {
5496 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5497 .node_id = PM_NODE_IDENTIFY(parser),
5498 .location = PM_LOCATION_TOKEN_VALUE(name)
5499 }
5500 };
5501
5502 return node;
5503}
5504
5508static pm_it_parameters_node_t *
5509pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5510 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5511
5512 *node = (pm_it_parameters_node_t) {
5513 {
5514 .type = PM_IT_PARAMETERS_NODE,
5515 .node_id = PM_NODE_IDENTIFY(parser),
5516 .location = {
5517 .start = opening->start,
5518 .end = closing->end
5519 }
5520 }
5521 };
5522
5523 return node;
5524}
5525
5529static pm_keyword_hash_node_t *
5530pm_keyword_hash_node_create(pm_parser_t *parser) {
5531 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5532
5533 *node = (pm_keyword_hash_node_t) {
5534 .base = {
5535 .type = PM_KEYWORD_HASH_NODE,
5536 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5537 .node_id = PM_NODE_IDENTIFY(parser),
5538 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5539 },
5540 .elements = { 0 }
5541 };
5542
5543 return node;
5544}
5545
5549static void
5550pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5551 // If the element being added is not an AssocNode or does not have a symbol
5552 // key, then we want to turn the SYMBOL_KEYS flag off.
5553 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5554 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5555 }
5556
5557 pm_node_list_append(&hash->elements, element);
5558 if (hash->base.location.start == NULL) {
5559 hash->base.location.start = element->location.start;
5560 }
5561 hash->base.location.end = element->location.end;
5562}
5563
5567static pm_required_keyword_parameter_node_t *
5568pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5569 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5570
5571 *node = (pm_required_keyword_parameter_node_t) {
5572 {
5573 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5574 .node_id = PM_NODE_IDENTIFY(parser),
5575 .location = {
5576 .start = name->start,
5577 .end = name->end
5578 },
5579 },
5580 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5581 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5582 };
5583
5584 return node;
5585}
5586
5590static pm_optional_keyword_parameter_node_t *
5591pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5592 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5593
5594 *node = (pm_optional_keyword_parameter_node_t) {
5595 {
5596 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5597 .node_id = PM_NODE_IDENTIFY(parser),
5598 .location = {
5599 .start = name->start,
5600 .end = value->location.end
5601 },
5602 },
5603 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5604 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5605 .value = value
5606 };
5607
5608 return node;
5609}
5610
5614static pm_keyword_rest_parameter_node_t *
5615pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5616 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5617
5618 *node = (pm_keyword_rest_parameter_node_t) {
5619 {
5620 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5621 .node_id = PM_NODE_IDENTIFY(parser),
5622 .location = {
5623 .start = operator->start,
5624 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5625 },
5626 },
5627 .name = pm_parser_optional_constant_id_token(parser, name),
5628 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5629 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5630 };
5631
5632 return node;
5633}
5634
5638static pm_lambda_node_t *
5639pm_lambda_node_create(
5640 pm_parser_t *parser,
5641 pm_constant_id_list_t *locals,
5642 const pm_token_t *operator,
5643 const pm_token_t *opening,
5644 const pm_token_t *closing,
5645 pm_node_t *parameters,
5646 pm_node_t *body
5647) {
5648 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5649
5650 *node = (pm_lambda_node_t) {
5651 {
5652 .type = PM_LAMBDA_NODE,
5653 .node_id = PM_NODE_IDENTIFY(parser),
5654 .location = {
5655 .start = operator->start,
5656 .end = closing->end
5657 },
5658 },
5659 .locals = *locals,
5660 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5661 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5662 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5663 .parameters = parameters,
5664 .body = body
5665 };
5666
5667 return node;
5668}
5669
5673static pm_local_variable_and_write_node_t *
5674pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5675 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5676 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5677 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5678
5679 *node = (pm_local_variable_and_write_node_t) {
5680 {
5681 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5682 .node_id = PM_NODE_IDENTIFY(parser),
5683 .location = {
5684 .start = target->location.start,
5685 .end = value->location.end
5686 }
5687 },
5688 .name_loc = target->location,
5689 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5690 .value = value,
5691 .name = name,
5692 .depth = depth
5693 };
5694
5695 return node;
5696}
5697
5701static pm_local_variable_operator_write_node_t *
5702pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5703 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5704
5705 *node = (pm_local_variable_operator_write_node_t) {
5706 {
5707 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5708 .node_id = PM_NODE_IDENTIFY(parser),
5709 .location = {
5710 .start = target->location.start,
5711 .end = value->location.end
5712 }
5713 },
5714 .name_loc = target->location,
5715 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5716 .value = value,
5717 .name = name,
5718 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5719 .depth = depth
5720 };
5721
5722 return node;
5723}
5724
5728static pm_local_variable_or_write_node_t *
5729pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5730 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5731 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5732 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5733
5734 *node = (pm_local_variable_or_write_node_t) {
5735 {
5736 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5737 .node_id = PM_NODE_IDENTIFY(parser),
5738 .location = {
5739 .start = target->location.start,
5740 .end = value->location.end
5741 }
5742 },
5743 .name_loc = target->location,
5744 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5745 .value = value,
5746 .name = name,
5747 .depth = depth
5748 };
5749
5750 return node;
5751}
5752
5756static pm_local_variable_read_node_t *
5757pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5758 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5759
5760 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5761
5762 *node = (pm_local_variable_read_node_t) {
5763 {
5764 .type = PM_LOCAL_VARIABLE_READ_NODE,
5765 .node_id = PM_NODE_IDENTIFY(parser),
5766 .location = PM_LOCATION_TOKEN_VALUE(name)
5767 },
5768 .name = name_id,
5769 .depth = depth
5770 };
5771
5772 return node;
5773}
5774
5778static pm_local_variable_read_node_t *
5779pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5780 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5781 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5782}
5783
5788static pm_local_variable_read_node_t *
5789pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5790 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5791 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5792}
5793
5797static pm_local_variable_write_node_t *
5798pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5799 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5800
5801 *node = (pm_local_variable_write_node_t) {
5802 {
5803 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5804 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5805 .node_id = PM_NODE_IDENTIFY(parser),
5806 .location = {
5807 .start = name_loc->start,
5808 .end = value->location.end
5809 }
5810 },
5811 .name = name,
5812 .depth = depth,
5813 .value = value,
5814 .name_loc = *name_loc,
5815 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5816 };
5817
5818 return node;
5819}
5820
/**
 * Returns true if the byte range [start, end) is exactly the two bytes "it".
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    return start[0] == 'i' && start[1] == 't';
}
5828
/**
 * Returns true if the byte range [start, end) is exactly two bytes long and
 * consists of an underscore followed by a decimal digit other than 0.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;

    return (start[1] != '0') && pm_char_is_decimal_digit(start[1]);
}
5837
5842static inline void
5843pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5844 if (pm_token_is_numbered_parameter(start, end)) {
5845 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5846 }
5847}
5848
5853static pm_local_variable_target_node_t *
5854pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5855 pm_refute_numbered_parameter(parser, location->start, location->end);
5856 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5857
5858 *node = (pm_local_variable_target_node_t) {
5859 {
5860 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5861 .node_id = PM_NODE_IDENTIFY(parser),
5862 .location = *location
5863 },
5864 .name = name,
5865 .depth = depth
5866 };
5867
5868 return node;
5869}
5870
5874static pm_match_predicate_node_t *
5875pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5876 pm_assert_value_expression(parser, value);
5877
5878 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5879
5880 *node = (pm_match_predicate_node_t) {
5881 {
5882 .type = PM_MATCH_PREDICATE_NODE,
5883 .node_id = PM_NODE_IDENTIFY(parser),
5884 .location = {
5885 .start = value->location.start,
5886 .end = pattern->location.end
5887 }
5888 },
5889 .value = value,
5890 .pattern = pattern,
5891 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5892 };
5893
5894 return node;
5895}
5896
5900static pm_match_required_node_t *
5901pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5902 pm_assert_value_expression(parser, value);
5903
5904 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5905
5906 *node = (pm_match_required_node_t) {
5907 {
5908 .type = PM_MATCH_REQUIRED_NODE,
5909 .node_id = PM_NODE_IDENTIFY(parser),
5910 .location = {
5911 .start = value->location.start,
5912 .end = pattern->location.end
5913 }
5914 },
5915 .value = value,
5916 .pattern = pattern,
5917 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5918 };
5919
5920 return node;
5921}
5922
5926static pm_match_write_node_t *
5927pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5928 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5929
5930 *node = (pm_match_write_node_t) {
5931 {
5932 .type = PM_MATCH_WRITE_NODE,
5933 .node_id = PM_NODE_IDENTIFY(parser),
5934 .location = call->base.location
5935 },
5936 .call = call,
5937 .targets = { 0 }
5938 };
5939
5940 return node;
5941}
5942
5946static pm_module_node_t *
5947pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5948 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5949
5950 *node = (pm_module_node_t) {
5951 {
5952 .type = PM_MODULE_NODE,
5953 .node_id = PM_NODE_IDENTIFY(parser),
5954 .location = {
5955 .start = module_keyword->start,
5956 .end = end_keyword->end
5957 }
5958 },
5959 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5960 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5961 .constant_path = constant_path,
5962 .body = body,
5963 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5964 .name = pm_parser_constant_id_token(parser, name)
5965 };
5966
5967 return node;
5968}
5969
5973static pm_multi_target_node_t *
5974pm_multi_target_node_create(pm_parser_t *parser) {
5975 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5976
5977 *node = (pm_multi_target_node_t) {
5978 {
5979 .type = PM_MULTI_TARGET_NODE,
5980 .node_id = PM_NODE_IDENTIFY(parser),
5981 .location = { .start = NULL, .end = NULL }
5982 },
5983 .lefts = { 0 },
5984 .rest = NULL,
5985 .rights = { 0 },
5986 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5987 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5988 };
5989
5990 return node;
5991}
5992
5996static void
5997pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5998 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5999 if (node->rest == NULL) {
6000 node->rest = target;
6001 } else {
6002 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
6003 pm_node_list_append(&node->rights, target);
6004 }
6005 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
6006 if (node->rest == NULL) {
6007 node->rest = target;
6008 } else {
6009 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
6010 pm_node_list_append(&node->rights, target);
6011 }
6012 } else if (node->rest == NULL) {
6013 pm_node_list_append(&node->lefts, target);
6014 } else {
6015 pm_node_list_append(&node->rights, target);
6016 }
6017
6018 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
6019 node->base.location.start = target->location.start;
6020 }
6021
6022 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6023 node->base.location.end = target->location.end;
6024 }
6025}
6026
6030static void
6031pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6032 node->base.location.start = lparen->start;
6033 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6034}
6035
6039static void
6040pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6041 node->base.location.end = rparen->end;
6042 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6043}
6044
6048static pm_multi_write_node_t *
6049pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6050 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6051
6052 *node = (pm_multi_write_node_t) {
6053 {
6054 .type = PM_MULTI_WRITE_NODE,
6055 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6056 .node_id = PM_NODE_IDENTIFY(parser),
6057 .location = {
6058 .start = target->base.location.start,
6059 .end = value->location.end
6060 }
6061 },
6062 .lefts = target->lefts,
6063 .rest = target->rest,
6064 .rights = target->rights,
6065 .lparen_loc = target->lparen_loc,
6066 .rparen_loc = target->rparen_loc,
6067 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6068 .value = value
6069 };
6070
6071 // Explicitly do not call pm_node_destroy here because we want to keep
6072 // around all of the information within the MultiWriteNode node.
6073 xfree(target);
6074
6075 return node;
6076}
6077
6081static pm_next_node_t *
6082pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6083 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6084 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6085
6086 *node = (pm_next_node_t) {
6087 {
6088 .type = PM_NEXT_NODE,
6089 .node_id = PM_NODE_IDENTIFY(parser),
6090 .location = {
6091 .start = keyword->start,
6092 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6093 }
6094 },
6095 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6096 .arguments = arguments
6097 };
6098
6099 return node;
6100}
6101
6105static pm_nil_node_t *
6106pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6107 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6108 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6109
6110 *node = (pm_nil_node_t) {{
6111 .type = PM_NIL_NODE,
6112 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6113 .node_id = PM_NODE_IDENTIFY(parser),
6114 .location = PM_LOCATION_TOKEN_VALUE(token)
6115 }};
6116
6117 return node;
6118}
6119
6123static pm_no_keywords_parameter_node_t *
6124pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6125 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6126 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6127 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6128
6129 *node = (pm_no_keywords_parameter_node_t) {
6130 {
6131 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6132 .node_id = PM_NODE_IDENTIFY(parser),
6133 .location = {
6134 .start = operator->start,
6135 .end = keyword->end
6136 }
6137 },
6138 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6139 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6140 };
6141
6142 return node;
6143}
6144
6148static pm_numbered_parameters_node_t *
6149pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6150 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6151
6152 *node = (pm_numbered_parameters_node_t) {
6153 {
6154 .type = PM_NUMBERED_PARAMETERS_NODE,
6155 .node_id = PM_NODE_IDENTIFY(parser),
6156 .location = *location
6157 },
6158 .maximum = maximum
6159 };
6160
6161 return node;
6162}
6163
6168#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6169
6176static uint32_t
6177pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6178 const uint8_t *start = token->start + 1;
6179 const uint8_t *end = token->end;
6180
6181 ptrdiff_t diff = end - start;
6182 assert(diff > 0);
6183#if PTRDIFF_MAX > SIZE_MAX
6184 assert(diff < (ptrdiff_t) SIZE_MAX);
6185#endif
6186 size_t length = (size_t) diff;
6187
6188 char *digits = xcalloc(length + 1, sizeof(char));
6189 memcpy(digits, start, length);
6190 digits[length] = '\0';
6191
6192 char *endptr;
6193 errno = 0;
6194 unsigned long value = strtoul(digits, &endptr, 10);
6195
6196 if ((digits == endptr) || (*endptr != '\0')) {
6197 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6198 value = 0;
6199 }
6200
6201 xfree(digits);
6202
6203 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6204 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6205 value = 0;
6206 }
6207
6208 return (uint32_t) value;
6209}
6210
6211#undef NTH_REF_MAX
6212
6216static pm_numbered_reference_read_node_t *
6217pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6218 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6219 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6220
6221 *node = (pm_numbered_reference_read_node_t) {
6222 {
6223 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6224 .node_id = PM_NODE_IDENTIFY(parser),
6225 .location = PM_LOCATION_TOKEN_VALUE(name),
6226 },
6227 .number = pm_numbered_reference_read_node_number(parser, name)
6228 };
6229
6230 return node;
6231}
6232
6236static pm_optional_parameter_node_t *
6237pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6238 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6239
6240 *node = (pm_optional_parameter_node_t) {
6241 {
6242 .type = PM_OPTIONAL_PARAMETER_NODE,
6243 .node_id = PM_NODE_IDENTIFY(parser),
6244 .location = {
6245 .start = name->start,
6246 .end = value->location.end
6247 }
6248 },
6249 .name = pm_parser_constant_id_token(parser, name),
6250 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6251 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6252 .value = value
6253 };
6254
6255 return node;
6256}
6257
6261static pm_or_node_t *
6262pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6263 pm_assert_value_expression(parser, left);
6264
6265 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6266
6267 *node = (pm_or_node_t) {
6268 {
6269 .type = PM_OR_NODE,
6270 .node_id = PM_NODE_IDENTIFY(parser),
6271 .location = {
6272 .start = left->location.start,
6273 .end = right->location.end
6274 }
6275 },
6276 .left = left,
6277 .right = right,
6278 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6279 };
6280
6281 return node;
6282}
6283
6287static pm_parameters_node_t *
6288pm_parameters_node_create(pm_parser_t *parser) {
6289 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6290
6291 *node = (pm_parameters_node_t) {
6292 {
6293 .type = PM_PARAMETERS_NODE,
6294 .node_id = PM_NODE_IDENTIFY(parser),
6295 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6296 },
6297 .rest = NULL,
6298 .keyword_rest = NULL,
6299 .block = NULL,
6300 .requireds = { 0 },
6301 .optionals = { 0 },
6302 .posts = { 0 },
6303 .keywords = { 0 }
6304 };
6305
6306 return node;
6307}
6308
6312static void
6313pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6314 if (params->base.location.start == NULL) {
6315 params->base.location.start = param->location.start;
6316 } else {
6317 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6318 }
6319
6320 if (params->base.location.end == NULL) {
6321 params->base.location.end = param->location.end;
6322 } else {
6323 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6324 }
6325}
6326
6330static void
6331pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6332 pm_parameters_node_location_set(params, param);
6333 pm_node_list_append(&params->requireds, param);
6334}
6335
6339static void
6340pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6341 pm_parameters_node_location_set(params, (pm_node_t *) param);
6342 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6343}
6344
6348static void
6349pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6350 pm_parameters_node_location_set(params, param);
6351 pm_node_list_append(&params->posts, param);
6352}
6353
6357static void
6358pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6359 pm_parameters_node_location_set(params, param);
6360 params->rest = param;
6361}
6362
6366static void
6367pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6368 pm_parameters_node_location_set(params, param);
6369 pm_node_list_append(&params->keywords, param);
6370}
6371
6375static void
6376pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6377 assert(params->keyword_rest == NULL);
6378 pm_parameters_node_location_set(params, param);
6379 params->keyword_rest = param;
6380}
6381
6385static void
6386pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6387 assert(params->block == NULL);
6388 pm_parameters_node_location_set(params, (pm_node_t *) param);
6389 params->block = param;
6390}
6391
6395static pm_program_node_t *
6396pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6397 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6398
6399 *node = (pm_program_node_t) {
6400 {
6401 .type = PM_PROGRAM_NODE,
6402 .node_id = PM_NODE_IDENTIFY(parser),
6403 .location = {
6404 .start = statements == NULL ? parser->start : statements->base.location.start,
6405 .end = statements == NULL ? parser->end : statements->base.location.end
6406 }
6407 },
6408 .locals = *locals,
6409 .statements = statements
6410 };
6411
6412 return node;
6413}
6414
6418static pm_parentheses_node_t *
6419pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6420 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6421
6422 *node = (pm_parentheses_node_t) {
6423 {
6424 .type = PM_PARENTHESES_NODE,
6425 .flags = flags,
6426 .node_id = PM_NODE_IDENTIFY(parser),
6427 .location = {
6428 .start = opening->start,
6429 .end = closing->end
6430 }
6431 },
6432 .body = body,
6433 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6434 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6435 };
6436
6437 return node;
6438}
6439
6443static pm_pinned_expression_node_t *
6444pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6445 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6446
6447 *node = (pm_pinned_expression_node_t) {
6448 {
6449 .type = PM_PINNED_EXPRESSION_NODE,
6450 .node_id = PM_NODE_IDENTIFY(parser),
6451 .location = {
6452 .start = operator->start,
6453 .end = rparen->end
6454 }
6455 },
6456 .expression = expression,
6457 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6458 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6459 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6460 };
6461
6462 return node;
6463}
6464
6468static pm_pinned_variable_node_t *
6469pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6470 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6471
6472 *node = (pm_pinned_variable_node_t) {
6473 {
6474 .type = PM_PINNED_VARIABLE_NODE,
6475 .node_id = PM_NODE_IDENTIFY(parser),
6476 .location = {
6477 .start = operator->start,
6478 .end = variable->location.end
6479 }
6480 },
6481 .variable = variable,
6482 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6483 };
6484
6485 return node;
6486}
6487
6491static pm_post_execution_node_t *
6492pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6493 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6494
6495 *node = (pm_post_execution_node_t) {
6496 {
6497 .type = PM_POST_EXECUTION_NODE,
6498 .node_id = PM_NODE_IDENTIFY(parser),
6499 .location = {
6500 .start = keyword->start,
6501 .end = closing->end
6502 }
6503 },
6504 .statements = statements,
6505 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6506 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6507 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6508 };
6509
6510 return node;
6511}
6512
6516static pm_pre_execution_node_t *
6517pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6518 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6519
6520 *node = (pm_pre_execution_node_t) {
6521 {
6522 .type = PM_PRE_EXECUTION_NODE,
6523 .node_id = PM_NODE_IDENTIFY(parser),
6524 .location = {
6525 .start = keyword->start,
6526 .end = closing->end
6527 }
6528 },
6529 .statements = statements,
6530 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6531 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6532 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6533 };
6534
6535 return node;
6536}
6537
6541static pm_range_node_t *
6542pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6543 pm_assert_value_expression(parser, left);
6544 pm_assert_value_expression(parser, right);
6545
6546 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6547 pm_node_flags_t flags = 0;
6548
6549 // Indicate that this node is an exclusive range if the operator is `...`.
6550 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6551 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6552 }
6553
6554 // Indicate that this node is a static literal (i.e., can be compiled with
6555 // a putobject in CRuby) if the left and right are implicit nil, explicit
6556 // nil, or integers.
6557 if (
6558 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6559 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6560 ) {
6561 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6562 }
6563
6564 *node = (pm_range_node_t) {
6565 {
6566 .type = PM_RANGE_NODE,
6567 .flags = flags,
6568 .node_id = PM_NODE_IDENTIFY(parser),
6569 .location = {
6570 .start = (left == NULL ? operator->start : left->location.start),
6571 .end = (right == NULL ? operator->end : right->location.end)
6572 }
6573 },
6574 .left = left,
6575 .right = right,
6576 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6577 };
6578
6579 return node;
6580}
6581
6585static pm_redo_node_t *
6586pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6587 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6588 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6589
6590 *node = (pm_redo_node_t) {{
6591 .type = PM_REDO_NODE,
6592 .node_id = PM_NODE_IDENTIFY(parser),
6593 .location = PM_LOCATION_TOKEN_VALUE(token)
6594 }};
6595
6596 return node;
6597}
6598
6603static pm_regular_expression_node_t *
6604pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6605 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6606
6607 *node = (pm_regular_expression_node_t) {
6608 {
6609 .type = PM_REGULAR_EXPRESSION_NODE,
6610 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6611 .node_id = PM_NODE_IDENTIFY(parser),
6612 .location = {
6613 .start = MIN(opening->start, closing->start),
6614 .end = MAX(opening->end, closing->end)
6615 }
6616 },
6617 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6618 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6619 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6620 .unescaped = *unescaped
6621 };
6622
6623 return node;
6624}
6625
6629static inline pm_regular_expression_node_t *
6630pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6631 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6632}
6633
6637static pm_required_parameter_node_t *
6638pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6639 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6640
6641 *node = (pm_required_parameter_node_t) {
6642 {
6643 .type = PM_REQUIRED_PARAMETER_NODE,
6644 .node_id = PM_NODE_IDENTIFY(parser),
6645 .location = PM_LOCATION_TOKEN_VALUE(token)
6646 },
6647 .name = pm_parser_constant_id_token(parser, token)
6648 };
6649
6650 return node;
6651}
6652
6656static pm_rescue_modifier_node_t *
6657pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6658 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6659
6660 *node = (pm_rescue_modifier_node_t) {
6661 {
6662 .type = PM_RESCUE_MODIFIER_NODE,
6663 .node_id = PM_NODE_IDENTIFY(parser),
6664 .location = {
6665 .start = expression->location.start,
6666 .end = rescue_expression->location.end
6667 }
6668 },
6669 .expression = expression,
6670 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6671 .rescue_expression = rescue_expression
6672 };
6673
6674 return node;
6675}
6676
6680static pm_rescue_node_t *
6681pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6682 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6683
6684 *node = (pm_rescue_node_t) {
6685 {
6686 .type = PM_RESCUE_NODE,
6687 .node_id = PM_NODE_IDENTIFY(parser),
6688 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6689 },
6690 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6691 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6692 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6693 .reference = NULL,
6694 .statements = NULL,
6695 .subsequent = NULL,
6696 .exceptions = { 0 }
6697 };
6698
6699 return node;
6700}
6701
6702static inline void
6703pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6704 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6705}
6706
6710static void
6711pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6712 node->reference = reference;
6713 node->base.location.end = reference->location.end;
6714}
6715
6719static void
6720pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6721 node->statements = statements;
6722 if (pm_statements_node_body_length(statements) > 0) {
6723 node->base.location.end = statements->base.location.end;
6724 }
6725}
6726
6730static void
6731pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6732 node->subsequent = subsequent;
6733 node->base.location.end = subsequent->base.location.end;
6734}
6735
6739static void
6740pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6741 pm_node_list_append(&node->exceptions, exception);
6742 node->base.location.end = exception->location.end;
6743}
6744
6748static pm_rest_parameter_node_t *
6749pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6750 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6751
6752 *node = (pm_rest_parameter_node_t) {
6753 {
6754 .type = PM_REST_PARAMETER_NODE,
6755 .node_id = PM_NODE_IDENTIFY(parser),
6756 .location = {
6757 .start = operator->start,
6758 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6759 }
6760 },
6761 .name = pm_parser_optional_constant_id_token(parser, name),
6762 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6763 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6764 };
6765
6766 return node;
6767}
6768
6772static pm_retry_node_t *
6773pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6774 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6775 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6776
6777 *node = (pm_retry_node_t) {{
6778 .type = PM_RETRY_NODE,
6779 .node_id = PM_NODE_IDENTIFY(parser),
6780 .location = PM_LOCATION_TOKEN_VALUE(token)
6781 }};
6782
6783 return node;
6784}
6785
6789static pm_return_node_t *
6790pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6791 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6792
6793 *node = (pm_return_node_t) {
6794 {
6795 .type = PM_RETURN_NODE,
6796 .node_id = PM_NODE_IDENTIFY(parser),
6797 .location = {
6798 .start = keyword->start,
6799 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6800 }
6801 },
6802 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6803 .arguments = arguments
6804 };
6805
6806 return node;
6807}
6808
6812static pm_self_node_t *
6813pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6814 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6815 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6816
6817 *node = (pm_self_node_t) {{
6818 .type = PM_SELF_NODE,
6819 .node_id = PM_NODE_IDENTIFY(parser),
6820 .location = PM_LOCATION_TOKEN_VALUE(token)
6821 }};
6822
6823 return node;
6824}
6825
6829static pm_shareable_constant_node_t *
6830pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6831 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6832
6833 *node = (pm_shareable_constant_node_t) {
6834 {
6835 .type = PM_SHAREABLE_CONSTANT_NODE,
6836 .flags = (pm_node_flags_t) value,
6837 .node_id = PM_NODE_IDENTIFY(parser),
6838 .location = PM_LOCATION_NODE_VALUE(write)
6839 },
6840 .write = write
6841 };
6842
6843 return node;
6844}
6845
6849static pm_singleton_class_node_t *
6850pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6851 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6852
6853 *node = (pm_singleton_class_node_t) {
6854 {
6855 .type = PM_SINGLETON_CLASS_NODE,
6856 .node_id = PM_NODE_IDENTIFY(parser),
6857 .location = {
6858 .start = class_keyword->start,
6859 .end = end_keyword->end
6860 }
6861 },
6862 .locals = *locals,
6863 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6864 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6865 .expression = expression,
6866 .body = body,
6867 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6868 };
6869
6870 return node;
6871}
6872
6876static pm_source_encoding_node_t *
6877pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6878 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6879 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6880
6881 *node = (pm_source_encoding_node_t) {{
6882 .type = PM_SOURCE_ENCODING_NODE,
6883 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6884 .node_id = PM_NODE_IDENTIFY(parser),
6885 .location = PM_LOCATION_TOKEN_VALUE(token)
6886 }};
6887
6888 return node;
6889}
6890
6894static pm_source_file_node_t*
6895pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6896 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6897 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6898
6899 pm_node_flags_t flags = 0;
6900
6901 switch (parser->frozen_string_literal) {
6902 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6903 flags |= PM_STRING_FLAGS_MUTABLE;
6904 break;
6905 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6906 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6907 break;
6908 }
6909
6910 *node = (pm_source_file_node_t) {
6911 {
6912 .type = PM_SOURCE_FILE_NODE,
6913 .flags = flags,
6914 .node_id = PM_NODE_IDENTIFY(parser),
6915 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6916 },
6917 .filepath = parser->filepath
6918 };
6919
6920 return node;
6921}
6922
6926static pm_source_line_node_t *
6927pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6928 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6929 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6930
6931 *node = (pm_source_line_node_t) {{
6932 .type = PM_SOURCE_LINE_NODE,
6933 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6934 .node_id = PM_NODE_IDENTIFY(parser),
6935 .location = PM_LOCATION_TOKEN_VALUE(token)
6936 }};
6937
6938 return node;
6939}
6940
6944static pm_splat_node_t *
6945pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6946 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6947
6948 *node = (pm_splat_node_t) {
6949 {
6950 .type = PM_SPLAT_NODE,
6951 .node_id = PM_NODE_IDENTIFY(parser),
6952 .location = {
6953 .start = operator->start,
6954 .end = (expression == NULL ? operator->end : expression->location.end)
6955 }
6956 },
6957 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6958 .expression = expression
6959 };
6960
6961 return node;
6962}
6963
6967static pm_statements_node_t *
6968pm_statements_node_create(pm_parser_t *parser) {
6969 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6970
6971 *node = (pm_statements_node_t) {
6972 {
6973 .type = PM_STATEMENTS_NODE,
6974 .node_id = PM_NODE_IDENTIFY(parser),
6975 .location = PM_LOCATION_NULL_VALUE(parser)
6976 },
6977 .body = { 0 }
6978 };
6979
6980 return node;
6981}
6982
6986static size_t
6987pm_statements_node_body_length(pm_statements_node_t *node) {
6988 return node && node->body.size;
6989}
6990
6994static void
6995pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6996 node->base.location = (pm_location_t) { .start = start, .end = end };
6997}
6998
7003static inline void
7004pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
7005 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
7006 node->base.location.start = statement->location.start;
7007 }
7008
7009 if (statement->location.end > node->base.location.end) {
7010 node->base.location.end = statement->location.end;
7011 }
7012}
7013
7017static void
7018pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
7019 pm_statements_node_body_update(node, statement);
7020
7021 if (node->body.size > 0) {
7022 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
7023
7024 switch (PM_NODE_TYPE(previous)) {
7025 case PM_BREAK_NODE:
7026 case PM_NEXT_NODE:
7027 case PM_REDO_NODE:
7028 case PM_RETRY_NODE:
7029 case PM_RETURN_NODE:
7030 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7031 break;
7032 default:
7033 break;
7034 }
7035 }
7036
7037 pm_node_list_append(&node->body, statement);
7038 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7039}
7040
7044static void
7045pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7046 pm_statements_node_body_update(node, statement);
7047 pm_node_list_prepend(&node->body, statement);
7048 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7049}
7050
7054static inline pm_string_node_t *
7055pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7056 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7057 pm_node_flags_t flags = 0;
7058
7059 switch (parser->frozen_string_literal) {
7060 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7061 flags = PM_STRING_FLAGS_MUTABLE;
7062 break;
7063 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7064 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7065 break;
7066 }
7067
7068 *node = (pm_string_node_t) {
7069 {
7070 .type = PM_STRING_NODE,
7071 .flags = flags,
7072 .node_id = PM_NODE_IDENTIFY(parser),
7073 .location = {
7074 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7075 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7076 }
7077 },
7078 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7079 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7080 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7081 .unescaped = *string
7082 };
7083
7084 return node;
7085}
7086
7090static pm_string_node_t *
7091pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7092 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7093}
7094
7099static pm_string_node_t *
7100pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7101 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7102 parser->current_string = PM_STRING_EMPTY;
7103 return node;
7104}
7105
7109static pm_super_node_t *
7110pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7111 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7112 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7113
7114 const uint8_t *end = pm_arguments_end(arguments);
7115 if (end == NULL) {
7116 assert(false && "unreachable");
7117 }
7118
7119 *node = (pm_super_node_t) {
7120 {
7121 .type = PM_SUPER_NODE,
7122 .node_id = PM_NODE_IDENTIFY(parser),
7123 .location = {
7124 .start = keyword->start,
7125 .end = end,
7126 }
7127 },
7128 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7129 .lparen_loc = arguments->opening_loc,
7130 .arguments = arguments->arguments,
7131 .rparen_loc = arguments->closing_loc,
7132 .block = arguments->block
7133 };
7134
7135 return node;
7136}
7137
7142static bool
7143pm_ascii_only_p(const pm_string_t *contents) {
7144 const size_t length = pm_string_length(contents);
7145 const uint8_t *source = pm_string_source(contents);
7146
7147 for (size_t index = 0; index < length; index++) {
7148 if (source[index] & 0x80) return false;
7149 }
7150
7151 return true;
7152}
7153
7157static void
7158parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7159 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7160 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7161
7162 if (width == 0) {
7163 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7164 break;
7165 }
7166
7167 cursor += width;
7168 }
7169}
7170
7175static void
7176parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7177 const pm_encoding_t *encoding = parser->encoding;
7178
7179 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7180 size_t width = encoding->char_width(cursor, end - cursor);
7181
7182 if (width == 0) {
7183 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7184 break;
7185 }
7186
7187 cursor += width;
7188 }
7189}
7190
7200static inline pm_node_flags_t
7201parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7202 if (parser->explicit_encoding != NULL) {
7203 // A Symbol may optionally have its encoding explicitly set. This will
7204 // happen if an escape sequence results in a non-ASCII code point.
7205 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7206 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7207 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7208 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7209 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7210 } else if (validate) {
7211 parse_symbol_encoding_validate_other(parser, location, contents);
7212 }
7213 } else if (pm_ascii_only_p(contents)) {
7214 // Ruby stipulates that all source files must use an ASCII-compatible
7215 // encoding. Thus, all symbols appearing in source are eligible for
7216 // "downgrading" to US-ASCII.
7217 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7218 } else if (validate) {
7219 parse_symbol_encoding_validate_other(parser, location, contents);
7220 }
7221
7222 return 0;
7223}
7224
7225static pm_node_flags_t
7226parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7227 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7228 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7229 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7230 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7231
7232 // There's special validation logic used if a string does not contain any character escape sequences.
7233 if (parser->explicit_encoding == NULL) {
7234 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7235 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7236 // the US-ASCII encoding.
7237 if (ascii_only) {
7238 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7239 }
7240
7241 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7242 if (!ascii_only) {
7243 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7244 }
7245 } else if (parser->encoding != modifier_encoding) {
7246 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7247
7248 if (modifier == 'n' && !ascii_only) {
7249 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7250 }
7251 }
7252
7253 return flags;
7254 }
7255
7256 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7257 bool mixed_encoding = false;
7258
7259 if (mixed_encoding) {
7260 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7261 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7262 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7263 bool valid_string_in_modifier_encoding = true;
7264
7265 if (!valid_string_in_modifier_encoding) {
7266 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7267 }
7268 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7269 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7270 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7271 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7272 }
7273 }
7274
7275 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7276 return flags;
7277}
7278
7285static pm_node_flags_t
7286parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7287 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7288 bool valid_unicode_range = true;
7289 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7290 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7291 return flags;
7292 }
7293
7294 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7295 // to multi-byte characters are allowed.
7296 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7297 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7298 // following error message appearing twice. We do the same for compatibility.
7299 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7300 }
7301
7310 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7311 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7312 }
7313
7314 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7315 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7316 }
7317
7318 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7319 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7320 }
7321
7322 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7323 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7324 }
7325
7326 // At this point no encoding modifiers will be present on the regular expression as they would have already
7327 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7328 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7329 if (ascii_only) {
7330 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7331 }
7332
7333 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7334 // or by specifying a modifier.
7335 //
7336 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7337 if (parser->explicit_encoding != NULL) {
7338 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7339 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7340 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7341 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7342 }
7343 }
7344
7345 return 0;
7346}
7347
7352static pm_symbol_node_t *
7353pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7354 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7355
7356 *node = (pm_symbol_node_t) {
7357 {
7358 .type = PM_SYMBOL_NODE,
7359 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7360 .node_id = PM_NODE_IDENTIFY(parser),
7361 .location = {
7362 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7363 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7364 }
7365 },
7366 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7367 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7368 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7369 .unescaped = *unescaped
7370 };
7371
7372 return node;
7373}
7374
7378static inline pm_symbol_node_t *
7379pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7380 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7381}
7382
7386static pm_symbol_node_t *
7387pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7388 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7389 parser->current_string = PM_STRING_EMPTY;
7390 return node;
7391}
7392
7396static pm_symbol_node_t *
7397pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7398 pm_symbol_node_t *node;
7399
7400 switch (token->type) {
7401 case PM_TOKEN_LABEL: {
7402 pm_token_t opening = not_provided(parser);
7403 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7404
7405 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7406 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7407
7408 assert((label.end - label.start) >= 0);
7409 pm_string_shared_init(&node->unescaped, label.start, label.end);
7410 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7411
7412 break;
7413 }
7414 case PM_TOKEN_MISSING: {
7415 pm_token_t opening = not_provided(parser);
7416 pm_token_t closing = not_provided(parser);
7417
7418 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7419 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7420 break;
7421 }
7422 default:
7423 assert(false && "unreachable");
7424 node = NULL;
7425 break;
7426 }
7427
7428 return node;
7429}
7430
7434static pm_symbol_node_t *
7435pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7436 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7437
7438 *node = (pm_symbol_node_t) {
7439 {
7440 .type = PM_SYMBOL_NODE,
7441 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7442 .node_id = PM_NODE_IDENTIFY(parser),
7443 .location = PM_LOCATION_NULL_VALUE(parser)
7444 },
7445 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7446 .unescaped = { 0 }
7447 };
7448
7449 pm_string_constant_init(&node->unescaped, content, strlen(content));
7450 return node;
7451}
7452
7456static bool
7457pm_symbol_node_label_p(pm_node_t *node) {
7458 const uint8_t *end = NULL;
7459
7460 switch (PM_NODE_TYPE(node)) {
7461 case PM_SYMBOL_NODE:
7462 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7463 break;
7464 case PM_INTERPOLATED_SYMBOL_NODE:
7465 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7466 break;
7467 default:
7468 return false;
7469 }
7470
7471 return (end != NULL) && (end[-1] == ':');
7472}
7473
7477static pm_symbol_node_t *
7478pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7479 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7480
7481 *new_node = (pm_symbol_node_t) {
7482 {
7483 .type = PM_SYMBOL_NODE,
7484 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7485 .node_id = PM_NODE_IDENTIFY(parser),
7486 .location = {
7487 .start = opening->start,
7488 .end = closing->end
7489 }
7490 },
7491 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7492 .value_loc = node->content_loc,
7493 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7494 .unescaped = node->unescaped
7495 };
7496
7497 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7498 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7499
7500 // We are explicitly _not_ using pm_node_destroy here because we don't want
7501 // to trash the unescaped string. We could instead copy the string if we
7502 // know that it is owned, but we're taking the fast path for now.
7503 xfree(node);
7504
7505 return new_node;
7506}
7507
7511static pm_string_node_t *
7512pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7513 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7514 pm_node_flags_t flags = 0;
7515
7516 switch (parser->frozen_string_literal) {
7517 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7518 flags = PM_STRING_FLAGS_MUTABLE;
7519 break;
7520 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7521 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7522 break;
7523 }
7524
7525 *new_node = (pm_string_node_t) {
7526 {
7527 .type = PM_STRING_NODE,
7528 .flags = flags,
7529 .node_id = PM_NODE_IDENTIFY(parser),
7530 .location = node->base.location
7531 },
7532 .opening_loc = node->opening_loc,
7533 .content_loc = node->value_loc,
7534 .closing_loc = node->closing_loc,
7535 .unescaped = node->unescaped
7536 };
7537
7538 // We are explicitly _not_ using pm_node_destroy here because we don't want
7539 // to trash the unescaped string. We could instead copy the string if we
7540 // know that it is owned, but we're taking the fast path for now.
7541 xfree(node);
7542
7543 return new_node;
7544}
7545
7549static pm_true_node_t *
7550pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7551 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7552 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7553
7554 *node = (pm_true_node_t) {{
7555 .type = PM_TRUE_NODE,
7556 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7557 .node_id = PM_NODE_IDENTIFY(parser),
7558 .location = PM_LOCATION_TOKEN_VALUE(token)
7559 }};
7560
7561 return node;
7562}
7563
7567static pm_true_node_t *
7568pm_true_node_synthesized_create(pm_parser_t *parser) {
7569 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7570
7571 *node = (pm_true_node_t) {{
7572 .type = PM_TRUE_NODE,
7573 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7574 .node_id = PM_NODE_IDENTIFY(parser),
7575 .location = { .start = parser->start, .end = parser->end }
7576 }};
7577
7578 return node;
7579}
7580
7584static pm_undef_node_t *
7585pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7586 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7587 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7588
7589 *node = (pm_undef_node_t) {
7590 {
7591 .type = PM_UNDEF_NODE,
7592 .node_id = PM_NODE_IDENTIFY(parser),
7593 .location = PM_LOCATION_TOKEN_VALUE(token),
7594 },
7595 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7596 .names = { 0 }
7597 };
7598
7599 return node;
7600}
7601
7605static void
7606pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7607 node->base.location.end = name->location.end;
7608 pm_node_list_append(&node->names, name);
7609}
7610
7614static pm_unless_node_t *
7615pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7616 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7617 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7618
7619 const uint8_t *end;
7620 if (statements != NULL) {
7621 end = statements->base.location.end;
7622 } else {
7623 end = predicate->location.end;
7624 }
7625
7626 *node = (pm_unless_node_t) {
7627 {
7628 .type = PM_UNLESS_NODE,
7629 .flags = PM_NODE_FLAG_NEWLINE,
7630 .node_id = PM_NODE_IDENTIFY(parser),
7631 .location = {
7632 .start = keyword->start,
7633 .end = end
7634 },
7635 },
7636 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7637 .predicate = predicate,
7638 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7639 .statements = statements,
7640 .else_clause = NULL,
7641 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7642 };
7643
7644 return node;
7645}
7646
7650static pm_unless_node_t *
7651pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7652 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7653 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7654
7655 pm_statements_node_t *statements = pm_statements_node_create(parser);
7656 pm_statements_node_body_append(parser, statements, statement, true);
7657
7658 *node = (pm_unless_node_t) {
7659 {
7660 .type = PM_UNLESS_NODE,
7661 .flags = PM_NODE_FLAG_NEWLINE,
7662 .node_id = PM_NODE_IDENTIFY(parser),
7663 .location = {
7664 .start = statement->location.start,
7665 .end = predicate->location.end
7666 },
7667 },
7668 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7669 .predicate = predicate,
7670 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7671 .statements = statements,
7672 .else_clause = NULL,
7673 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7674 };
7675
7676 return node;
7677}
7678
7679static inline void
7680pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7681 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7682 node->base.location.end = end_keyword->end;
7683}
7684
7690static void
7691pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7692 assert(parser->current_block_exits != NULL);
7693
7694 // All of the block exits that we want to remove should be within the
7695 // statements, and since we are modifying the statements, we shouldn't have
7696 // to check the end location.
7697 const uint8_t *start = statements->base.location.start;
7698
7699 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7700 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7701 if (block_exit->location.start < start) break;
7702
7703 // Implicitly remove from the list by lowering the size.
7704 parser->current_block_exits->size--;
7705 }
7706}
7707
7711static pm_until_node_t *
7712pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7713 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7714 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7715
7716 *node = (pm_until_node_t) {
7717 {
7718 .type = PM_UNTIL_NODE,
7719 .flags = flags,
7720 .node_id = PM_NODE_IDENTIFY(parser),
7721 .location = {
7722 .start = keyword->start,
7723 .end = closing->end,
7724 },
7725 },
7726 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7727 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7728 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7729 .predicate = predicate,
7730 .statements = statements
7731 };
7732
7733 return node;
7734}
7735
7739static pm_until_node_t *
7740pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7741 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7742 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7743 pm_loop_modifier_block_exits(parser, statements);
7744
7745 *node = (pm_until_node_t) {
7746 {
7747 .type = PM_UNTIL_NODE,
7748 .flags = flags,
7749 .node_id = PM_NODE_IDENTIFY(parser),
7750 .location = {
7751 .start = statements->base.location.start,
7752 .end = predicate->location.end,
7753 },
7754 },
7755 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7756 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7757 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7758 .predicate = predicate,
7759 .statements = statements
7760 };
7761
7762 return node;
7763}
7764
7768static pm_when_node_t *
7769pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7770 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7771
7772 *node = (pm_when_node_t) {
7773 {
7774 .type = PM_WHEN_NODE,
7775 .node_id = PM_NODE_IDENTIFY(parser),
7776 .location = {
7777 .start = keyword->start,
7778 .end = NULL
7779 }
7780 },
7781 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7782 .statements = NULL,
7783 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7784 .conditions = { 0 }
7785 };
7786
7787 return node;
7788}
7789
7793static void
7794pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7795 node->base.location.end = condition->location.end;
7796 pm_node_list_append(&node->conditions, condition);
7797}
7798
7802static inline void
7803pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7804 node->base.location.end = then_keyword->end;
7805 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7806}
7807
7811static void
7812pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7813 if (statements->base.location.end > node->base.location.end) {
7814 node->base.location.end = statements->base.location.end;
7815 }
7816
7817 node->statements = statements;
7818}
7819
7823static pm_while_node_t *
7824pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7825 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7826 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7827
7828 *node = (pm_while_node_t) {
7829 {
7830 .type = PM_WHILE_NODE,
7831 .flags = flags,
7832 .node_id = PM_NODE_IDENTIFY(parser),
7833 .location = {
7834 .start = keyword->start,
7835 .end = closing->end
7836 },
7837 },
7838 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7839 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7840 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7841 .predicate = predicate,
7842 .statements = statements
7843 };
7844
7845 return node;
7846}
7847
7851static pm_while_node_t *
7852pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7853 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7854 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7855 pm_loop_modifier_block_exits(parser, statements);
7856
7857 *node = (pm_while_node_t) {
7858 {
7859 .type = PM_WHILE_NODE,
7860 .flags = flags,
7861 .node_id = PM_NODE_IDENTIFY(parser),
7862 .location = {
7863 .start = statements->base.location.start,
7864 .end = predicate->location.end
7865 },
7866 },
7867 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7868 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7869 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7870 .predicate = predicate,
7871 .statements = statements
7872 };
7873
7874 return node;
7875}
7876
7880static pm_while_node_t *
7881pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7882 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7883
7884 *node = (pm_while_node_t) {
7885 {
7886 .type = PM_WHILE_NODE,
7887 .node_id = PM_NODE_IDENTIFY(parser),
7888 .location = PM_LOCATION_NULL_VALUE(parser)
7889 },
7890 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7891 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7892 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7893 .predicate = predicate,
7894 .statements = statements
7895 };
7896
7897 return node;
7898}
7899
7904static pm_x_string_node_t *
7905pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7906 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7907
7908 *node = (pm_x_string_node_t) {
7909 {
7910 .type = PM_X_STRING_NODE,
7911 .flags = PM_STRING_FLAGS_FROZEN,
7912 .node_id = PM_NODE_IDENTIFY(parser),
7913 .location = {
7914 .start = opening->start,
7915 .end = closing->end
7916 },
7917 },
7918 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7919 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7920 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7921 .unescaped = *unescaped
7922 };
7923
7924 return node;
7925}
7926
7930static inline pm_x_string_node_t *
7931pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7932 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7933}
7934
7938static pm_yield_node_t *
7939pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7940 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7941
7942 const uint8_t *end;
7943 if (rparen_loc->start != NULL) {
7944 end = rparen_loc->end;
7945 } else if (arguments != NULL) {
7946 end = arguments->base.location.end;
7947 } else if (lparen_loc->start != NULL) {
7948 end = lparen_loc->end;
7949 } else {
7950 end = keyword->end;
7951 }
7952
7953 *node = (pm_yield_node_t) {
7954 {
7955 .type = PM_YIELD_NODE,
7956 .node_id = PM_NODE_IDENTIFY(parser),
7957 .location = {
7958 .start = keyword->start,
7959 .end = end
7960 },
7961 },
7962 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7963 .lparen_loc = *lparen_loc,
7964 .arguments = arguments,
7965 .rparen_loc = *rparen_loc
7966 };
7967
7968 return node;
7969}
7970
7971#undef PM_NODE_ALLOC
7972#undef PM_NODE_IDENTIFY
7973
7978static int
7979pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7980 pm_scope_t *scope = parser->current_scope;
7981 int depth = 0;
7982
7983 while (scope != NULL) {
7984 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7985 if (scope->closed) break;
7986
7987 scope = scope->previous;
7988 depth++;
7989 }
7990
7991 return -1;
7992}
7993
7999static inline int
8000pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
8001 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
8002}
8003
8007static inline void
8008pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8009 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
8010}
8011
8015static pm_constant_id_t
8016pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8017 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
8018 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
8019 return constant_id;
8020}
8021
8025static inline pm_constant_id_t
8026pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8027 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8028}
8029
8033static pm_constant_id_t
8034pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8035 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8036 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8037 return constant_id;
8038}
8039
8043static pm_constant_id_t
8044pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8045 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8046 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8047 return constant_id;
8048}
8049
8057static bool
8058pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8059 // We want to check whether the parameter name is a numbered parameter or
8060 // not.
8061 pm_refute_numbered_parameter(parser, name->start, name->end);
8062
8063 // Otherwise we'll fetch the constant id for the parameter name and check
8064 // whether it's already in the current scope.
8065 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8066
8067 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8068 // Add an error if the parameter doesn't start with _ and has been seen before
8069 if ((name->start < name->end) && (*name->start != '_')) {
8070 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8071 }
8072 return true;
8073 }
8074 return false;
8075}
8076
8080static void
8081pm_parser_scope_pop(pm_parser_t *parser) {
8082 pm_scope_t *scope = parser->current_scope;
8083 parser->current_scope = scope->previous;
8084 pm_locals_free(&scope->locals);
8085 pm_node_list_free(&scope->implicit_parameters);
8086 xfree(scope);
8087}
8088
8089/******************************************************************************/
8090/* Stack helpers */
8091/******************************************************************************/
8092
8096static inline void
8097pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8098 *stack = (*stack << 1) | (value & 1);
8099}
8100
8104static inline void
8105pm_state_stack_pop(pm_state_stack_t *stack) {
8106 *stack >>= 1;
8107}
8108
8112static inline bool
8113pm_state_stack_p(const pm_state_stack_t *stack) {
8114 return *stack & 1;
8115}
8116
8117static inline void
8118pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8119 // Use the negation of the value to prevent stack overflow.
8120 pm_state_stack_push(&parser->accepts_block_stack, !value);
8121}
8122
8123static inline void
8124pm_accepts_block_stack_pop(pm_parser_t *parser) {
8125 pm_state_stack_pop(&parser->accepts_block_stack);
8126}
8127
8128static inline bool
8129pm_accepts_block_stack_p(pm_parser_t *parser) {
8130 return !pm_state_stack_p(&parser->accepts_block_stack);
8131}
8132
8133static inline void
8134pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8135 pm_state_stack_push(&parser->do_loop_stack, value);
8136}
8137
8138static inline void
8139pm_do_loop_stack_pop(pm_parser_t *parser) {
8140 pm_state_stack_pop(&parser->do_loop_stack);
8141}
8142
8143static inline bool
8144pm_do_loop_stack_p(pm_parser_t *parser) {
8145 return pm_state_stack_p(&parser->do_loop_stack);
8146}
8147
8148/******************************************************************************/
8149/* Lexer check helpers */
8150/******************************************************************************/
8151
8156static inline uint8_t
8157peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8158 if (cursor < parser->end) {
8159 return *cursor;
8160 } else {
8161 return '\0';
8162 }
8163}
8164
8170static inline uint8_t
8171peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8172 return peek_at(parser, parser->current.end + offset);
8173}
8174
8179static inline uint8_t
8180peek(const pm_parser_t *parser) {
8181 return peek_at(parser, parser->current.end);
8182}
8183
8188static inline bool
8189match(pm_parser_t *parser, uint8_t value) {
8190 if (peek(parser) == value) {
8191 parser->current.end++;
8192 return true;
8193 }
8194 return false;
8195}
8196
8201static inline size_t
8202match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8203 if (peek_at(parser, cursor) == '\n') {
8204 return 1;
8205 }
8206 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8207 return 2;
8208 }
8209 return 0;
8210}
8211
8217static inline size_t
8218match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8219 return match_eol_at(parser, parser->current.end + offset);
8220}
8221
8227static inline size_t
8228match_eol(pm_parser_t *parser) {
8229 return match_eol_at(parser, parser->current.end);
8230}
8231
/**
 * Find the first "\n" byte within `length` bytes starting at `cursor`.
 * Returns a pointer to it, or NULL when there is none. `length` must not be
 * negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain byte search is sufficient here: none of the supported encodings
    // use \n as a component of a multi-byte character.
    return memchr(cursor, '\n', span);
}
8244
8248static inline bool
8249ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8250 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8251}
8252
8257static bool
8258parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8259 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8260
8261 if (encoding != NULL) {
8262 if (parser->encoding != encoding) {
8263 parser->encoding = encoding;
8264 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8265 }
8266
8267 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8268 return true;
8269 }
8270
8271 return false;
8272}
8273
8278static void
8279parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8280 const uint8_t *cursor = parser->current.start + 1;
8281 const uint8_t *end = parser->current.end;
8282
8283 bool separator = false;
8284 while (true) {
8285 if (end - cursor <= 6) return;
8286 switch (cursor[6]) {
8287 case 'C': case 'c': cursor += 6; continue;
8288 case 'O': case 'o': cursor += 5; continue;
8289 case 'D': case 'd': cursor += 4; continue;
8290 case 'I': case 'i': cursor += 3; continue;
8291 case 'N': case 'n': cursor += 2; continue;
8292 case 'G': case 'g': cursor += 1; continue;
8293 case '=': case ':':
8294 separator = true;
8295 cursor += 6;
8296 break;
8297 default:
8298 cursor += 6;
8299 if (pm_char_is_whitespace(*cursor)) break;
8300 continue;
8301 }
8302 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8303 separator = false;
8304 }
8305
8306 while (true) {
8307 do {
8308 if (++cursor >= end) return;
8309 } while (pm_char_is_whitespace(*cursor));
8310
8311 if (separator) break;
8312 if (*cursor != '=' && *cursor != ':') return;
8313
8314 separator = true;
8315 cursor++;
8316 }
8317
8318 const uint8_t *value_start = cursor;
8319 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8320
8321 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8322 // If we were unable to parse the encoding value, then we've got an
8323 // issue because we didn't understand the encoding that the user was
8324 // trying to use. In this case we'll keep using the default encoding but
8325 // add an error to the parser to indicate an unsuccessful parse.
8326 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8327 }
8328}
8329
/**
 * The possible outcomes of interpreting a magic comment value as a boolean.
 */
typedef enum {
    /** The value was "true" (case-insensitive). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (case-insensitive). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
8335
8340static pm_magic_comment_boolean_value_t
8341parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8342 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8343 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8344 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8345 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8346 } else {
8347 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8348 }
8349}
8350
/**
 * Report whether a byte delimits a magic comment key: a single quote, a double
 * quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8355
8361static inline const uint8_t *
8362parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8363 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8364 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8365 return cursor;
8366 }
8367 cursor++;
8368 }
8369 return NULL;
8370}
8371
8382static inline bool
8383parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8384 bool result = true;
8385
8386 const uint8_t *start = parser->current.start + 1;
8387 const uint8_t *end = parser->current.end;
8388 if (end - start <= 7) return false;
8389
8390 const uint8_t *cursor;
8391 bool indicator = false;
8392
8393 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8394 start = cursor + 3;
8395
8396 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8397 end = cursor;
8398 indicator = true;
8399 } else {
8400 // If we have a start marker but not an end marker, then we cannot
8401 // have a magic comment.
8402 return false;
8403 }
8404 }
8405
8406 cursor = start;
8407 while (cursor < end) {
8408 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8409
8410 const uint8_t *key_start = cursor;
8411 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8412
8413 const uint8_t *key_end = cursor;
8414 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8415 if (cursor == end) break;
8416
8417 if (*cursor == ':') {
8418 cursor++;
8419 } else {
8420 if (!indicator) return false;
8421 continue;
8422 }
8423
8424 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8425 if (cursor == end) break;
8426
8427 const uint8_t *value_start;
8428 const uint8_t *value_end;
8429
8430 if (*cursor == '"') {
8431 value_start = ++cursor;
8432 for (; cursor < end && *cursor != '"'; cursor++) {
8433 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8434 }
8435 value_end = cursor;
8436 if (*cursor == '"') cursor++;
8437 } else {
8438 value_start = cursor;
8439 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8440 value_end = cursor;
8441 }
8442
8443 if (indicator) {
8444 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8445 } else {
8446 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8447 if (cursor != end) return false;
8448 }
8449
8450 // Here, we need to do some processing on the key to swap out dashes for
8451 // underscores. We only need to do this if there _is_ a dash in the key.
8452 pm_string_t key;
8453 const size_t key_length = (size_t) (key_end - key_start);
8454 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8455
8456 if (dash == NULL) {
8457 pm_string_shared_init(&key, key_start, key_end);
8458 } else {
8459 uint8_t *buffer = xmalloc(key_length);
8460 if (buffer == NULL) break;
8461
8462 memcpy(buffer, key_start, key_length);
8463 buffer[dash - key_start] = '_';
8464
8465 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8466 buffer[dash - key_start] = '_';
8467 }
8468
8469 pm_string_owned_init(&key, buffer, key_length);
8470 }
8471
8472 // Finally, we can start checking the key against the list of known
8473 // magic comment keys, and potentially change state based on that.
8474 const uint8_t *key_source = pm_string_source(&key);
8475 uint32_t value_length = (uint32_t) (value_end - value_start);
8476
8477 // We only want to attempt to compare against encoding comments if it's
8478 // the first line in the file (or the second in the case of a shebang).
8479 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8480 if (
8481 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8482 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8483 ) {
8484 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8485 }
8486 }
8487
8488 if (key_length == 11) {
8489 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8490 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8491 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8492 PM_PARSER_WARN_TOKEN_FORMAT(
8493 parser,
8494 parser->current,
8495 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8496 (int) key_length,
8497 (const char *) key_source,
8498 (int) value_length,
8499 (const char *) value_start
8500 );
8501 break;
8502 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8503 parser->warn_mismatched_indentation = false;
8504 break;
8505 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8506 parser->warn_mismatched_indentation = true;
8507 break;
8508 }
8509 }
8510 } else if (key_length == 21) {
8511 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8512 // We only want to handle frozen string literal comments if it's
8513 // before any semantic tokens have been seen.
8514 if (semantic_token_seen) {
8515 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8516 } else {
8517 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8518 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8519 PM_PARSER_WARN_TOKEN_FORMAT(
8520 parser,
8521 parser->current,
8522 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8523 (int) key_length,
8524 (const char *) key_source,
8525 (int) value_length,
8526 (const char *) value_start
8527 );
8528 break;
8529 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8531 break;
8532 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8534 break;
8535 }
8536 }
8537 }
8538 } else if (key_length == 24) {
8539 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8540 const uint8_t *cursor = parser->current.start;
8541 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8542
8543 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8544 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8545 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8546 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8547 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8548 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8549 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8550 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8551 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8552 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8553 } else {
8554 PM_PARSER_WARN_TOKEN_FORMAT(
8555 parser,
8556 parser->current,
8557 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8558 (int) key_length,
8559 (const char *) key_source,
8560 (int) value_length,
8561 (const char *) value_start
8562 );
8563 }
8564 }
8565 }
8566
8567 // When we're done, we want to free the string in case we had to
8568 // allocate memory for it.
8569 pm_string_free(&key);
8570
8571 // Allocate a new magic comment node to append to the parser's list.
8573 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8574 magic_comment->key_start = key_start;
8575 magic_comment->value_start = value_start;
8576 magic_comment->key_length = (uint32_t) key_length;
8577 magic_comment->value_length = value_length;
8578 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8579 }
8580 }
8581
8582 return result;
8583}
8584
8585/******************************************************************************/
8586/* Context manipulations */
8587/******************************************************************************/
8588
8589static const uint32_t context_terminators[] = {
8590 [PM_CONTEXT_NONE] = 0,
8591 [PM_CONTEXT_BEGIN] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8592 [PM_CONTEXT_BEGIN_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8593 [PM_CONTEXT_BEGIN_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8594 [PM_CONTEXT_BEGIN_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8595 [PM_CONTEXT_BLOCK_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
8596 [PM_CONTEXT_BLOCK_KEYWORDS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8597 [PM_CONTEXT_BLOCK_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8598 [PM_CONTEXT_BLOCK_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8599 [PM_CONTEXT_BLOCK_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8600 [PM_CONTEXT_CASE_WHEN] = (1 << PM_TOKEN_KEYWORD_WHEN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
8601 [PM_CONTEXT_CASE_IN] = (1 << PM_TOKEN_KEYWORD_IN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
8602 [PM_CONTEXT_CLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8603 [PM_CONTEXT_CLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8604 [PM_CONTEXT_CLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8605 [PM_CONTEXT_CLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8606 [PM_CONTEXT_DEF] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8607 [PM_CONTEXT_DEF_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8608 [PM_CONTEXT_DEF_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8609 [PM_CONTEXT_DEF_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8610 [PM_CONTEXT_DEF_PARAMS] = (1 << PM_TOKEN_EOF),
8611 [PM_CONTEXT_DEFINED] = (1 << PM_TOKEN_EOF),
8612 [PM_CONTEXT_DEFAULT_PARAMS] = (1 << PM_TOKEN_COMMA) | (1 << PM_TOKEN_PARENTHESIS_RIGHT),
8613 [PM_CONTEXT_ELSE] = (1 << PM_TOKEN_KEYWORD_END),
8614 [PM_CONTEXT_ELSIF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
8615 [PM_CONTEXT_EMBEXPR] = (1 << PM_TOKEN_EMBEXPR_END),
8616 [PM_CONTEXT_FOR] = (1 << PM_TOKEN_KEYWORD_END),
8617 [PM_CONTEXT_FOR_INDEX] = (1 << PM_TOKEN_KEYWORD_IN),
8618 [PM_CONTEXT_IF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
8619 [PM_CONTEXT_LAMBDA_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
8620 [PM_CONTEXT_LAMBDA_DO_END] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8621 [PM_CONTEXT_LAMBDA_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8622 [PM_CONTEXT_LAMBDA_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8623 [PM_CONTEXT_LAMBDA_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8624 [PM_CONTEXT_LOOP_PREDICATE] = (1 << PM_TOKEN_KEYWORD_DO) | (1 << PM_TOKEN_KEYWORD_THEN),
8625 [PM_CONTEXT_MAIN] = (1 << PM_TOKEN_EOF),
8626 [PM_CONTEXT_MODULE] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8627 [PM_CONTEXT_MODULE_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8628 [PM_CONTEXT_MODULE_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8629 [PM_CONTEXT_MODULE_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8630 [PM_CONTEXT_MULTI_TARGET] = (1 << PM_TOKEN_EOF),
8631 [PM_CONTEXT_PARENS] = (1 << PM_TOKEN_PARENTHESIS_RIGHT),
8632 [PM_CONTEXT_POSTEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
8633 [PM_CONTEXT_PREDICATE] = (1 << PM_TOKEN_KEYWORD_THEN) | (1 << PM_TOKEN_NEWLINE) | (1 << PM_TOKEN_SEMICOLON),
8634 [PM_CONTEXT_PREEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
8635 [PM_CONTEXT_RESCUE_MODIFIER] = (1 << PM_TOKEN_EOF),
8636 [PM_CONTEXT_SCLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8637 [PM_CONTEXT_SCLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8638 [PM_CONTEXT_SCLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8639 [PM_CONTEXT_SCLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8640 [PM_CONTEXT_TERNARY] = (1 << PM_TOKEN_EOF),
8641 [PM_CONTEXT_UNLESS] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8642 [PM_CONTEXT_UNTIL] = (1 << PM_TOKEN_KEYWORD_END),
8643 [PM_CONTEXT_WHILE] = (1 << PM_TOKEN_KEYWORD_END),
8644};
8645
8646static inline bool
8647context_terminator(pm_context_t context, pm_token_t *token) {
8648 return token->type < 32 && (context_terminators[context] & (1 << token->type));
8649}
8650
8655static pm_context_t
8656context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8657 pm_context_node_t *context_node = parser->current_context;
8658
8659 while (context_node != NULL) {
8660 if (context_terminator(context_node->context, token)) return context_node->context;
8661 context_node = context_node->prev;
8662 }
8663
8664 return PM_CONTEXT_NONE;
8665}
8666
8667static bool
8668context_push(pm_parser_t *parser, pm_context_t context) {
8669 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8670 if (context_node == NULL) return false;
8671
8672 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8673
8674 if (parser->current_context == NULL) {
8675 parser->current_context = context_node;
8676 } else {
8677 context_node->prev = parser->current_context;
8678 parser->current_context = context_node;
8679 }
8680
8681 return true;
8682}
8683
8684static void
8685context_pop(pm_parser_t *parser) {
8686 pm_context_node_t *prev = parser->current_context->prev;
8687 xfree(parser->current_context);
8688 parser->current_context = prev;
8689}
8690
8691static bool
8692context_p(const pm_parser_t *parser, pm_context_t context) {
8693 pm_context_node_t *context_node = parser->current_context;
8694
8695 while (context_node != NULL) {
8696 if (context_node->context == context) return true;
8697 context_node = context_node->prev;
8698 }
8699
8700 return false;
8701}
8702
8703static bool
8704context_def_p(const pm_parser_t *parser) {
8705 pm_context_node_t *context_node = parser->current_context;
8706
8707 while (context_node != NULL) {
8708 switch (context_node->context) {
8709 case PM_CONTEXT_DEF:
8714 return true;
8715 case PM_CONTEXT_CLASS:
8719 case PM_CONTEXT_MODULE:
8723 case PM_CONTEXT_SCLASS:
8727 return false;
8728 default:
8729 context_node = context_node->prev;
8730 }
8731 }
8732
8733 return false;
8734}
8735
8740static const char *
8741context_human(pm_context_t context) {
8742 switch (context) {
8743 case PM_CONTEXT_NONE:
8744 assert(false && "unreachable");
8745 return "";
8746 case PM_CONTEXT_BEGIN: return "begin statement";
8747 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8748 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8749 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8750 case PM_CONTEXT_CASE_IN: return "'in' clause";
8751 case PM_CONTEXT_CLASS: return "class definition";
8752 case PM_CONTEXT_DEF: return "method definition";
8753 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8754 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8755 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8756 case PM_CONTEXT_ELSE:
8763 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8764 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8765 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8772 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8773 case PM_CONTEXT_FOR: return "for loop";
8774 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8775 case PM_CONTEXT_IF: return "if statement";
8776 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8777 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8778 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8779 case PM_CONTEXT_MAIN: return "top level context";
8780 case PM_CONTEXT_MODULE: return "module definition";
8781 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8782 case PM_CONTEXT_PARENS: return "parentheses";
8783 case PM_CONTEXT_POSTEXE: return "'END' block";
8784 case PM_CONTEXT_PREDICATE: return "predicate";
8785 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8793 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8794 case PM_CONTEXT_SCLASS: return "singleton class definition";
8795 case PM_CONTEXT_TERNARY: return "ternary expression";
8796 case PM_CONTEXT_UNLESS: return "unless statement";
8797 case PM_CONTEXT_UNTIL: return "until statement";
8798 case PM_CONTEXT_WHILE: return "while statement";
8799 }
8800
8801 assert(false && "unreachable");
8802 return "";
8803}
8804
8805/******************************************************************************/
8806/* Specific token lexers */
8807/******************************************************************************/
8808
8809static inline void
8810pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8811 if (invalid != NULL) {
8812 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8813 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8814 }
8815}
8816
8817static size_t
8818pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8819 const uint8_t *invalid = NULL;
8820 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8821 pm_strspn_number_validate(parser, string, length, invalid);
8822 return length;
8823}
8824
8825static size_t
8826pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8827 const uint8_t *invalid = NULL;
8828 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8829 pm_strspn_number_validate(parser, string, length, invalid);
8830 return length;
8831}
8832
8833static size_t
8834pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8835 const uint8_t *invalid = NULL;
8836 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8837 pm_strspn_number_validate(parser, string, length, invalid);
8838 return length;
8839}
8840
8841static size_t
8842pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8843 const uint8_t *invalid = NULL;
8844 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8845 pm_strspn_number_validate(parser, string, length, invalid);
8846 return length;
8847}
8848
8849static pm_token_type_t
8850lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8851 pm_token_type_t type = PM_TOKEN_INTEGER;
8852
8853 // Here we're going to attempt to parse the optional decimal portion of a
8854 // float. If it's not there, then it's okay and we'll just continue on.
8855 if (peek(parser) == '.') {
8856 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8857 parser->current.end += 2;
8858 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8859 type = PM_TOKEN_FLOAT;
8860 } else {
8861 // If we had a . and then something else, then it's not a float
8862 // suffix on a number it's a method call or something else.
8863 return type;
8864 }
8865 }
8866
8867 // Here we're going to attempt to parse the optional exponent portion of a
8868 // float. If it's not there, it's okay and we'll just continue on.
8869 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8870 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8871 parser->current.end += 2;
8872
8873 if (pm_char_is_decimal_digit(peek(parser))) {
8874 parser->current.end++;
8875 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8876 } else {
8877 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8878 }
8879 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8880 parser->current.end++;
8881 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8882 } else {
8883 return type;
8884 }
8885
8886 *seen_e = true;
8887 type = PM_TOKEN_FLOAT;
8888 }
8889
8890 return type;
8891}
8892
8893static pm_token_type_t
8894lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8895 pm_token_type_t type = PM_TOKEN_INTEGER;
8896 *seen_e = false;
8897
8898 if (peek_offset(parser, -1) == '0') {
8899 switch (*parser->current.end) {
8900 // 0d1111 is a decimal number
8901 case 'd':
8902 case 'D':
8903 parser->current.end++;
8904 if (pm_char_is_decimal_digit(peek(parser))) {
8905 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8906 } else {
8907 match(parser, '_');
8908 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8909 }
8910
8911 break;
8912
8913 // 0b1111 is a binary number
8914 case 'b':
8915 case 'B':
8916 parser->current.end++;
8917 if (pm_char_is_binary_digit(peek(parser))) {
8918 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8919 } else {
8920 match(parser, '_');
8921 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8922 }
8923
8924 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8925 break;
8926
8927 // 0o1111 is an octal number
8928 case 'o':
8929 case 'O':
8930 parser->current.end++;
8931 if (pm_char_is_octal_digit(peek(parser))) {
8932 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8933 } else {
8934 match(parser, '_');
8935 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8936 }
8937
8938 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8939 break;
8940
8941 // 01111 is an octal number
8942 case '_':
8943 case '0':
8944 case '1':
8945 case '2':
8946 case '3':
8947 case '4':
8948 case '5':
8949 case '6':
8950 case '7':
8951 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8952 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8953 break;
8954
8955 // 0x1111 is a hexadecimal number
8956 case 'x':
8957 case 'X':
8958 parser->current.end++;
8959 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8960 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8961 } else {
8962 match(parser, '_');
8963 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8964 }
8965
8966 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8967 break;
8968
8969 // 0.xxx is a float
8970 case '.': {
8971 type = lex_optional_float_suffix(parser, seen_e);
8972 break;
8973 }
8974
8975 // 0exxx is a float
8976 case 'e':
8977 case 'E': {
8978 type = lex_optional_float_suffix(parser, seen_e);
8979 break;
8980 }
8981 }
8982 } else {
8983 // If it didn't start with a 0, then we'll lex as far as we can into a
8984 // decimal number.
8985 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8986
8987 // Afterward, we'll lex as far as we can into an optional float suffix.
8988 type = lex_optional_float_suffix(parser, seen_e);
8989 }
8990
8991 // At this point we have a completed number, but we want to provide the user
8992 // with a good experience if they put an additional .xxx fractional
8993 // component on the end, so we'll check for that here.
8994 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8995 const uint8_t *fraction_start = parser->current.end;
8996 const uint8_t *fraction_end = parser->current.end + 2;
8997 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8998 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8999 }
9000
9001 return type;
9002}
9003
9004static pm_token_type_t
9005lex_numeric(pm_parser_t *parser) {
9006 pm_token_type_t type = PM_TOKEN_INTEGER;
9007 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
9008
9009 if (parser->current.end < parser->end) {
9010 bool seen_e = false;
9011 type = lex_numeric_prefix(parser, &seen_e);
9012
9013 const uint8_t *end = parser->current.end;
9014 pm_token_type_t suffix_type = type;
9015
9016 if (type == PM_TOKEN_INTEGER) {
9017 if (match(parser, 'r')) {
9018 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9019
9020 if (match(parser, 'i')) {
9021 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
9022 }
9023 } else if (match(parser, 'i')) {
9024 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9025 }
9026 } else {
9027 if (!seen_e && match(parser, 'r')) {
9028 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9029
9030 if (match(parser, 'i')) {
9031 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
9032 }
9033 } else if (match(parser, 'i')) {
9034 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9035 }
9036 }
9037
9038 const uint8_t b = peek(parser);
9039 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9040 parser->current.end = end;
9041 } else {
9042 type = suffix_type;
9043 }
9044 }
9045
9046 return type;
9047}
9048
9049static pm_token_type_t
9050lex_global_variable(pm_parser_t *parser) {
9051 if (parser->current.end >= parser->end) {
9052 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9053 return PM_TOKEN_GLOBAL_VARIABLE;
9054 }
9055
9056 // True if multiple characters are allowed after the declaration of the
9057 // global variable. Not true when it starts with "$-".
9058 bool allow_multiple = true;
9059
9060 switch (*parser->current.end) {
9061 case '~': // $~: match-data
9062 case '*': // $*: argv
9063 case '$': // $$: pid
9064 case '?': // $?: last status
9065 case '!': // $!: error string
9066 case '@': // $@: error position
9067 case '/': // $/: input record separator
9068 case '\\': // $\: output record separator
9069 case ';': // $;: field separator
9070 case ',': // $,: output field separator
9071 case '.': // $.: last read line number
9072 case '=': // $=: ignorecase
9073 case ':': // $:: load path
9074 case '<': // $<: reading filename
9075 case '>': // $>: default output handle
9076 case '\"': // $": already loaded files
9077 parser->current.end++;
9078 return PM_TOKEN_GLOBAL_VARIABLE;
9079
9080 case '&': // $&: last match
9081 case '`': // $`: string before last match
9082 case '\'': // $': string after last match
9083 case '+': // $+: string matches last paren.
9084 parser->current.end++;
9085 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9086
9087 case '0': {
9088 parser->current.end++;
9089 size_t width;
9090
9091 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9092 do {
9093 parser->current.end += width;
9094 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9095
9096 // $0 isn't allowed to be followed by anything.
9097 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9098 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9099 }
9100
9101 return PM_TOKEN_GLOBAL_VARIABLE;
9102 }
9103
9104 case '1':
9105 case '2':
9106 case '3':
9107 case '4':
9108 case '5':
9109 case '6':
9110 case '7':
9111 case '8':
9112 case '9':
9113 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9114 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9115
9116 case '-':
9117 parser->current.end++;
9118 allow_multiple = false;
9120 default: {
9121 size_t width;
9122
9123 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9124 do {
9125 parser->current.end += width;
9126 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9127 } else if (pm_char_is_whitespace(peek(parser))) {
9128 // If we get here, then we have a $ followed by whitespace,
9129 // which is not allowed.
9130 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9131 } else {
9132 // If we get here, then we have a $ followed by something that
9133 // isn't recognized as a global variable.
9134 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9135 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9136 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9137 }
9138
9139 return PM_TOKEN_GLOBAL_VARIABLE;
9140 }
9141 }
9142}
9143
9156static inline pm_token_type_t
9157lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9158 if (memcmp(current_start, value, vlen) == 0) {
9159 pm_lex_state_t last_state = parser->lex_state;
9160
9161 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9162 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9163 } else {
9164 lex_state_set(parser, state);
9165 if (state == PM_LEX_STATE_BEG) {
9166 parser->command_start = true;
9167 }
9168
9169 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9170 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9171 return modifier_type;
9172 }
9173 }
9174
9175 return type;
9176 }
9177
9178 return PM_TOKEN_EOF;
9179}
9180
9181static pm_token_type_t
9182lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9183 // Lex as far as we can into the current identifier.
9184 size_t width;
9185 const uint8_t *end = parser->end;
9186 const uint8_t *current_start = parser->current.start;
9187 const uint8_t *current_end = parser->current.end;
9188 bool encoding_changed = parser->encoding_changed;
9189
9190 if (encoding_changed) {
9191 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
9192 current_end += width;
9193 }
9194 } else {
9195 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
9196 current_end += width;
9197 }
9198 }
9199 parser->current.end = current_end;
9200
9201 // Now cache the length of the identifier so that we can quickly compare it
9202 // against known keywords.
9203 width = (size_t) (current_end - current_start);
9204
9205 if (current_end < end) {
9206 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9207 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9208 // check if we're returning the defined? keyword or just an identifier.
9209 width++;
9210
9211 if (
9212 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9213 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9214 ) {
9215 // If we're in a position where we can accept a : at the end of an
9216 // identifier, then we'll optionally accept it.
9217 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9218 (void) match(parser, ':');
9219 return PM_TOKEN_LABEL;
9220 }
9221
9222 if (parser->lex_state != PM_LEX_STATE_DOT) {
9223 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9224 return PM_TOKEN_KEYWORD_DEFINED;
9225 }
9226 }
9227
9228 return PM_TOKEN_METHOD_NAME;
9229 }
9230
9231 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9232 // If we're in a position where we can accept a = at the end of an
9233 // identifier, then we'll optionally accept it.
9234 return PM_TOKEN_IDENTIFIER;
9235 }
9236
9237 if (
9238 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9239 peek(parser) == ':' && peek_offset(parser, 1) != ':'
9240 ) {
9241 // If we're in a position where we can accept a : at the end of an
9242 // identifier, then we'll optionally accept it.
9243 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9244 (void) match(parser, ':');
9245 return PM_TOKEN_LABEL;
9246 }
9247 }
9248
9249 if (parser->lex_state != PM_LEX_STATE_DOT) {
9250 pm_token_type_t type;
9251 switch (width) {
9252 case 2:
9253 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9254 if (pm_do_loop_stack_p(parser)) {
9255 return PM_TOKEN_KEYWORD_DO_LOOP;
9256 }
9257 return PM_TOKEN_KEYWORD_DO;
9258 }
9259
9260 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9261 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9262 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9263 break;
9264 case 3:
9265 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9266 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9267 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9268 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9269 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9270 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9271 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9272 break;
9273 case 4:
9274 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9277 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9279 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9280 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282 break;
9283 case 5:
9284 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9287 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9288 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9289 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9290 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9291 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9292 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9293 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9294 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9295 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9296 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297 break;
9298 case 6:
9299 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9300 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9301 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9302 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9303 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9304 break;
9305 case 8:
9306 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9307 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9308 break;
9309 case 12:
9310 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9311 break;
9312 }
9313 }
9314
9315 if (encoding_changed) {
9316 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9317 }
9318 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9319}
9320
9325static bool
9326current_token_starts_line(pm_parser_t *parser) {
9327 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9328}
9329
9344static pm_token_type_t
9345lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9346 // If there is no content following this #, then we're at the end of
9347 // the string and we can safely return string content.
9348 if (pound + 1 >= parser->end) {
9349 parser->current.end = pound + 1;
9350 return PM_TOKEN_STRING_CONTENT;
9351 }
9352
9353 // Now we'll check against the character that follows the #. If it constitutes
9354 // valid interplation, we'll handle that, otherwise we'll return
9355 // PM_TOKEN_NOT_PROVIDED.
9356 switch (pound[1]) {
9357 case '@': {
9358 // In this case we may have hit an embedded instance or class variable.
9359 if (pound + 2 >= parser->end) {
9360 parser->current.end = pound + 1;
9361 return PM_TOKEN_STRING_CONTENT;
9362 }
9363
9364 // If we're looking at a @ and there's another @, then we'll skip past the
9365 // second @.
9366 const uint8_t *variable = pound + 2;
9367 if (*variable == '@' && pound + 3 < parser->end) variable++;
9368
9369 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
9370 // At this point we're sure that we've either hit an embedded instance
9371 // or class variable. In this case we'll first need to check if we've
9372 // already consumed content.
9373 if (pound > parser->current.start) {
9374 parser->current.end = pound;
9375 return PM_TOKEN_STRING_CONTENT;
9376 }
9377
9378 // Otherwise we need to return the embedded variable token
9379 // and then switch to the embedded variable lex mode.
9380 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9381 parser->current.end = pound + 1;
9382 return PM_TOKEN_EMBVAR;
9383 }
9384
9385 // If we didn't get a valid interpolation, then this is just regular
9386 // string content. This is like if we get "#@-". In this case the caller
9387 // should keep lexing.
9388 parser->current.end = pound + 1;
9389 return PM_TOKEN_NOT_PROVIDED;
9390 }
9391 case '$':
9392 // In this case we may have hit an embedded global variable. If there's
9393 // not enough room, then we'll just return string content.
9394 if (pound + 2 >= parser->end) {
9395 parser->current.end = pound + 1;
9396 return PM_TOKEN_STRING_CONTENT;
9397 }
9398
9399 // This is the character that we're going to check to see if it is the
9400 // start of an identifier that would indicate that this is a global
9401 // variable.
9402 const uint8_t *check = pound + 2;
9403
9404 if (pound[2] == '-') {
9405 if (pound + 3 >= parser->end) {
9406 parser->current.end = pound + 2;
9407 return PM_TOKEN_STRING_CONTENT;
9408 }
9409
9410 check++;
9411 }
9412
9413 // If the character that we're going to check is the start of an
9414 // identifier, or we don't have a - and the character is a decimal number
9415 // or a global name punctuation character, then we've hit an embedded
9416 // global variable.
9417 if (
9418 char_is_identifier_start(parser, check, parser->end - check) ||
9419 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9420 ) {
9421 // In this case we've hit an embedded global variable. First check to
9422 // see if we've already consumed content. If we have, then we need to
9423 // return that content as string content first.
9424 if (pound > parser->current.start) {
9425 parser->current.end = pound;
9426 return PM_TOKEN_STRING_CONTENT;
9427 }
9428
9429 // Otherwise, we need to return the embedded variable token and switch
9430 // to the embedded variable lex mode.
9431 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9432 parser->current.end = pound + 1;
9433 return PM_TOKEN_EMBVAR;
9434 }
9435
9436 // In this case we've hit a #$ that does not indicate a global variable.
9437 // In this case we'll continue lexing past it.
9438 parser->current.end = pound + 1;
9439 return PM_TOKEN_NOT_PROVIDED;
9440 case '{':
9441 // In this case it's the start of an embedded expression. If we have
9442 // already consumed content, then we need to return that content as string
9443 // content first.
9444 if (pound > parser->current.start) {
9445 parser->current.end = pound;
9446 return PM_TOKEN_STRING_CONTENT;
9447 }
9448
9449 parser->enclosure_nesting++;
9450
9451 // Otherwise we'll skip past the #{ and begin lexing the embedded
9452 // expression.
9453 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9454 parser->current.end = pound + 2;
9455 parser->command_start = true;
9456 pm_do_loop_stack_push(parser, false);
9457 return PM_TOKEN_EMBEXPR_BEGIN;
9458 default:
9459 // In this case we've hit a # that doesn't constitute interpolation. We'll
9460 // mark that by returning the not provided token type. This tells the
9461 // consumer to keep lexing forward.
9462 parser->current.end = pound + 1;
9463 return PM_TOKEN_NOT_PROVIDED;
9464 }
9465}
9466
/** No modifier applies to the escape sequence being read. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

/** The escape sequence is nested inside a control escape (\c or \C-). */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1 << 0;

/** The escape sequence is nested inside a meta escape (\M-). */
static const uint8_t PM_ESCAPE_FLAG_META = 1 << 1;

/** The escape sequence is being read for a character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1 << 2;

/** The escape sequence is being read inside a regular expression. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1 << 3;
9472
/**
 * Lookup table, indexed by ASCII byte value, of the bytes that
 * char_is_ascii_printable accepts. Each row covers sixteen byte values.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00-0x0F: only \t \n \v \f \r */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10-0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20-0x2F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30-0x3F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40-0x4F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50-0x5F: everything except the backslash (0x5C) */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60-0x6F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70-0x7F: everything except DEL (0x7F) */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the given byte is marked in ascii_printable_chars. Bytes
 * outside of the ASCII range are never considered printable.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    // The table only has entries for 7-bit values, so reject anything larger
    // before indexing into it.
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
9491
/**
 * Convert a single hexadecimal digit character into its numeric value.
 *
 * No validation happens here: the caller is expected to have already checked
 * that the byte is a hexadecimal digit.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // Everything up to and including '9' is treated as a decimal digit.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For 'a'-'f' and 'A'-'F' the low three bits hold 1 through 6, so adding 9
    // yields 10 through 15 regardless of case.
    return (uint8_t) ((value & 0x7) + 9);
}
9500
9506static inline uint32_t
9507escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9508 uint32_t value = 0;
9509 for (size_t index = 0; index < length; index++) {
9510 if (index != 0) value <<= 4;
9511 value |= escape_hexadecimal_digit(string[index]);
9512 }
9513
9514 // Here we're going to verify that the value is actually a valid Unicode
9515 // codepoint and not a surrogate pair.
9516 if (value >= 0xD800 && value <= 0xDFFF) {
9517 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9518 return 0xFFFD;
9519 }
9520
9521 return value;
9522}
9523
9527static inline uint8_t
9528escape_byte(uint8_t value, const uint8_t flags) {
9529 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9530 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9531 return value;
9532}
9533
9537static inline void
9538escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9539 // \u escape sequences in string-like structures implicitly change the
9540 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9541 // literal.
9542 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9543 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9544 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9545 }
9546
9548 }
9549
9550 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
9551 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9552 pm_buffer_append_byte(buffer, 0xEF);
9553 pm_buffer_append_byte(buffer, 0xBF);
9554 pm_buffer_append_byte(buffer, 0xBD);
9555 }
9556}
9557
9562static inline void
9563escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9564 if (byte >= 0x80) {
9565 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9566 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9567 }
9568
9569 parser->explicit_encoding = parser->encoding;
9570 }
9571
9572 pm_buffer_append_byte(buffer, byte);
9573}
9574
9590static inline void
9591escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9592 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9593 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9594 }
9595
9596 escape_write_byte_encoded(parser, buffer, byte);
9597}
9598
9602static inline void
9603escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9604 size_t width;
9605 if (parser->encoding_changed) {
9606 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9607 } else {
9608 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9609 }
9610
9611 if (width == 1) {
9612 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9613 } else if (width > 1) {
9614 // Valid multibyte character. Just ignore escape.
9615 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
9616 pm_buffer_append_bytes(b, parser->current.end, width);
9617 parser->current.end += width;
9618 } else {
9619 // Assume the next character wasn't meant to be part of this escape
9620 // sequence since it is invalid. Add an error and move on.
9621 parser->current.end++;
9622 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9623 }
9624}
9625
9631static void
9632escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9633#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9634
9635 PM_PARSER_WARN_TOKEN_FORMAT(
9636 parser,
9637 parser->current,
9638 PM_WARN_INVALID_CHARACTER,
9639 FLAG(flags),
9640 FLAG(flag),
9641 type
9642 );
9643
9644#undef FLAG
9645}
9646
9650static void
9651escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9652 uint8_t peeked = peek(parser);
9653 switch (peeked) {
9654 case '\\': {
9655 parser->current.end++;
9656 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9657 return;
9658 }
9659 case '\'': {
9660 parser->current.end++;
9661 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9662 return;
9663 }
9664 case 'a': {
9665 parser->current.end++;
9666 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9667 return;
9668 }
9669 case 'b': {
9670 parser->current.end++;
9671 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9672 return;
9673 }
9674 case 'e': {
9675 parser->current.end++;
9676 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9677 return;
9678 }
9679 case 'f': {
9680 parser->current.end++;
9681 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9682 return;
9683 }
9684 case 'n': {
9685 parser->current.end++;
9686 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9687 return;
9688 }
9689 case 'r': {
9690 parser->current.end++;
9691 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9692 return;
9693 }
9694 case 's': {
9695 parser->current.end++;
9696 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9697 return;
9698 }
9699 case 't': {
9700 parser->current.end++;
9701 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9702 return;
9703 }
9704 case 'v': {
9705 parser->current.end++;
9706 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9707 return;
9708 }
9709 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9710 uint8_t value = (uint8_t) (*parser->current.end - '0');
9711 parser->current.end++;
9712
9713 if (pm_char_is_octal_digit(peek(parser))) {
9714 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9715 parser->current.end++;
9716
9717 if (pm_char_is_octal_digit(peek(parser))) {
9718 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9719 parser->current.end++;
9720 }
9721 }
9722
9723 value = escape_byte(value, flags);
9724 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9725 return;
9726 }
9727 case 'x': {
9728 const uint8_t *start = parser->current.end - 1;
9729
9730 parser->current.end++;
9731 uint8_t byte = peek(parser);
9732
9733 if (pm_char_is_hexadecimal_digit(byte)) {
9734 uint8_t value = escape_hexadecimal_digit(byte);
9735 parser->current.end++;
9736
9737 byte = peek(parser);
9738 if (pm_char_is_hexadecimal_digit(byte)) {
9739 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9740 parser->current.end++;
9741 }
9742
9743 value = escape_byte(value, flags);
9744 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9745 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9746 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9747 } else {
9748 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9749 }
9750 }
9751
9752 escape_write_byte_encoded(parser, buffer, value);
9753 } else {
9754 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9755 }
9756
9757 return;
9758 }
9759 case 'u': {
9760 const uint8_t *start = parser->current.end - 1;
9761 parser->current.end++;
9762
9763 if (parser->current.end == parser->end) {
9764 const uint8_t *start = parser->current.end - 2;
9765 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9766 } else if (peek(parser) == '{') {
9767 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9768 parser->current.end++;
9769
9770 size_t whitespace;
9771 while (true) {
9772 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9773 parser->current.end += whitespace;
9774 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9775 // This is super hacky, but it gets us nicer error
9776 // messages because we can still pass it off to the
9777 // regular expression engine even if we hit an
9778 // unterminated regular expression.
9779 parser->current.end += 2;
9780 } else {
9781 break;
9782 }
9783 }
9784
9785 const uint8_t *extra_codepoints_start = NULL;
9786 int codepoints_count = 0;
9787
9788 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9789 const uint8_t *unicode_start = parser->current.end;
9790 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9791
9792 if (hexadecimal_length > 6) {
9793 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9794 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9795 } else if (hexadecimal_length == 0) {
9796 // there are not hexadecimal characters
9797
9798 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9799 // If this is a regular expression, we are going to
9800 // let the regular expression engine handle this
9801 // error instead of us.
9802 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9803 } else {
9804 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9805 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9806 }
9807
9808 return;
9809 }
9810
9811 parser->current.end += hexadecimal_length;
9812 codepoints_count++;
9813 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9814 extra_codepoints_start = unicode_start;
9815 }
9816
9817 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9818 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9819
9820 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9821 }
9822
9823 // ?\u{nnnn} character literal should contain only one codepoint
9824 // and cannot be like ?\u{nnnn mmmm}.
9825 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9826 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9827 }
9828
9829 if (parser->current.end == parser->end) {
9830 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9831 } else if (peek(parser) == '}') {
9832 parser->current.end++;
9833 } else {
9834 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9835 // If this is a regular expression, we are going to let
9836 // the regular expression engine handle this error
9837 // instead of us.
9838 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9839 } else {
9840 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9841 }
9842 }
9843
9844 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9845 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9846 }
9847 } else {
9848 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9849
9850 if (length == 0) {
9851 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9852 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9853 } else {
9854 const uint8_t *start = parser->current.end - 2;
9855 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9856 }
9857 } else if (length == 4) {
9858 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9859
9860 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9861 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9862 }
9863
9864 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9865 parser->current.end += 4;
9866 } else {
9867 parser->current.end += length;
9868
9869 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9870 // If this is a regular expression, we are going to let
9871 // the regular expression engine handle this error
9872 // instead of us.
9873 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9874 } else {
9875 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9876 }
9877 }
9878 }
9879
9880 return;
9881 }
9882 case 'c': {
9883 parser->current.end++;
9884 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9885 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9886 }
9887
9888 if (parser->current.end == parser->end) {
9889 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9890 return;
9891 }
9892
9893 uint8_t peeked = peek(parser);
9894 switch (peeked) {
9895 case '?': {
9896 parser->current.end++;
9897 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9898 return;
9899 }
9900 case '\\':
9901 parser->current.end++;
9902
9903 if (match(parser, 'u') || match(parser, 'U')) {
9904 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9905 return;
9906 }
9907
9908 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9909 return;
9910 case ' ':
9911 parser->current.end++;
9912 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9913 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9914 return;
9915 case '\t':
9916 parser->current.end++;
9917 escape_read_warn(parser, flags, 0, "\\t");
9918 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9919 return;
9920 default: {
9921 if (!char_is_ascii_printable(peeked)) {
9922 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9923 return;
9924 }
9925
9926 parser->current.end++;
9927 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9928 return;
9929 }
9930 }
9931 }
9932 case 'C': {
9933 parser->current.end++;
9934 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9935 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9936 }
9937
9938 if (peek(parser) != '-') {
9939 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9940 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9941 return;
9942 }
9943
9944 parser->current.end++;
9945 if (parser->current.end == parser->end) {
9946 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9947 return;
9948 }
9949
9950 uint8_t peeked = peek(parser);
9951 switch (peeked) {
9952 case '?': {
9953 parser->current.end++;
9954 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9955 return;
9956 }
9957 case '\\':
9958 parser->current.end++;
9959
9960 if (match(parser, 'u') || match(parser, 'U')) {
9961 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9962 return;
9963 }
9964
9965 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9966 return;
9967 case ' ':
9968 parser->current.end++;
9969 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9970 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9971 return;
9972 case '\t':
9973 parser->current.end++;
9974 escape_read_warn(parser, flags, 0, "\\t");
9975 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9976 return;
9977 default: {
9978 if (!char_is_ascii_printable(peeked)) {
9979 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9980 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9981 return;
9982 }
9983
9984 parser->current.end++;
9985 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9986 return;
9987 }
9988 }
9989 }
9990 case 'M': {
9991 parser->current.end++;
9992 if (flags & PM_ESCAPE_FLAG_META) {
9993 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9994 }
9995
9996 if (peek(parser) != '-') {
9997 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9998 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9999 return;
10000 }
10001
10002 parser->current.end++;
10003 if (parser->current.end == parser->end) {
10004 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10005 return;
10006 }
10007
10008 uint8_t peeked = peek(parser);
10009 switch (peeked) {
10010 case '\\':
10011 parser->current.end++;
10012
10013 if (match(parser, 'u') || match(parser, 'U')) {
10014 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10015 return;
10016 }
10017
10018 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10019 return;
10020 case ' ':
10021 parser->current.end++;
10022 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10023 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10024 return;
10025 case '\t':
10026 parser->current.end++;
10027 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10028 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10029 return;
10030 default:
10031 if (!char_is_ascii_printable(peeked)) {
10032 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10033 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10034 return;
10035 }
10036
10037 parser->current.end++;
10038 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10039 return;
10040 }
10041 }
10042 case '\r': {
10043 if (peek_offset(parser, 1) == '\n') {
10044 parser->current.end += 2;
10045 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10046 return;
10047 }
10049 }
10050 default: {
10051 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
10052 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10053 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10054 return;
10055 }
10056 if (parser->current.end < parser->end) {
10057 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
10058 } else {
10059 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10060 }
10061 return;
10062 }
10063 }
10064}
10065
10091static pm_token_type_t
10092lex_question_mark(pm_parser_t *parser) {
10093 if (lex_state_end_p(parser)) {
10094 lex_state_set(parser, PM_LEX_STATE_BEG);
10095 return PM_TOKEN_QUESTION_MARK;
10096 }
10097
10098 if (parser->current.end >= parser->end) {
10099 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10100 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10101 return PM_TOKEN_CHARACTER_LITERAL;
10102 }
10103
10104 if (pm_char_is_whitespace(*parser->current.end)) {
10105 lex_state_set(parser, PM_LEX_STATE_BEG);
10106 return PM_TOKEN_QUESTION_MARK;
10107 }
10108
10109 lex_state_set(parser, PM_LEX_STATE_BEG);
10110
10111 if (match(parser, '\\')) {
10112 lex_state_set(parser, PM_LEX_STATE_END);
10113
10114 pm_buffer_t buffer;
10115 pm_buffer_init_capacity(&buffer, 3);
10116
10117 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10118 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10119
10120 return PM_TOKEN_CHARACTER_LITERAL;
10121 } else {
10122 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10123
10124 // Ternary operators can have a ? immediately followed by an identifier
10125 // which starts with an underscore. We check for this case here.
10126 if (
10127 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10128 (
10129 (parser->current.end + encoding_width >= parser->end) ||
10130 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
10131 )
10132 ) {
10133 lex_state_set(parser, PM_LEX_STATE_END);
10134 parser->current.end += encoding_width;
10135 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10136 return PM_TOKEN_CHARACTER_LITERAL;
10137 }
10138 }
10139
10140 return PM_TOKEN_QUESTION_MARK;
10141}
10142
10147static pm_token_type_t
10148lex_at_variable(pm_parser_t *parser) {
10149 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
10150 const uint8_t *end = parser->end;
10151
10152 size_t width;
10153 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
10154 parser->current.end += width;
10155
10156 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
10157 parser->current.end += width;
10158 }
10159 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10160 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10161 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
10162 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10163 }
10164
10165 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
10166 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10167 } else {
10168 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10169 pm_parser_err_token(parser, &parser->current, diag_id);
10170 }
10171
10172 // If we're lexing an embedded variable, then we need to pop back into the
10173 // parent lex context.
10174 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10175 lex_mode_pop(parser);
10176 }
10177
10178 return type;
10179}
10180
10184static inline void
10185parser_lex_callback(pm_parser_t *parser) {
10186 if (parser->lex_callback) {
10187 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10188 }
10189}
10190
10194static inline pm_comment_t *
10195parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10196 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10197 if (comment == NULL) return NULL;
10198
10199 *comment = (pm_comment_t) {
10200 .type = type,
10201 .location = { parser->current.start, parser->current.end }
10202 };
10203
10204 return comment;
10205}
10206
10212static pm_token_type_t
10213lex_embdoc(pm_parser_t *parser) {
10214 // First, lex out the EMBDOC_BEGIN token.
10215 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10216
10217 if (newline == NULL) {
10218 parser->current.end = parser->end;
10219 } else {
10220 pm_newline_list_append(&parser->newline_list, newline);
10221 parser->current.end = newline + 1;
10222 }
10223
10224 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10225 parser_lex_callback(parser);
10226
10227 // Now, create a comment that is going to be attached to the parser.
10228 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10229 if (comment == NULL) return PM_TOKEN_EOF;
10230
10231 // Now, loop until we find the end of the embedded documentation or the end
10232 // of the file.
10233 while (parser->current.end + 4 <= parser->end) {
10234 parser->current.start = parser->current.end;
10235
10236 // If we've hit the end of the embedded documentation then we'll return
10237 // that token here.
10238 if (
10239 (memcmp(parser->current.end, "=end", 4) == 0) &&
10240 (
10241 (parser->current.end + 4 == parser->end) || // end of file
10242 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10243 (parser->current.end[4] == '\0') || // NUL or end of script
10244 (parser->current.end[4] == '\004') || // ^D
10245 (parser->current.end[4] == '\032') // ^Z
10246 )
10247 ) {
10248 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10249
10250 if (newline == NULL) {
10251 parser->current.end = parser->end;
10252 } else {
10253 pm_newline_list_append(&parser->newline_list, newline);
10254 parser->current.end = newline + 1;
10255 }
10256
10257 parser->current.type = PM_TOKEN_EMBDOC_END;
10258 parser_lex_callback(parser);
10259
10260 comment->location.end = parser->current.end;
10261 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10262
10263 return PM_TOKEN_EMBDOC_END;
10264 }
10265
10266 // Otherwise, we'll parse until the end of the line and return a line of
10267 // embedded documentation.
10268 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10269
10270 if (newline == NULL) {
10271 parser->current.end = parser->end;
10272 } else {
10273 pm_newline_list_append(&parser->newline_list, newline);
10274 parser->current.end = newline + 1;
10275 }
10276
10277 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10278 parser_lex_callback(parser);
10279 }
10280
10281 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10282
10283 comment->location.end = parser->current.end;
10284 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10285
10286 return PM_TOKEN_EOF;
10287}
10288
10294static inline void
10295parser_lex_ignored_newline(pm_parser_t *parser) {
10296 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10297 parser_lex_callback(parser);
10298}
10299
10309static inline void
10310parser_flush_heredoc_end(pm_parser_t *parser) {
10311 assert(parser->heredoc_end <= parser->end);
10312 parser->next_start = parser->heredoc_end;
10313 parser->heredoc_end = NULL;
10314}
10315
10319static bool
10320parser_end_of_line_p(const pm_parser_t *parser) {
10321 const uint8_t *cursor = parser->current.end;
10322
10323 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10324 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10325 }
10326
10327 return true;
10328}
10329
10348typedef struct {
10354
10359 const uint8_t *cursor;
10361
10381
10385static inline void
10386pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10387 pm_buffer_append_byte(&token_buffer->buffer, byte);
10388}
10389
10390static inline void
10391pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10392 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10393}
10394
10398static inline size_t
10399parser_char_width(const pm_parser_t *parser) {
10400 size_t width;
10401 if (parser->encoding_changed) {
10402 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10403 } else {
10404 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10405 }
10406
10407 // TODO: If the character is invalid in the given encoding, then we'll just
10408 // push one byte into the buffer. This should actually be an error.
10409 return (width == 0 ? 1 : width);
10410}
10411
10415static void
10416pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10417 size_t width = parser_char_width(parser);
10418 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10419 parser->current.end += width;
10420}
10421
10422static void
10423pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10424 size_t width = parser_char_width(parser);
10425 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10426 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10427 parser->current.end += width;
10428}
10429
/**
 * Check whether every byte of a slice has its high bit clear.
 *
 * @param value The first byte of the slice.
 * @param length The number of bytes in the slice.
 * @return true if no byte is 0x80 or above (including when the slice is
 *     empty), false otherwise.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length-- > 0) {
        if (*value++ >= 0x80) return false;
    }

    return true;
}
10438
10445static inline void
10446pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10447 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10448}
10449
10450static inline void
10451pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10452 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10453 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10454 pm_buffer_free(&token_buffer->regexp_buffer);
10455}
10456
10466static void
10467pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10468 if (token_buffer->cursor == NULL) {
10469 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10470 } else {
10471 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10472 pm_token_buffer_copy(parser, token_buffer);
10473 }
10474}
10475
10476static void
10477pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10478 if (token_buffer->base.cursor == NULL) {
10479 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10480 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10481 } else {
10482 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10483 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10484 pm_regexp_token_buffer_copy(parser, token_buffer);
10485 }
10486}
10487
10488#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10489
10498static void
10499pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10500 const uint8_t *start;
10501 if (token_buffer->cursor == NULL) {
10502 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10503 start = parser->current.start;
10504 } else {
10505 start = token_buffer->cursor;
10506 }
10507
10508 const uint8_t *end = parser->current.end - 1;
10509 assert(end >= start);
10510 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10511
10512 token_buffer->cursor = end;
10513}
10514
10515static void
10516pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10517 const uint8_t *start;
10518 if (token_buffer->base.cursor == NULL) {
10519 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10520 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10521 start = parser->current.start;
10522 } else {
10523 start = token_buffer->base.cursor;
10524 }
10525
10526 const uint8_t *end = parser->current.end - 1;
10527 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10528 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10529
10530 token_buffer->base.cursor = end;
10531}
10532
10533#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10534
10539static inline size_t
10540pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10541 size_t whitespace = 0;
10542
10543 switch (indent) {
10544 case PM_HEREDOC_INDENT_NONE:
10545 // Do nothing, we can't match a terminator with
10546 // indentation and there's no need to calculate common
10547 // whitespace.
10548 break;
10549 case PM_HEREDOC_INDENT_DASH:
10550 // Skip past inline whitespace.
10551 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10552 break;
10553 case PM_HEREDOC_INDENT_TILDE:
10554 // Skip past inline whitespace and calculate common
10555 // whitespace.
10556 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10557 if (**cursor == '\t') {
10558 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10559 } else {
10560 whitespace++;
10561 }
10562 (*cursor)++;
10563 }
10564
10565 break;
10566 }
10567
10568 return whitespace;
10569}
10570
10575static uint8_t
10576pm_lex_percent_delimiter(pm_parser_t *parser) {
10577 size_t eol_length = match_eol(parser);
10578
10579 if (eol_length) {
10580 if (parser->heredoc_end) {
10581 // If we have already lexed a heredoc, then the newline has already
10582 // been added to the list. In this case we want to just flush the
10583 // heredoc end.
10584 parser_flush_heredoc_end(parser);
10585 } else {
10586 // Otherwise, we'll add the newline to the list of newlines.
10587 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10588 }
10589
10590 uint8_t delimiter = *parser->current.end;
10591
10592 // If our delimiter is \r\n, we want to treat it as if it's \n.
10593 // For example, %\r\nfoo\r\n should be "foo"
10594 if (eol_length == 2) {
10595 delimiter = *(parser->current.end + 1);
10596 }
10597
10598 parser->current.end += eol_length;
10599 return delimiter;
10600 }
10601
10602 return *parser->current.end++;
10603}
10604
/**
 * Set the type of the current token, report the token to the lex callback, and
 * return from the enclosing lexing function (which must return void and have a
 * `parser` variable in scope). Use it as a statement: `LEX(type);`.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement and stays correct under an unbraced if/else. The argument
 * is parenthesized so that any expression can be passed safely.
 */
#define LEX(token_type) do { parser->current.type = (token_type); parser_lex_callback(parser); return; } while (0)
10610
10617static void
10618parser_lex(pm_parser_t *parser) {
10619 assert(parser->current.end <= parser->end);
10620 parser->previous = parser->current;
10621
10622 // This value mirrors cmd_state from CRuby.
10623 bool previous_command_start = parser->command_start;
10624 parser->command_start = false;
10625
10626 // This is used to communicate to the newline lexing function that we've
10627 // already seen a comment.
10628 bool lexed_comment = false;
10629
10630 // Here we cache the current value of the semantic token seen flag. This is
10631 // used to reset it in case we find a token that shouldn't flip this flag.
10632 unsigned int semantic_token_seen = parser->semantic_token_seen;
10633 parser->semantic_token_seen = true;
10634
10635 switch (parser->lex_modes.current->mode) {
10636 case PM_LEX_DEFAULT:
10637 case PM_LEX_EMBEXPR:
10638 case PM_LEX_EMBVAR:
10639
10640 // We have a specific named label here because we are going to jump back to
10641 // this location in the event that we have lexed a token that should not be
10642 // returned to the parser. This includes comments, ignored newlines, and
10643 // invalid tokens of some form.
10644 lex_next_token: {
10645 // If we have the special next_start pointer set, then we're going to jump
10646 // to that location and start lexing from there.
10647 if (parser->next_start != NULL) {
10648 parser->current.end = parser->next_start;
10649 parser->next_start = NULL;
10650 }
10651
10652 // This value mirrors space_seen from CRuby. It tracks whether or not
10653 // space has been eaten before the start of the next token.
10654 bool space_seen = false;
10655
10656 // First, we're going to skip past any whitespace at the front of the next
10657 // token.
10658 bool chomping = true;
10659 while (parser->current.end < parser->end && chomping) {
10660 switch (*parser->current.end) {
10661 case ' ':
10662 case '\t':
10663 case '\f':
10664 case '\v':
10665 parser->current.end++;
10666 space_seen = true;
10667 break;
10668 case '\r':
10669 if (match_eol_offset(parser, 1)) {
10670 chomping = false;
10671 } else {
10672 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10673 parser->current.end++;
10674 space_seen = true;
10675 }
10676 break;
10677 case '\\': {
10678 size_t eol_length = match_eol_offset(parser, 1);
10679 if (eol_length) {
10680 if (parser->heredoc_end) {
10681 parser->current.end = parser->heredoc_end;
10682 parser->heredoc_end = NULL;
10683 } else {
10684 parser->current.end += eol_length + 1;
10685 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10686 space_seen = true;
10687 }
10688 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10689 parser->current.end += 2;
10690 } else {
10691 chomping = false;
10692 }
10693
10694 break;
10695 }
10696 default:
10697 chomping = false;
10698 break;
10699 }
10700 }
10701
10702 // Next, we'll set to start of this token to be the current end.
10703 parser->current.start = parser->current.end;
10704
10705 // We'll check if we're at the end of the file. If we are, then we
10706 // need to return the EOF token.
10707 if (parser->current.end >= parser->end) {
10708 // If we hit EOF, but the EOF came immediately after a newline,
10709 // set the start of the token to the newline. This way any EOF
10710 // errors will be reported as happening on that line rather than
10711 // a line after. For example "foo(\n" should report an error
10712 // on line 1 even though EOF technically occurs on line 2.
10713 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10714 parser->current.start -= 1;
10715 }
10716 LEX(PM_TOKEN_EOF);
10717 }
10718
10719 // Finally, we'll check the current character to determine the next
10720 // token.
10721 switch (*parser->current.end++) {
10722 case '\0': // NUL or end of script
10723 case '\004': // ^D
10724 case '\032': // ^Z
10725 parser->current.end--;
10726 LEX(PM_TOKEN_EOF);
10727
10728 case '#': { // comments
10729 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10730 parser->current.end = ending == NULL ? parser->end : ending;
10731
10732 // If we found a comment while lexing, then we're going to
10733 // add it to the list of comments in the file and keep
10734 // lexing.
10735 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10736 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10737
10738 if (ending) parser->current.end++;
10739 parser->current.type = PM_TOKEN_COMMENT;
10740 parser_lex_callback(parser);
10741
10742 // Here, parse the comment to see if it's a magic comment
10743 // and potentially change state on the parser.
10744 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10745 ptrdiff_t length = parser->current.end - parser->current.start;
10746
10747 // If we didn't find a magic comment within the first
10748 // pass and we're at the start of the file, then we need
10749 // to do another pass to potentially find other patterns
10750 // for encoding comments.
10751 if (length >= 10 && !parser->encoding_locked) {
10752 parser_lex_magic_comment_encoding(parser);
10753 }
10754 }
10755
10756 lexed_comment = true;
10757 }
10759 case '\r':
10760 case '\n': {
10761 parser->semantic_token_seen = semantic_token_seen & 0x1;
10762 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10763
10764 if (eol_length) {
10765 // The only way you can have carriage returns in this
10766 // particular loop is if you have a carriage return
10767 // followed by a newline. In that case we'll just skip
10768 // over the carriage return and continue lexing, in
10769 // order to make it so that the newline token
10770 // encapsulates both the carriage return and the
10771 // newline. Note that we need to check that we haven't
10772 // already lexed a comment here because that falls
10773 // through into here as well.
10774 if (!lexed_comment) {
10775 parser->current.end += eol_length - 1; // skip CR
10776 }
10777
10778 if (parser->heredoc_end == NULL) {
10779 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10780 }
10781 }
10782
10783 if (parser->heredoc_end) {
10784 parser_flush_heredoc_end(parser);
10785 }
10786
10787 // If this is an ignored newline, then we can continue lexing after
10788 // calling the callback with the ignored newline token.
10789 switch (lex_state_ignored_p(parser)) {
10790 case PM_IGNORED_NEWLINE_NONE:
10791 break;
10792 case PM_IGNORED_NEWLINE_PATTERN:
10793 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10794 if (!lexed_comment) parser_lex_ignored_newline(parser);
10795 lex_state_set(parser, PM_LEX_STATE_BEG);
10796 parser->command_start = true;
10797 parser->current.type = PM_TOKEN_NEWLINE;
10798 return;
10799 }
10801 case PM_IGNORED_NEWLINE_ALL:
10802 if (!lexed_comment) parser_lex_ignored_newline(parser);
10803 lexed_comment = false;
10804 goto lex_next_token;
10805 }
10806
10807 // Here we need to look ahead and see if there is a call operator
10808 // (either . or &.) that starts the next line. If there is, then this
10809 // is going to become an ignored newline and we're going to instead
10810 // return the call operator.
10811 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10812 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10813
10814 if (next_content < parser->end) {
10815 // If we hit a comment after a newline, then we're going to check
10816 // if it's ignored or if it's followed by a method call ('.').
10817 // If it is, then we're going to call the
10818 // callback with an ignored newline and then continue lexing.
10819 // Otherwise we'll return a regular newline.
10820 if (next_content[0] == '#') {
10821 // Here we look for a "." or "&." following a "\n".
10822 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10823
10824 while (following && (following + 1 < parser->end)) {
10825 following++;
10826 following += pm_strspn_inline_whitespace(following, parser->end - following);
10827
10828 // If this is not followed by a comment, then we can break out
10829 // of this loop.
10830 if (peek_at(parser, following) != '#') break;
10831
10832 // If there is a comment, then we need to find the end of the
10833 // comment and continue searching from there.
10834 following = next_newline(following, parser->end - following);
10835 }
10836
10837 // If the lex state was ignored, or we hit a '.' or a '&.',
10838 // we will lex the ignored newline
10839 if (
10840 lex_state_ignored_p(parser) ||
10841 (following && (
10842 (peek_at(parser, following) == '.') ||
10843 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10844 ))
10845 ) {
10846 if (!lexed_comment) parser_lex_ignored_newline(parser);
10847 lexed_comment = false;
10848 goto lex_next_token;
10849 }
10850 }
10851
10852 // If we hit a . after a newline, then we're in a call chain and
10853 // we need to return the call operator.
10854 if (next_content[0] == '.') {
10855 // To match ripper, we need to emit an ignored newline even though
10856 // it's a real newline in the case that we have a beginless range
10857 // on a subsequent line.
10858 if (peek_at(parser, next_content + 1) == '.') {
10859 if (!lexed_comment) parser_lex_ignored_newline(parser);
10860 lex_state_set(parser, PM_LEX_STATE_BEG);
10861 parser->command_start = true;
10862 parser->current.type = PM_TOKEN_NEWLINE;
10863 return;
10864 }
10865
10866 if (!lexed_comment) parser_lex_ignored_newline(parser);
10867 lex_state_set(parser, PM_LEX_STATE_DOT);
10868 parser->current.start = next_content;
10869 parser->current.end = next_content + 1;
10870 parser->next_start = NULL;
10871 LEX(PM_TOKEN_DOT);
10872 }
10873
10874 // If we hit a &. after a newline, then we're in a call chain and
10875 // we need to return the call operator.
10876 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10877 if (!lexed_comment) parser_lex_ignored_newline(parser);
10878 lex_state_set(parser, PM_LEX_STATE_DOT);
10879 parser->current.start = next_content;
10880 parser->current.end = next_content + 2;
10881 parser->next_start = NULL;
10882 LEX(PM_TOKEN_AMPERSAND_DOT);
10883 }
10884 }
10885
10886 // At this point we know this is a regular newline, and we can set the
10887 // necessary state and return the token.
10888 lex_state_set(parser, PM_LEX_STATE_BEG);
10889 parser->command_start = true;
10890 parser->current.type = PM_TOKEN_NEWLINE;
10891 if (!lexed_comment) parser_lex_callback(parser);
10892 return;
10893 }
10894
10895 // ,
10896 case ',':
10897 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10898 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10899 }
10900
10901 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10902 LEX(PM_TOKEN_COMMA);
10903
10904 // (
10905 case '(': {
10906 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10907
10908 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10909 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10910 }
10911
10912 parser->enclosure_nesting++;
10913 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10914 pm_do_loop_stack_push(parser, false);
10915 LEX(type);
10916 }
10917
10918 // )
10919 case ')':
10920 parser->enclosure_nesting--;
10921 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10922 pm_do_loop_stack_pop(parser);
10923 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10924
10925 // ;
10926 case ';':
10927 lex_state_set(parser, PM_LEX_STATE_BEG);
10928 parser->command_start = true;
10929 LEX(PM_TOKEN_SEMICOLON);
10930
10931 // [ [] []=
10932 case '[':
10933 parser->enclosure_nesting++;
10934 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10935
10936 if (lex_state_operator_p(parser)) {
10937 if (match(parser, ']')) {
10938 parser->enclosure_nesting--;
10939 lex_state_set(parser, PM_LEX_STATE_ARG);
10940 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10941 }
10942
10943 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10944 LEX(type);
10945 }
10946
10947 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10948 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10949 }
10950
10951 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10952 pm_do_loop_stack_push(parser, false);
10953 LEX(type);
10954
10955 // ]
10956 case ']':
10957 parser->enclosure_nesting--;
10958 lex_state_set(parser, PM_LEX_STATE_END);
10959 pm_do_loop_stack_pop(parser);
10960 LEX(PM_TOKEN_BRACKET_RIGHT);
10961
10962 // {
10963 case '{': {
10964 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10965
10966 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10967 // This { begins a lambda
10968 parser->command_start = true;
10969 lex_state_set(parser, PM_LEX_STATE_BEG);
10970 type = PM_TOKEN_LAMBDA_BEGIN;
10971 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10972 // This { begins a hash literal
10973 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10974 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10975 // This { begins a block
10976 parser->command_start = true;
10977 lex_state_set(parser, PM_LEX_STATE_BEG);
10978 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10979 // This { begins a block on a command
10980 parser->command_start = true;
10981 lex_state_set(parser, PM_LEX_STATE_BEG);
10982 } else {
10983 // This { begins a hash literal
10984 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10985 }
10986
10987 parser->enclosure_nesting++;
10988 parser->brace_nesting++;
10989 pm_do_loop_stack_push(parser, false);
10990
10991 LEX(type);
10992 }
10993
10994 // }
10995 case '}':
10996 parser->enclosure_nesting--;
10997 pm_do_loop_stack_pop(parser);
10998
10999 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
11000 lex_mode_pop(parser);
11001 LEX(PM_TOKEN_EMBEXPR_END);
11002 }
11003
11004 parser->brace_nesting--;
11005 lex_state_set(parser, PM_LEX_STATE_END);
11006 LEX(PM_TOKEN_BRACE_RIGHT);
11007
11008 // * ** **= *=
11009 case '*': {
11010 if (match(parser, '*')) {
11011 if (match(parser, '=')) {
11012 lex_state_set(parser, PM_LEX_STATE_BEG);
11013 LEX(PM_TOKEN_STAR_STAR_EQUAL);
11014 }
11015
11016 pm_token_type_t type = PM_TOKEN_STAR_STAR;
11017
11018 if (lex_state_spcarg_p(parser, space_seen)) {
11019 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11020 type = PM_TOKEN_USTAR_STAR;
11021 } else if (lex_state_beg_p(parser)) {
11022 type = PM_TOKEN_USTAR_STAR;
11023 } else if (ambiguous_operator_p(parser, space_seen)) {
11024 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11025 }
11026
11027 if (lex_state_operator_p(parser)) {
11028 lex_state_set(parser, PM_LEX_STATE_ARG);
11029 } else {
11030 lex_state_set(parser, PM_LEX_STATE_BEG);
11031 }
11032
11033 LEX(type);
11034 }
11035
11036 if (match(parser, '=')) {
11037 lex_state_set(parser, PM_LEX_STATE_BEG);
11038 LEX(PM_TOKEN_STAR_EQUAL);
11039 }
11040
11041 pm_token_type_t type = PM_TOKEN_STAR;
11042
11043 if (lex_state_spcarg_p(parser, space_seen)) {
11044 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11045 type = PM_TOKEN_USTAR;
11046 } else if (lex_state_beg_p(parser)) {
11047 type = PM_TOKEN_USTAR;
11048 } else if (ambiguous_operator_p(parser, space_seen)) {
11049 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11050 }
11051
11052 if (lex_state_operator_p(parser)) {
11053 lex_state_set(parser, PM_LEX_STATE_ARG);
11054 } else {
11055 lex_state_set(parser, PM_LEX_STATE_BEG);
11056 }
11057
11058 LEX(type);
11059 }
11060
11061 // ! != !~ !@
11062 case '!':
11063 if (lex_state_operator_p(parser)) {
11064 lex_state_set(parser, PM_LEX_STATE_ARG);
11065 if (match(parser, '@')) {
11066 LEX(PM_TOKEN_BANG);
11067 }
11068 } else {
11069 lex_state_set(parser, PM_LEX_STATE_BEG);
11070 }
11071
11072 if (match(parser, '=')) {
11073 LEX(PM_TOKEN_BANG_EQUAL);
11074 }
11075
11076 if (match(parser, '~')) {
11077 LEX(PM_TOKEN_BANG_TILDE);
11078 }
11079
11080 LEX(PM_TOKEN_BANG);
11081
11082 // = => =~ == === =begin
11083 case '=':
11084 if (
11085 current_token_starts_line(parser) &&
11086 (parser->current.end + 5 <= parser->end) &&
11087 memcmp(parser->current.end, "begin", 5) == 0 &&
11088 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11089 ) {
11090 pm_token_type_t type = lex_embdoc(parser);
11091 if (type == PM_TOKEN_EOF) {
11092 LEX(type);
11093 }
11094
11095 goto lex_next_token;
11096 }
11097
11098 if (lex_state_operator_p(parser)) {
11099 lex_state_set(parser, PM_LEX_STATE_ARG);
11100 } else {
11101 lex_state_set(parser, PM_LEX_STATE_BEG);
11102 }
11103
11104 if (match(parser, '>')) {
11105 LEX(PM_TOKEN_EQUAL_GREATER);
11106 }
11107
11108 if (match(parser, '~')) {
11109 LEX(PM_TOKEN_EQUAL_TILDE);
11110 }
11111
11112 if (match(parser, '=')) {
11113 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11114 }
11115
11116 LEX(PM_TOKEN_EQUAL);
11117
11118 // < << <<= <= <=>
11119 case '<':
11120 if (match(parser, '<')) {
11121 if (
11122 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11123 !lex_state_end_p(parser) &&
11124 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11125 ) {
11126 const uint8_t *end = parser->current.end;
11127
11128 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11129 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11130
11131 if (match(parser, '-')) {
11132 indent = PM_HEREDOC_INDENT_DASH;
11133 }
11134 else if (match(parser, '~')) {
11135 indent = PM_HEREDOC_INDENT_TILDE;
11136 }
11137
11138 if (match(parser, '`')) {
11139 quote = PM_HEREDOC_QUOTE_BACKTICK;
11140 }
11141 else if (match(parser, '"')) {
11142 quote = PM_HEREDOC_QUOTE_DOUBLE;
11143 }
11144 else if (match(parser, '\'')) {
11145 quote = PM_HEREDOC_QUOTE_SINGLE;
11146 }
11147
11148 const uint8_t *ident_start = parser->current.end;
11149 size_t width = 0;
11150
11151 if (parser->current.end >= parser->end) {
11152 parser->current.end = end;
11153 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
11154 parser->current.end = end;
11155 } else {
11156 if (quote == PM_HEREDOC_QUOTE_NONE) {
11157 parser->current.end += width;
11158
11159 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
11160 parser->current.end += width;
11161 }
11162 } else {
11163 // If we have quotes, then we're going to go until we find the
11164 // end quote.
11165 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11166 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11167 parser->current.end++;
11168 }
11169 }
11170
11171 size_t ident_length = (size_t) (parser->current.end - ident_start);
11172 bool ident_error = false;
11173
11174 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11175 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11176 ident_error = true;
11177 }
11178
11179 parser->explicit_encoding = NULL;
11180 lex_mode_push(parser, (pm_lex_mode_t) {
11181 .mode = PM_LEX_HEREDOC,
11182 .as.heredoc = {
11183 .base = {
11184 .ident_start = ident_start,
11185 .ident_length = ident_length,
11186 .quote = quote,
11187 .indent = indent
11188 },
11189 .next_start = parser->current.end,
11190 .common_whitespace = NULL,
11191 .line_continuation = false
11192 }
11193 });
11194
11195 if (parser->heredoc_end == NULL) {
11196 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11197
11198 if (body_start == NULL) {
11199 // If there is no newline after the heredoc identifier, then
11200 // this is not a valid heredoc declaration. In this case we
11201 // will add an error, but we will still return a heredoc
11202 // start.
11203 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11204 body_start = parser->end;
11205 } else {
11206 // Otherwise, we want to indicate that the body of the
11207 // heredoc starts on the character after the next newline.
11208 pm_newline_list_append(&parser->newline_list, body_start);
11209 body_start++;
11210 }
11211
11212 parser->next_start = body_start;
11213 } else {
11214 parser->next_start = parser->heredoc_end;
11215 }
11216
11217 LEX(PM_TOKEN_HEREDOC_START);
11218 }
11219 }
11220
11221 if (match(parser, '=')) {
11222 lex_state_set(parser, PM_LEX_STATE_BEG);
11223 LEX(PM_TOKEN_LESS_LESS_EQUAL);
11224 }
11225
11226 if (ambiguous_operator_p(parser, space_seen)) {
11227 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11228 }
11229
11230 if (lex_state_operator_p(parser)) {
11231 lex_state_set(parser, PM_LEX_STATE_ARG);
11232 } else {
11233 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11234 lex_state_set(parser, PM_LEX_STATE_BEG);
11235 }
11236
11237 LEX(PM_TOKEN_LESS_LESS);
11238 }
11239
11240 if (lex_state_operator_p(parser)) {
11241 lex_state_set(parser, PM_LEX_STATE_ARG);
11242 } else {
11243 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11244 lex_state_set(parser, PM_LEX_STATE_BEG);
11245 }
11246
11247 if (match(parser, '=')) {
11248 if (match(parser, '>')) {
11249 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
11250 }
11251
11252 LEX(PM_TOKEN_LESS_EQUAL);
11253 }
11254
11255 LEX(PM_TOKEN_LESS);
11256
11257 // > >> >>= >=
11258 case '>':
11259 if (match(parser, '>')) {
11260 if (lex_state_operator_p(parser)) {
11261 lex_state_set(parser, PM_LEX_STATE_ARG);
11262 } else {
11263 lex_state_set(parser, PM_LEX_STATE_BEG);
11264 }
11265 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11266 }
11267
11268 if (lex_state_operator_p(parser)) {
11269 lex_state_set(parser, PM_LEX_STATE_ARG);
11270 } else {
11271 lex_state_set(parser, PM_LEX_STATE_BEG);
11272 }
11273
11274 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11275
11276 // double-quoted string literal
11277 case '"': {
11278 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11279 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11280 LEX(PM_TOKEN_STRING_BEGIN);
11281 }
11282
11283 // xstring literal
11284 case '`': {
11285 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11286 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11287 LEX(PM_TOKEN_BACKTICK);
11288 }
11289
11290 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11291 if (previous_command_start) {
11292 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11293 } else {
11294 lex_state_set(parser, PM_LEX_STATE_ARG);
11295 }
11296
11297 LEX(PM_TOKEN_BACKTICK);
11298 }
11299
11300 lex_mode_push_string(parser, true, false, '\0', '`');
11301 LEX(PM_TOKEN_BACKTICK);
11302 }
11303
11304 // single-quoted string literal
11305 case '\'': {
11306 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11307 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11308 LEX(PM_TOKEN_STRING_BEGIN);
11309 }
11310
11311 // ? character literal
11312 case '?':
11313 LEX(lex_question_mark(parser));
11314
11315 // & && &&= &=
11316 case '&': {
11317 if (match(parser, '&')) {
11318 lex_state_set(parser, PM_LEX_STATE_BEG);
11319
11320 if (match(parser, '=')) {
11321 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
11322 }
11323
11324 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
11325 }
11326
11327 if (match(parser, '=')) {
11328 lex_state_set(parser, PM_LEX_STATE_BEG);
11329 LEX(PM_TOKEN_AMPERSAND_EQUAL);
11330 }
11331
11332 if (match(parser, '.')) {
11333 lex_state_set(parser, PM_LEX_STATE_DOT);
11334 LEX(PM_TOKEN_AMPERSAND_DOT);
11335 }
11336
11337 pm_token_type_t type = PM_TOKEN_AMPERSAND;
11338 if (lex_state_spcarg_p(parser, space_seen)) {
11339 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11340 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11341 } else {
11342 const uint8_t delim = peek_offset(parser, 1);
11343
11344 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
11345 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11346 }
11347 }
11348
11349 type = PM_TOKEN_UAMPERSAND;
11350 } else if (lex_state_beg_p(parser)) {
11351 type = PM_TOKEN_UAMPERSAND;
11352 } else if (ambiguous_operator_p(parser, space_seen)) {
11353 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11354 }
11355
11356 if (lex_state_operator_p(parser)) {
11357 lex_state_set(parser, PM_LEX_STATE_ARG);
11358 } else {
11359 lex_state_set(parser, PM_LEX_STATE_BEG);
11360 }
11361
11362 LEX(type);
11363 }
11364
11365 // | || ||= |=
11366 case '|':
11367 if (match(parser, '|')) {
11368 if (match(parser, '=')) {
11369 lex_state_set(parser, PM_LEX_STATE_BEG);
11370 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
11371 }
11372
11373 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11374 parser->current.end--;
11375 LEX(PM_TOKEN_PIPE);
11376 }
11377
11378 lex_state_set(parser, PM_LEX_STATE_BEG);
11379 LEX(PM_TOKEN_PIPE_PIPE);
11380 }
11381
11382 if (match(parser, '=')) {
11383 lex_state_set(parser, PM_LEX_STATE_BEG);
11384 LEX(PM_TOKEN_PIPE_EQUAL);
11385 }
11386
11387 if (lex_state_operator_p(parser)) {
11388 lex_state_set(parser, PM_LEX_STATE_ARG);
11389 } else {
11390 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11391 }
11392
11393 LEX(PM_TOKEN_PIPE);
11394
11395 // + += +@
11396 case '+': {
11397 if (lex_state_operator_p(parser)) {
11398 lex_state_set(parser, PM_LEX_STATE_ARG);
11399
11400 if (match(parser, '@')) {
11401 LEX(PM_TOKEN_UPLUS);
11402 }
11403
11404 LEX(PM_TOKEN_PLUS);
11405 }
11406
11407 if (match(parser, '=')) {
11408 lex_state_set(parser, PM_LEX_STATE_BEG);
11409 LEX(PM_TOKEN_PLUS_EQUAL);
11410 }
11411
11412 if (
11413 lex_state_beg_p(parser) ||
11414 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11415 ) {
11416 lex_state_set(parser, PM_LEX_STATE_BEG);
11417
11418 if (pm_char_is_decimal_digit(peek(parser))) {
11419 parser->current.end++;
11420 pm_token_type_t type = lex_numeric(parser);
11421 lex_state_set(parser, PM_LEX_STATE_END);
11422 LEX(type);
11423 }
11424
11425 LEX(PM_TOKEN_UPLUS);
11426 }
11427
11428 if (ambiguous_operator_p(parser, space_seen)) {
11429 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11430 }
11431
11432 lex_state_set(parser, PM_LEX_STATE_BEG);
11433 LEX(PM_TOKEN_PLUS);
11434 }
11435
11436 // - -= -@
11437 case '-': {
11438 if (lex_state_operator_p(parser)) {
11439 lex_state_set(parser, PM_LEX_STATE_ARG);
11440
11441 if (match(parser, '@')) {
11442 LEX(PM_TOKEN_UMINUS);
11443 }
11444
11445 LEX(PM_TOKEN_MINUS);
11446 }
11447
11448 if (match(parser, '=')) {
11449 lex_state_set(parser, PM_LEX_STATE_BEG);
11450 LEX(PM_TOKEN_MINUS_EQUAL);
11451 }
11452
11453 if (match(parser, '>')) {
11454 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11455 LEX(PM_TOKEN_MINUS_GREATER);
11456 }
11457
11458 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11459 bool is_beg = lex_state_beg_p(parser);
11460 if (!is_beg && spcarg) {
11461 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11462 }
11463
11464 if (is_beg || spcarg) {
11465 lex_state_set(parser, PM_LEX_STATE_BEG);
11466 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11467 }
11468
11469 if (ambiguous_operator_p(parser, space_seen)) {
11470 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11471 }
11472
11473 lex_state_set(parser, PM_LEX_STATE_BEG);
11474 LEX(PM_TOKEN_MINUS);
11475 }
11476
11477 // . .. ...
11478 case '.': {
11479 bool beg_p = lex_state_beg_p(parser);
11480
11481 if (match(parser, '.')) {
11482 if (match(parser, '.')) {
11483 // If we're _not_ inside a range within default parameters
11484 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11485 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11486 lex_state_set(parser, PM_LEX_STATE_BEG);
11487 } else {
11488 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11489 }
11490 LEX(PM_TOKEN_UDOT_DOT_DOT);
11491 }
11492
11493 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11494 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11495 }
11496
11497 lex_state_set(parser, PM_LEX_STATE_BEG);
11498 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
11499 }
11500
11501 lex_state_set(parser, PM_LEX_STATE_BEG);
11502 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11503 }
11504
11505 lex_state_set(parser, PM_LEX_STATE_DOT);
11506 LEX(PM_TOKEN_DOT);
11507 }
11508
11509 // integer
11510 case '0':
11511 case '1':
11512 case '2':
11513 case '3':
11514 case '4':
11515 case '5':
11516 case '6':
11517 case '7':
11518 case '8':
11519 case '9': {
11520 pm_token_type_t type = lex_numeric(parser);
11521 lex_state_set(parser, PM_LEX_STATE_END);
11522 LEX(type);
11523 }
11524
11525 // :: symbol
11526 case ':':
11527 if (match(parser, ':')) {
11528 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11529 lex_state_set(parser, PM_LEX_STATE_BEG);
11530 LEX(PM_TOKEN_UCOLON_COLON);
11531 }
11532
11533 lex_state_set(parser, PM_LEX_STATE_DOT);
11534 LEX(PM_TOKEN_COLON_COLON);
11535 }
11536
11537 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11538 lex_state_set(parser, PM_LEX_STATE_BEG);
11539 LEX(PM_TOKEN_COLON);
11540 }
11541
11542 if (peek(parser) == '"' || peek(parser) == '\'') {
11543 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11544 parser->current.end++;
11545 }
11546
11547 lex_state_set(parser, PM_LEX_STATE_FNAME);
11548 LEX(PM_TOKEN_SYMBOL_BEGIN);
11549
11550 // / /=
11551 case '/':
11552 if (lex_state_beg_p(parser)) {
11553 lex_mode_push_regexp(parser, '\0', '/');
11554 LEX(PM_TOKEN_REGEXP_BEGIN);
11555 }
11556
11557 if (match(parser, '=')) {
11558 lex_state_set(parser, PM_LEX_STATE_BEG);
11559 LEX(PM_TOKEN_SLASH_EQUAL);
11560 }
11561
11562 if (lex_state_spcarg_p(parser, space_seen)) {
11563 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11564 lex_mode_push_regexp(parser, '\0', '/');
11565 LEX(PM_TOKEN_REGEXP_BEGIN);
11566 }
11567
11568 if (ambiguous_operator_p(parser, space_seen)) {
11569 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11570 }
11571
11572 if (lex_state_operator_p(parser)) {
11573 lex_state_set(parser, PM_LEX_STATE_ARG);
11574 } else {
11575 lex_state_set(parser, PM_LEX_STATE_BEG);
11576 }
11577
11578 LEX(PM_TOKEN_SLASH);
11579
11580 // ^ ^=
11581 case '^':
11582 if (lex_state_operator_p(parser)) {
11583 lex_state_set(parser, PM_LEX_STATE_ARG);
11584 } else {
11585 lex_state_set(parser, PM_LEX_STATE_BEG);
11586 }
11587 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11588
11589 // ~ ~@
11590 case '~':
11591 if (lex_state_operator_p(parser)) {
11592 (void) match(parser, '@');
11593 lex_state_set(parser, PM_LEX_STATE_ARG);
11594 } else {
11595 lex_state_set(parser, PM_LEX_STATE_BEG);
11596 }
11597
11598 LEX(PM_TOKEN_TILDE);
11599
11600 // % %= %i %I %q %Q %w %W
11601 case '%': {
11602 // If there is no subsequent character then we have an
11603 // invalid token. We're going to say it's the percent
11604 // operator because we don't want to move into the string
11605 // lex mode unnecessarily.
11606 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11607 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11608 LEX(PM_TOKEN_PERCENT);
11609 }
11610
11611 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11612 lex_state_set(parser, PM_LEX_STATE_BEG);
11613 LEX(PM_TOKEN_PERCENT_EQUAL);
11614 } else if (
11615 lex_state_beg_p(parser) ||
11616 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11617 lex_state_spcarg_p(parser, space_seen)
11618 ) {
11619 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11620 if (*parser->current.end >= 0x80) {
11621 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11622 }
11623
11624 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11625 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11626 LEX(PM_TOKEN_STRING_BEGIN);
11627 }
11628
11629 // Delimiters for %-literals cannot be alphanumeric. We
11630 // validate that here.
11631 uint8_t delimiter = peek_offset(parser, 1);
11632 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11633 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11634 goto lex_next_token;
11635 }
11636
11637 switch (peek(parser)) {
11638 case 'i': {
11639 parser->current.end++;
11640
11641 if (parser->current.end < parser->end) {
11642 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11643 } else {
11644 lex_mode_push_list_eof(parser);
11645 }
11646
11647 LEX(PM_TOKEN_PERCENT_LOWER_I);
11648 }
11649 case 'I': {
11650 parser->current.end++;
11651
11652 if (parser->current.end < parser->end) {
11653 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11654 } else {
11655 lex_mode_push_list_eof(parser);
11656 }
11657
11658 LEX(PM_TOKEN_PERCENT_UPPER_I);
11659 }
11660 case 'r': {
11661 parser->current.end++;
11662
11663 if (parser->current.end < parser->end) {
11664 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11665 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11666 } else {
11667 lex_mode_push_regexp(parser, '\0', '\0');
11668 }
11669
11670 LEX(PM_TOKEN_REGEXP_BEGIN);
11671 }
11672 case 'q': {
11673 parser->current.end++;
11674
11675 if (parser->current.end < parser->end) {
11676 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11677 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11678 } else {
11679 lex_mode_push_string_eof(parser);
11680 }
11681
11682 LEX(PM_TOKEN_STRING_BEGIN);
11683 }
11684 case 'Q': {
11685 parser->current.end++;
11686
11687 if (parser->current.end < parser->end) {
11688 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11689 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11690 } else {
11691 lex_mode_push_string_eof(parser);
11692 }
11693
11694 LEX(PM_TOKEN_STRING_BEGIN);
11695 }
11696 case 's': {
11697 parser->current.end++;
11698
11699 if (parser->current.end < parser->end) {
11700 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11701 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11702 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11703 } else {
11704 lex_mode_push_string_eof(parser);
11705 }
11706
11707 LEX(PM_TOKEN_SYMBOL_BEGIN);
11708 }
11709 case 'w': {
11710 parser->current.end++;
11711
11712 if (parser->current.end < parser->end) {
11713 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11714 } else {
11715 lex_mode_push_list_eof(parser);
11716 }
11717
11718 LEX(PM_TOKEN_PERCENT_LOWER_W);
11719 }
11720 case 'W': {
11721 parser->current.end++;
11722
11723 if (parser->current.end < parser->end) {
11724 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11725 } else {
11726 lex_mode_push_list_eof(parser);
11727 }
11728
11729 LEX(PM_TOKEN_PERCENT_UPPER_W);
11730 }
11731 case 'x': {
11732 parser->current.end++;
11733
11734 if (parser->current.end < parser->end) {
11735 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11736 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11737 } else {
11738 lex_mode_push_string_eof(parser);
11739 }
11740
11741 LEX(PM_TOKEN_PERCENT_LOWER_X);
11742 }
11743 default:
11744 // If we get to this point, then we have a % that is completely
11745 // unparsable. In this case we'll just drop it from the parser
11746 // and skip past it and hope that the next token is something
11747 // that we can parse.
11748 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11749 goto lex_next_token;
11750 }
11751 }
11752
11753 if (ambiguous_operator_p(parser, space_seen)) {
11754 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11755 }
11756
11757 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11758 LEX(PM_TOKEN_PERCENT);
11759 }
11760
11761 // global variable
11762 case '$': {
11763 pm_token_type_t type = lex_global_variable(parser);
11764
11765 // If we're lexing an embedded variable, then we need to pop back into
11766 // the parent lex context.
11767 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11768 lex_mode_pop(parser);
11769 }
11770
11771 lex_state_set(parser, PM_LEX_STATE_END);
11772 LEX(type);
11773 }
11774
11775 // instance variable, class variable
11776 case '@':
11777 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11778 LEX(lex_at_variable(parser));
11779
11780 default: {
11781 if (*parser->current.start != '_') {
11782 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11783
11784 // If this isn't the beginning of an identifier, then
11785 // it's an invalid token as we've exhausted all of the
11786 // other options. We'll skip past it and return the next
11787 // token after adding an appropriate error message.
11788 if (!width) {
11789 if (*parser->current.start >= 0x80) {
11790 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11791 } else if (*parser->current.start == '\\') {
11792 switch (peek_at(parser, parser->current.start + 1)) {
11793 case ' ':
11794 parser->current.end++;
11795 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11796 break;
11797 case '\f':
11798 parser->current.end++;
11799 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11800 break;
11801 case '\t':
11802 parser->current.end++;
11803 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11804 break;
11805 case '\v':
11806 parser->current.end++;
11807 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11808 break;
11809 case '\r':
11810 if (peek_at(parser, parser->current.start + 2) != '\n') {
11811 parser->current.end++;
11812 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11813 break;
11814 }
11816 default:
11817 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11818 break;
11819 }
11820 } else if (char_is_ascii_printable(*parser->current.start)) {
11821 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11822 } else {
11823 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11824 }
11825
11826 goto lex_next_token;
11827 }
11828
11829 parser->current.end = parser->current.start + width;
11830 }
11831
11832 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11833
11834 // If we've hit a __END__ and it was at the start of the
11835 // line or the start of the file and it is followed by
11836 // either a \n or a \r\n, then this is the last token of the
11837 // file.
11838 if (
11839 ((parser->current.end - parser->current.start) == 7) &&
11840 current_token_starts_line(parser) &&
11841 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11842 (parser->current.end == parser->end || match_eol(parser))
11843 ) {
11844 // Since we know we're about to add an __END__ comment,
11845 // we know we need to add all of the newlines to get the
11846 // correct column information for it.
11847 const uint8_t *cursor = parser->current.end;
11848 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11849 pm_newline_list_append(&parser->newline_list, cursor++);
11850 }
11851
11852 parser->current.end = parser->end;
11853 parser->current.type = PM_TOKEN___END__;
11854 parser_lex_callback(parser);
11855
11856 parser->data_loc.start = parser->current.start;
11857 parser->data_loc.end = parser->current.end;
11858
11859 LEX(PM_TOKEN_EOF);
11860 }
11861
11862 pm_lex_state_t last_state = parser->lex_state;
11863
11864 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11865 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11866 if (previous_command_start) {
11867 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11868 } else {
11869 lex_state_set(parser, PM_LEX_STATE_ARG);
11870 }
11871 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11872 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11873 } else {
11874 lex_state_set(parser, PM_LEX_STATE_END);
11875 }
11876 }
11877
11878 if (
11879 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11880 (type == PM_TOKEN_IDENTIFIER) &&
11881 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11882 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11883 ) {
11884 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11885 }
11886
11887 LEX(type);
11888 }
11889 }
11890 }
11891 case PM_LEX_LIST: {
11892 if (parser->next_start != NULL) {
11893 parser->current.end = parser->next_start;
11894 parser->next_start = NULL;
11895 }
11896
11897 // First we'll set the beginning of the token.
11898 parser->current.start = parser->current.end;
11899
11900 // If there's any whitespace at the start of the list, then we're
11901 // going to trim it off the beginning and create a new token.
11902 size_t whitespace;
11903
11904 if (parser->heredoc_end) {
11905 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11906 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11907 whitespace += 1;
11908 }
11909 } else {
11910 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11911 }
11912
11913 if (whitespace > 0) {
11914 parser->current.end += whitespace;
11915 if (peek_offset(parser, -1) == '\n') {
11916 // mutates next_start
11917 parser_flush_heredoc_end(parser);
11918 }
11919 LEX(PM_TOKEN_WORDS_SEP);
11920 }
11921
11922 // We'll check if we're at the end of the file. If we are, then we
11923 // need to return the EOF token.
11924 if (parser->current.end >= parser->end) {
11925 LEX(PM_TOKEN_EOF);
11926 }
11927
11928 // Here we'll get a list of the places where strpbrk should break,
11929 // and then find the first one.
11930 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11931 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11932 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11933
11934 // If we haven't found an escape yet, then this buffer will be
11935 // unallocated since we can refer directly to the source string.
11936 pm_token_buffer_t token_buffer = { 0 };
11937
11938 while (breakpoint != NULL) {
11939 // If we hit whitespace, then we must have received content by
11940 // now, so we can return an element of the list.
11941 if (pm_char_is_whitespace(*breakpoint)) {
11942 parser->current.end = breakpoint;
11943 pm_token_buffer_flush(parser, &token_buffer);
11944 LEX(PM_TOKEN_STRING_CONTENT);
11945 }
11946
11947 // If we hit the terminator, we need to check which token to
11948 // return.
11949 if (*breakpoint == lex_mode->as.list.terminator) {
11950 // If this terminator doesn't actually close the list, then
11951 // we need to continue on past it.
11952 if (lex_mode->as.list.nesting > 0) {
11953 parser->current.end = breakpoint + 1;
11954 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11955 lex_mode->as.list.nesting--;
11956 continue;
11957 }
11958
11959 // If we've hit the terminator and we've already skipped
11960 // past content, then we can return a list node.
11961 if (breakpoint > parser->current.start) {
11962 parser->current.end = breakpoint;
11963 pm_token_buffer_flush(parser, &token_buffer);
11964 LEX(PM_TOKEN_STRING_CONTENT);
11965 }
11966
11967 // Otherwise, switch back to the default state and return
11968 // the end of the list.
11969 parser->current.end = breakpoint + 1;
11970 lex_mode_pop(parser);
11971 lex_state_set(parser, PM_LEX_STATE_END);
11972 LEX(PM_TOKEN_STRING_END);
11973 }
11974
11975 // If we hit a null byte, skip directly past it.
11976 if (*breakpoint == '\0') {
11977 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11978 continue;
11979 }
11980
11981 // If we hit escapes, then we need to treat the next token
11982 // literally. In this case we'll skip past the next character
11983 // and find the next breakpoint.
11984 if (*breakpoint == '\\') {
11985 parser->current.end = breakpoint + 1;
11986
11987 // If we've hit the end of the file, then break out of the
11988 // loop by setting the breakpoint to NULL.
11989 if (parser->current.end == parser->end) {
11990 breakpoint = NULL;
11991 continue;
11992 }
11993
11994 pm_token_buffer_escape(parser, &token_buffer);
11995 uint8_t peeked = peek(parser);
11996
11997 switch (peeked) {
11998 case ' ':
11999 case '\f':
12000 case '\t':
12001 case '\v':
12002 case '\\':
12003 pm_token_buffer_push_byte(&token_buffer, peeked);
12004 parser->current.end++;
12005 break;
12006 case '\r':
12007 parser->current.end++;
12008 if (peek(parser) != '\n') {
12009 pm_token_buffer_push_byte(&token_buffer, '\r');
12010 break;
12011 }
12013 case '\n':
12014 pm_token_buffer_push_byte(&token_buffer, '\n');
12015
12016 if (parser->heredoc_end) {
12017 // ... if we are on the same line as a heredoc,
12018 // flush the heredoc and continue parsing after
12019 // heredoc_end.
12020 parser_flush_heredoc_end(parser);
12021 pm_token_buffer_copy(parser, &token_buffer);
12022 LEX(PM_TOKEN_STRING_CONTENT);
12023 } else {
12024 // ... else track the newline.
12025 pm_newline_list_append(&parser->newline_list, parser->current.end);
12026 }
12027
12028 parser->current.end++;
12029 break;
12030 default:
12031 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12032 pm_token_buffer_push_byte(&token_buffer, peeked);
12033 parser->current.end++;
12034 } else if (lex_mode->as.list.interpolation) {
12035 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12036 } else {
12037 pm_token_buffer_push_byte(&token_buffer, '\\');
12038 pm_token_buffer_push_escaped(&token_buffer, parser);
12039 }
12040
12041 break;
12042 }
12043
12044 token_buffer.cursor = parser->current.end;
12045 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12046 continue;
12047 }
12048
12049 // If we hit a #, then we will attempt to lex interpolation.
12050 if (*breakpoint == '#') {
12051 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12052
12053 if (type == PM_TOKEN_NOT_PROVIDED) {
12054 // If we haven't returned at this point then we had something
12055 // that looked like an interpolated class or instance variable
12056 // like "#@" but wasn't actually. In this case we'll just skip
12057 // to the next breakpoint.
12058 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12059 continue;
12060 }
12061
12062 if (type == PM_TOKEN_STRING_CONTENT) {
12063 pm_token_buffer_flush(parser, &token_buffer);
12064 }
12065
12066 LEX(type);
12067 }
12068
12069 // If we've hit the incrementor, then we need to skip past it
12070 // and find the next breakpoint.
12071 assert(*breakpoint == lex_mode->as.list.incrementor);
12072 parser->current.end = breakpoint + 1;
12073 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12074 lex_mode->as.list.nesting++;
12075 continue;
12076 }
12077
12078 if (parser->current.end > parser->current.start) {
12079 pm_token_buffer_flush(parser, &token_buffer);
12080 LEX(PM_TOKEN_STRING_CONTENT);
12081 }
12082
12083 // If we were unable to find a breakpoint, then this token hits the
12084 // end of the file.
12085 parser->current.end = parser->end;
12086 pm_token_buffer_flush(parser, &token_buffer);
12087 LEX(PM_TOKEN_STRING_CONTENT);
12088 }
12089 case PM_LEX_REGEXP: {
12090 // First, we'll set the start of this token to be the current end.
12091 if (parser->next_start == NULL) {
12092 parser->current.start = parser->current.end;
12093 } else {
12094 parser->current.start = parser->next_start;
12095 parser->current.end = parser->next_start;
12096 parser->next_start = NULL;
12097 }
12098
12099 // We'll check if we're at the end of the file. If we are, then we
12100 // need to return the EOF token.
12101 if (parser->current.end >= parser->end) {
12102 LEX(PM_TOKEN_EOF);
12103 }
12104
12105 // Get a reference to the current mode.
12106 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12107
12108 // These are the places where we need to split up the content of the
12109 // regular expression. We'll use strpbrk to find the first of these
12110 // characters.
12111 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12112 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12113 pm_regexp_token_buffer_t token_buffer = { 0 };
12114
12115 while (breakpoint != NULL) {
12116 uint8_t term = lex_mode->as.regexp.terminator;
12117 bool is_terminator = (*breakpoint == term);
12118
12119 // If the terminator is newline, we need to consider \r\n _also_ a newline
12120 // For example: `%\nfoo\r\n`
12121 // The string should be "foo", not "foo\r"
12122 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12123 if (term == '\n') {
12124 is_terminator = true;
12125 }
12126
12127 // If the terminator is a CR, but we see a CRLF, we need to
12128 // treat the CRLF as a newline, meaning this is _not_ the
12129 // terminator
12130 if (term == '\r') {
12131 is_terminator = false;
12132 }
12133 }
12134
12135 // If we hit the terminator, we need to determine what kind of
12136 // token to return.
12137 if (is_terminator) {
12138 if (lex_mode->as.regexp.nesting > 0) {
12139 parser->current.end = breakpoint + 1;
12140 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12141 lex_mode->as.regexp.nesting--;
12142 continue;
12143 }
12144
12145 // Here we've hit the terminator. If we have already consumed
12146 // content then we need to return that content as string content
12147 // first.
12148 if (breakpoint > parser->current.start) {
12149 parser->current.end = breakpoint;
12150 pm_regexp_token_buffer_flush(parser, &token_buffer);
12151 LEX(PM_TOKEN_STRING_CONTENT);
12152 }
12153
12154 // Check here if we need to track the newline.
12155 size_t eol_length = match_eol_at(parser, breakpoint);
12156 if (eol_length) {
12157 parser->current.end = breakpoint + eol_length;
12158 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12159 } else {
12160 parser->current.end = breakpoint + 1;
12161 }
12162
12163 // Since we've hit the terminator of the regular expression,
12164 // we now need to parse the options.
12165 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12166
12167 lex_mode_pop(parser);
12168 lex_state_set(parser, PM_LEX_STATE_END);
12169 LEX(PM_TOKEN_REGEXP_END);
12170 }
12171
12172 // If we've hit the incrementor, then we need to skip past it
12173 // and find the next breakpoint.
12174 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12175 parser->current.end = breakpoint + 1;
12176 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12177 lex_mode->as.regexp.nesting++;
12178 continue;
12179 }
12180
12181 switch (*breakpoint) {
12182 case '\0':
12183 // If we hit a null byte, skip directly past it.
12184 parser->current.end = breakpoint + 1;
12185 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12186 break;
12187 case '\r':
12188 if (peek_at(parser, breakpoint + 1) != '\n') {
12189 parser->current.end = breakpoint + 1;
12190 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12191 break;
12192 }
12193
12194 breakpoint++;
12195 parser->current.end = breakpoint;
12196 pm_regexp_token_buffer_escape(parser, &token_buffer);
12197 token_buffer.base.cursor = breakpoint;
12198
12200 case '\n':
12201 // If we've hit a newline, then we need to track that in
12202 // the list of newlines.
12203 if (parser->heredoc_end == NULL) {
12204 pm_newline_list_append(&parser->newline_list, breakpoint);
12205 parser->current.end = breakpoint + 1;
12206 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12207 break;
12208 }
12209
12210 parser->current.end = breakpoint + 1;
12211 parser_flush_heredoc_end(parser);
12212 pm_regexp_token_buffer_flush(parser, &token_buffer);
12213 LEX(PM_TOKEN_STRING_CONTENT);
12214 case '\\': {
12215 // If we hit escapes, then we need to treat the next
12216 // token literally. In this case we'll skip past the
12217 // next character and find the next breakpoint.
12218 parser->current.end = breakpoint + 1;
12219
12220 // If we've hit the end of the file, then break out of
12221 // the loop by setting the breakpoint to NULL.
12222 if (parser->current.end == parser->end) {
12223 breakpoint = NULL;
12224 break;
12225 }
12226
12227 pm_regexp_token_buffer_escape(parser, &token_buffer);
12228 uint8_t peeked = peek(parser);
12229
12230 switch (peeked) {
12231 case '\r':
12232 parser->current.end++;
12233 if (peek(parser) != '\n') {
12234 if (lex_mode->as.regexp.terminator != '\r') {
12235 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12236 }
12237 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12238 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12239 break;
12240 }
12242 case '\n':
12243 if (parser->heredoc_end) {
12244 // ... if we are on the same line as a heredoc,
12245 // flush the heredoc and continue parsing after
12246 // heredoc_end.
12247 parser_flush_heredoc_end(parser);
12248 pm_regexp_token_buffer_copy(parser, &token_buffer);
12249 LEX(PM_TOKEN_STRING_CONTENT);
12250 } else {
12251 // ... else track the newline.
12252 pm_newline_list_append(&parser->newline_list, parser->current.end);
12253 }
12254
12255 parser->current.end++;
12256 break;
12257 case 'c':
12258 case 'C':
12259 case 'M':
12260 case 'u':
12261 case 'x':
12262 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12263 break;
12264 default:
12265 if (lex_mode->as.regexp.terminator == peeked) {
12266 // Some characters when they are used as the
12267 // terminator also receive an escape. They are
12268 // enumerated here.
12269 switch (peeked) {
12270 case '$': case ')': case '*': case '+':
12271 case '.': case '>': case '?': case ']':
12272 case '^': case '|': case '}':
12273 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12274 break;
12275 default:
12276 break;
12277 }
12278
12279 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12280 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12281 parser->current.end++;
12282 break;
12283 }
12284
12285 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12286 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12287 break;
12288 }
12289
12290 token_buffer.base.cursor = parser->current.end;
12291 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12292 break;
12293 }
12294 case '#': {
12295 // If we hit a #, then we will attempt to lex
12296 // interpolation.
12297 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12298
12299 if (type == PM_TOKEN_NOT_PROVIDED) {
12300 // If we haven't returned at this point then we had
12301 // something that looked like an interpolated class or
12302 // instance variable like "#@" but wasn't actually. In
12303 // this case we'll just skip to the next breakpoint.
12304 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12305 break;
12306 }
12307
12308 if (type == PM_TOKEN_STRING_CONTENT) {
12309 pm_regexp_token_buffer_flush(parser, &token_buffer);
12310 }
12311
12312 LEX(type);
12313 }
12314 default:
12315 assert(false && "unreachable");
12316 break;
12317 }
12318 }
12319
12320 if (parser->current.end > parser->current.start) {
12321 pm_regexp_token_buffer_flush(parser, &token_buffer);
12322 LEX(PM_TOKEN_STRING_CONTENT);
12323 }
12324
12325 // If we were unable to find a breakpoint, then this token hits the
12326 // end of the file.
12327 parser->current.end = parser->end;
12328 pm_regexp_token_buffer_flush(parser, &token_buffer);
12329 LEX(PM_TOKEN_STRING_CONTENT);
12330 }
12331 case PM_LEX_STRING: {
12332 // First, we'll set to start of this token to be the current end.
12333 if (parser->next_start == NULL) {
12334 parser->current.start = parser->current.end;
12335 } else {
12336 parser->current.start = parser->next_start;
12337 parser->current.end = parser->next_start;
12338 parser->next_start = NULL;
12339 }
12340
12341 // We'll check if we're at the end of the file. If we are, then we need to
12342 // return the EOF token.
12343 if (parser->current.end >= parser->end) {
12344 LEX(PM_TOKEN_EOF);
12345 }
12346
12347 // These are the places where we need to split up the content of the
12348 // string. We'll use strpbrk to find the first of these characters.
12349 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12350 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12351 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12352
12353 // If we haven't found an escape yet, then this buffer will be
12354 // unallocated since we can refer directly to the source string.
12355 pm_token_buffer_t token_buffer = { 0 };
12356
12357 while (breakpoint != NULL) {
12358 // If we hit the incrementor, then we'll increment then nesting and
12359 // continue lexing.
12360 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12361 lex_mode->as.string.nesting++;
12362 parser->current.end = breakpoint + 1;
12363 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12364 continue;
12365 }
12366
12367 uint8_t term = lex_mode->as.string.terminator;
12368 bool is_terminator = (*breakpoint == term);
12369
12370 // If the terminator is newline, we need to consider \r\n _also_ a newline
12371 // For example: `%r\nfoo\r\n`
12372 // The string should be /foo/, not /foo\r/
12373 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12374 if (term == '\n') {
12375 is_terminator = true;
12376 }
12377
12378 // If the terminator is a CR, but we see a CRLF, we need to
12379 // treat the CRLF as a newline, meaning this is _not_ the
12380 // terminator
12381 if (term == '\r') {
12382 is_terminator = false;
12383 }
12384 }
12385
12386 // Note that we have to check the terminator here first because we could
12387 // potentially be parsing a % string that has a # character as the
12388 // terminator.
12389 if (is_terminator) {
12390 // If this terminator doesn't actually close the string, then we need
12391 // to continue on past it.
12392 if (lex_mode->as.string.nesting > 0) {
12393 parser->current.end = breakpoint + 1;
12394 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12395 lex_mode->as.string.nesting--;
12396 continue;
12397 }
12398
12399 // Here we've hit the terminator. If we have already consumed content
12400 // then we need to return that content as string content first.
12401 if (breakpoint > parser->current.start) {
12402 parser->current.end = breakpoint;
12403 pm_token_buffer_flush(parser, &token_buffer);
12404 LEX(PM_TOKEN_STRING_CONTENT);
12405 }
12406
12407 // Otherwise we need to switch back to the parent lex mode and
12408 // return the end of the string.
12409 size_t eol_length = match_eol_at(parser, breakpoint);
12410 if (eol_length) {
12411 parser->current.end = breakpoint + eol_length;
12412 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12413 } else {
12414 parser->current.end = breakpoint + 1;
12415 }
12416
12417 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12418 parser->current.end++;
12419 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12420 lex_mode_pop(parser);
12421 LEX(PM_TOKEN_LABEL_END);
12422 }
12423
12424 lex_state_set(parser, PM_LEX_STATE_END);
12425 lex_mode_pop(parser);
12426 LEX(PM_TOKEN_STRING_END);
12427 }
12428
12429 switch (*breakpoint) {
12430 case '\0':
12431 // Skip directly past the null character.
12432 parser->current.end = breakpoint + 1;
12433 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12434 break;
12435 case '\r':
12436 if (peek_at(parser, breakpoint + 1) != '\n') {
12437 parser->current.end = breakpoint + 1;
12438 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12439 break;
12440 }
12441
12442 // If we hit a \r\n sequence, then we need to treat it
12443 // as a newline.
12444 breakpoint++;
12445 parser->current.end = breakpoint;
12446 pm_token_buffer_escape(parser, &token_buffer);
12447 token_buffer.cursor = breakpoint;
12448
12450 case '\n':
12451 // When we hit a newline, we need to flush any potential
12452 // heredocs. Note that this has to happen after we check
12453 // for the terminator in case the terminator is a
12454 // newline character.
12455 if (parser->heredoc_end == NULL) {
12456 pm_newline_list_append(&parser->newline_list, breakpoint);
12457 parser->current.end = breakpoint + 1;
12458 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12459 break;
12460 }
12461
12462 parser->current.end = breakpoint + 1;
12463 parser_flush_heredoc_end(parser);
12464 pm_token_buffer_flush(parser, &token_buffer);
12465 LEX(PM_TOKEN_STRING_CONTENT);
12466 case '\\': {
12467 // Here we hit escapes.
12468 parser->current.end = breakpoint + 1;
12469
12470 // If we've hit the end of the file, then break out of
12471 // the loop by setting the breakpoint to NULL.
12472 if (parser->current.end == parser->end) {
12473 breakpoint = NULL;
12474 continue;
12475 }
12476
12477 pm_token_buffer_escape(parser, &token_buffer);
12478 uint8_t peeked = peek(parser);
12479
12480 switch (peeked) {
12481 case '\\':
12482 pm_token_buffer_push_byte(&token_buffer, '\\');
12483 parser->current.end++;
12484 break;
12485 case '\r':
12486 parser->current.end++;
12487 if (peek(parser) != '\n') {
12488 if (!lex_mode->as.string.interpolation) {
12489 pm_token_buffer_push_byte(&token_buffer, '\\');
12490 }
12491 pm_token_buffer_push_byte(&token_buffer, '\r');
12492 break;
12493 }
12495 case '\n':
12496 if (!lex_mode->as.string.interpolation) {
12497 pm_token_buffer_push_byte(&token_buffer, '\\');
12498 pm_token_buffer_push_byte(&token_buffer, '\n');
12499 }
12500
12501 if (parser->heredoc_end) {
12502 // ... if we are on the same line as a heredoc,
12503 // flush the heredoc and continue parsing after
12504 // heredoc_end.
12505 parser_flush_heredoc_end(parser);
12506 pm_token_buffer_copy(parser, &token_buffer);
12507 LEX(PM_TOKEN_STRING_CONTENT);
12508 } else {
12509 // ... else track the newline.
12510 pm_newline_list_append(&parser->newline_list, parser->current.end);
12511 }
12512
12513 parser->current.end++;
12514 break;
12515 default:
12516 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12517 pm_token_buffer_push_byte(&token_buffer, peeked);
12518 parser->current.end++;
12519 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12520 pm_token_buffer_push_byte(&token_buffer, peeked);
12521 parser->current.end++;
12522 } else if (lex_mode->as.string.interpolation) {
12523 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12524 } else {
12525 pm_token_buffer_push_byte(&token_buffer, '\\');
12526 pm_token_buffer_push_escaped(&token_buffer, parser);
12527 }
12528
12529 break;
12530 }
12531
12532 token_buffer.cursor = parser->current.end;
12533 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12534 break;
12535 }
12536 case '#': {
12537 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12538
12539 if (type == PM_TOKEN_NOT_PROVIDED) {
12540 // If we haven't returned at this point then we had something that
12541 // looked like an interpolated class or instance variable like "#@"
12542 // but wasn't actually. In this case we'll just skip to the next
12543 // breakpoint.
12544 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12545 break;
12546 }
12547
12548 if (type == PM_TOKEN_STRING_CONTENT) {
12549 pm_token_buffer_flush(parser, &token_buffer);
12550 }
12551
12552 LEX(type);
12553 }
12554 default:
12555 assert(false && "unreachable");
12556 }
12557 }
12558
12559 if (parser->current.end > parser->current.start) {
12560 pm_token_buffer_flush(parser, &token_buffer);
12561 LEX(PM_TOKEN_STRING_CONTENT);
12562 }
12563
12564 // If we've hit the end of the string, then this is an unterminated
12565 // string. In that case we'll return a string content token.
12566 parser->current.end = parser->end;
12567 pm_token_buffer_flush(parser, &token_buffer);
12568 LEX(PM_TOKEN_STRING_CONTENT);
12569 }
12570 case PM_LEX_HEREDOC: {
12571 // First, we'll set to start of this token.
12572 if (parser->next_start == NULL) {
12573 parser->current.start = parser->current.end;
12574 } else {
12575 parser->current.start = parser->next_start;
12576 parser->current.end = parser->next_start;
12577 parser->heredoc_end = NULL;
12578 parser->next_start = NULL;
12579 }
12580
12581 // Now let's grab the information about the identifier off of the
12582 // current lex mode.
12583 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12584 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12585
12586 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12587 lex_mode->as.heredoc.line_continuation = false;
12588
12589 // We'll check if we're at the end of the file. If we are, then we
12590 // will add an error (because we weren't able to find the
12591 // terminator) but still continue parsing so that content after the
12592 // declaration of the heredoc can be parsed.
12593 if (parser->current.end >= parser->end) {
12594 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12595 parser->next_start = lex_mode->as.heredoc.next_start;
12596 parser->heredoc_end = parser->current.end;
12597 lex_state_set(parser, PM_LEX_STATE_END);
12598 lex_mode_pop(parser);
12599 LEX(PM_TOKEN_HEREDOC_END);
12600 }
12601
12602 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12603 size_t ident_length = heredoc_lex_mode->ident_length;
12604
12605 // If we are immediately following a newline and we have hit the
12606 // terminator, then we need to return the ending of the heredoc.
12607 if (current_token_starts_line(parser)) {
12608 const uint8_t *start = parser->current.start;
12609
12610 if (!line_continuation && (start + ident_length <= parser->end)) {
12611 const uint8_t *newline = next_newline(start, parser->end - start);
12612 const uint8_t *ident_end = newline;
12613 const uint8_t *terminator_end = newline;
12614
12615 if (newline == NULL) {
12616 terminator_end = parser->end;
12617 ident_end = parser->end;
12618 } else {
12619 terminator_end++;
12620 if (newline[-1] == '\r') {
12621 ident_end--; // Remove \r
12622 }
12623 }
12624
12625 const uint8_t *terminator_start = ident_end - ident_length;
12626 const uint8_t *cursor = start;
12627
12628 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12629 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12630 cursor++;
12631 }
12632 }
12633
12634 if (
12635 (cursor == terminator_start) &&
12636 (memcmp(terminator_start, ident_start, ident_length) == 0)
12637 ) {
12638 if (newline != NULL) {
12639 pm_newline_list_append(&parser->newline_list, newline);
12640 }
12641
12642 parser->current.end = terminator_end;
12643 if (*lex_mode->as.heredoc.next_start == '\\') {
12644 parser->next_start = NULL;
12645 } else {
12646 parser->next_start = lex_mode->as.heredoc.next_start;
12647 parser->heredoc_end = parser->current.end;
12648 }
12649
12650 lex_state_set(parser, PM_LEX_STATE_END);
12651 lex_mode_pop(parser);
12652 LEX(PM_TOKEN_HEREDOC_END);
12653 }
12654 }
12655
12656 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12657 if (
12658 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12659 lex_mode->as.heredoc.common_whitespace != NULL &&
12660 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12661 peek_at(parser, start) != '\n'
12662 ) {
12663 *lex_mode->as.heredoc.common_whitespace = whitespace;
12664 }
12665 }
12666
12667 // Otherwise we'll be parsing string content. These are the places
12668 // where we need to split up the content of the heredoc. We'll use
12669 // strpbrk to find the first of these characters.
12670 uint8_t breakpoints[] = "\r\n\\#";
12671
12672 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12673 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12674 breakpoints[3] = '\0';
12675 }
12676
12677 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12678 pm_token_buffer_t token_buffer = { 0 };
12679 bool was_line_continuation = false;
12680
12681 while (breakpoint != NULL) {
12682 switch (*breakpoint) {
12683 case '\0':
12684 // Skip directly past the null character.
12685 parser->current.end = breakpoint + 1;
12686 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12687 break;
12688 case '\r':
12689 parser->current.end = breakpoint + 1;
12690
12691 if (peek_at(parser, breakpoint + 1) != '\n') {
12692 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12693 break;
12694 }
12695
12696 // If we hit a \r\n sequence, then we want to replace it
12697 // with a single \n character in the final string.
12698 breakpoint++;
12699 pm_token_buffer_escape(parser, &token_buffer);
12700 token_buffer.cursor = breakpoint;
12701
12703 case '\n': {
12704 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12705 parser_flush_heredoc_end(parser);
12706 parser->current.end = breakpoint + 1;
12707 pm_token_buffer_flush(parser, &token_buffer);
12708 LEX(PM_TOKEN_STRING_CONTENT);
12709 }
12710
12711 pm_newline_list_append(&parser->newline_list, breakpoint);
12712
12713 // If we have a - or ~ heredoc, then we can match after
12714 // some leading whitespace.
12715 const uint8_t *start = breakpoint + 1;
12716
12717 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12718 // We want to match the terminator starting from the end of the line in case
12719 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12720 const uint8_t *newline = next_newline(start, parser->end - start);
12721
12722 if (newline == NULL) {
12723 newline = parser->end;
12724 } else if (newline[-1] == '\r') {
12725 newline--; // Remove \r
12726 }
12727
12728 // Start of a possible terminator.
12729 const uint8_t *terminator_start = newline - ident_length;
12730
12731 // Cursor to check for the leading whitespace. We skip the
12732 // leading whitespace if we have a - or ~ heredoc.
12733 const uint8_t *cursor = start;
12734
12735 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12736 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12737 cursor++;
12738 }
12739 }
12740
12741 if (
12742 cursor == terminator_start &&
12743 (memcmp(terminator_start, ident_start, ident_length) == 0)
12744 ) {
12745 parser->current.end = breakpoint + 1;
12746 pm_token_buffer_flush(parser, &token_buffer);
12747 LEX(PM_TOKEN_STRING_CONTENT);
12748 }
12749 }
12750
12751 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12752
12753 // If we have hit a newline that is followed by a valid
12754 // terminator, then we need to return the content of the
12755 // heredoc here as string content. Then, the next time a
12756 // token is lexed, it will match again and return the
12757 // end of the heredoc.
12758 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12759 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12760 *lex_mode->as.heredoc.common_whitespace = whitespace;
12761 }
12762
12763 parser->current.end = breakpoint + 1;
12764 pm_token_buffer_flush(parser, &token_buffer);
12765 LEX(PM_TOKEN_STRING_CONTENT);
12766 }
12767
12768 // Otherwise we hit a newline and it wasn't followed by
12769 // a terminator, so we can continue parsing.
12770 parser->current.end = breakpoint + 1;
12771 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12772 break;
12773 }
12774 case '\\': {
12775 // If we hit an escape, then we need to skip past
12776 // however many characters the escape takes up. However
12777 // it's important that if \n or \r\n are escaped, we
12778 // stop looping before the newline and not after the
12779 // newline so that we can still potentially find the
12780 // terminator of the heredoc.
12781 parser->current.end = breakpoint + 1;
12782
12783 // If we've hit the end of the file, then break out of
12784 // the loop by setting the breakpoint to NULL.
12785 if (parser->current.end == parser->end) {
12786 breakpoint = NULL;
12787 continue;
12788 }
12789
12790 pm_token_buffer_escape(parser, &token_buffer);
12791 uint8_t peeked = peek(parser);
12792
12793 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12794 switch (peeked) {
12795 case '\r':
12796 parser->current.end++;
12797 if (peek(parser) != '\n') {
12798 pm_token_buffer_push_byte(&token_buffer, '\\');
12799 pm_token_buffer_push_byte(&token_buffer, '\r');
12800 break;
12801 }
12803 case '\n':
12804 pm_token_buffer_push_byte(&token_buffer, '\\');
12805 pm_token_buffer_push_byte(&token_buffer, '\n');
12806 token_buffer.cursor = parser->current.end + 1;
12807 breakpoint = parser->current.end;
12808 continue;
12809 default:
12810 pm_token_buffer_push_byte(&token_buffer, '\\');
12811 pm_token_buffer_push_escaped(&token_buffer, parser);
12812 break;
12813 }
12814 } else {
12815 switch (peeked) {
12816 case '\r':
12817 parser->current.end++;
12818 if (peek(parser) != '\n') {
12819 pm_token_buffer_push_byte(&token_buffer, '\r');
12820 break;
12821 }
12823 case '\n':
12824 // If we are in a tilde here, we should
12825 // break out of the loop and return the
12826 // string content.
12827 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12828 const uint8_t *end = parser->current.end;
12829 pm_newline_list_append(&parser->newline_list, end);
12830
12831 // Here we want the buffer to only
12832 // include up to the backslash.
12833 parser->current.end = breakpoint;
12834 pm_token_buffer_flush(parser, &token_buffer);
12835
12836 // Now we can advance the end of the
12837 // token past the newline.
12838 parser->current.end = end + 1;
12839 lex_mode->as.heredoc.line_continuation = true;
12840 LEX(PM_TOKEN_STRING_CONTENT);
12841 }
12842
12843 was_line_continuation = true;
12844 token_buffer.cursor = parser->current.end + 1;
12845 breakpoint = parser->current.end;
12846 continue;
12847 default:
12848 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12849 break;
12850 }
12851 }
12852
12853 token_buffer.cursor = parser->current.end;
12854 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12855 break;
12856 }
12857 case '#': {
12858 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12859
12860 if (type == PM_TOKEN_NOT_PROVIDED) {
12861 // If we haven't returned at this point then we had
12862 // something that looked like an interpolated class
12863 // or instance variable like "#@" but wasn't
12864 // actually. In this case we'll just skip to the
12865 // next breakpoint.
12866 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12867 break;
12868 }
12869
12870 if (type == PM_TOKEN_STRING_CONTENT) {
12871 pm_token_buffer_flush(parser, &token_buffer);
12872 }
12873
12874 LEX(type);
12875 }
12876 default:
12877 assert(false && "unreachable");
12878 }
12879
12880 was_line_continuation = false;
12881 }
12882
12883 if (parser->current.end > parser->current.start) {
12884 parser->current.end = parser->end;
12885 pm_token_buffer_flush(parser, &token_buffer);
12886 LEX(PM_TOKEN_STRING_CONTENT);
12887 }
12888
12889 // If we've hit the end of the string, then this is an unterminated
12890 // heredoc. In that case we'll return a string content token.
12891 parser->current.end = parser->end;
12892 pm_token_buffer_flush(parser, &token_buffer);
12893 LEX(PM_TOKEN_STRING_CONTENT);
12894 }
12895 }
12896
12897 assert(false && "unreachable");
12898}
12899
// LEX is used throughout the lexing function that ends just above. Undefine it
// here so the macro is not visible to the parse functions that follow.
12900#undef LEX
12901
12902/******************************************************************************/
12903/* Parse functions */
12904/******************************************************************************/
12905
/**
 * Binding powers used to decide how operators group while parsing
 * expressions. The trailing comment on each enumerator lists the operators
 * that it covers.
 *
 * Values are assigned in steps of two, which leaves the odd numbers in
 * between without a name. The LEFT_ASSOCIATIVE and NON_ASSOCIATIVE initializer
 * macros defined after this enum produce `precedence + 1` as a right-hand
 * power, and that result falls on those in-between values.
 *
 * NOTE(review): presumably a larger value means tighter binding (the list
 * runs from statements up to `::` and `.`); the code that compares these
 * values is not in this part of the file, so confirm there.
 */
12914typedef enum {
12915 PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
12916 PM_BINDING_POWER_STATEMENT = 2,
12917 PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
12918 PM_BINDING_POWER_MODIFIER = 6, // if unless until while
12919 PM_BINDING_POWER_COMPOSITION = 8, // and or
12920 PM_BINDING_POWER_NOT = 10, // not
12921 PM_BINDING_POWER_MATCH = 12, // => in
12922 PM_BINDING_POWER_DEFINED = 14, // defined?
12923 PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
12924 PM_BINDING_POWER_ASSIGNMENT = 18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
12925 PM_BINDING_POWER_TERNARY = 20, // ?:
12926 PM_BINDING_POWER_RANGE = 22, // .. ...
12927 PM_BINDING_POWER_LOGICAL_OR = 24, // ||
12928 PM_BINDING_POWER_LOGICAL_AND = 26, // &&
12929 PM_BINDING_POWER_EQUALITY = 28, // <=> == === != =~ !~
12930 PM_BINDING_POWER_COMPARISON = 30, // > >= < <=
12931 PM_BINDING_POWER_BITWISE_OR = 32, // | ^
12932 PM_BINDING_POWER_BITWISE_AND = 34, // &
12933 PM_BINDING_POWER_SHIFT = 36, // << >>
12934 PM_BINDING_POWER_TERM = 38, // + -
12935 PM_BINDING_POWER_FACTOR = 40, // * / %
12936 PM_BINDING_POWER_UMINUS = 42, // -@
12937 PM_BINDING_POWER_EXPONENT = 44, // **
12938 PM_BINDING_POWER_UNARY = 46, // ! ~ +@
12939 PM_BINDING_POWER_INDEX = 48, // [] []=
12940 PM_BINDING_POWER_CALL = 50, // :: . &.
12941 PM_BINDING_POWER_MAX = 52 // largest value in this enum
12942} pm_binding_power_t;
12943
12948typedef struct {
12950 pm_binding_power_t left;
12951
12953 pm_binding_power_t right;
12954
12957
12964
/*
 * Initializer shorthands for entries of the binding power table. Each one
 * expands to a brace-enclosed list of four values: the left power, the right
 * power, and two boolean flags.
 *
 *   - LEFT_ASSOCIATIVE:        right power is one more than the left power.
 *   - RIGHT_ASSOCIATIVE:       right power equals the left power.
 *   - NON_ASSOCIATIVE:         same powers as LEFT_ASSOCIATIVE, with the
 *                              fourth flag set to true.
 *   - RIGHT_ASSOCIATIVE_UNARY: same powers as RIGHT_ASSOCIATIVE, with the
 *                              third flag set to false.
 *   - BINDING_POWER_ASSIGNMENT: fixed pair used by every assignment operator.
 *
 * The parameter is parenthesized so that `+ 1` always applies to the whole
 * argument, even when the argument is itself an expression.
 */
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12970
/**
 * Binding powers for every token type that can act as an operator, indexed by
 * token type. Only the token types named below are given an entry. Because
 * designated initializers are used, every other element is zero-initialized,
 * so its powers equal PM_BINDING_POWER_UNSET (0).
 *
 * Entries are grouped by the operators they cover, from the modifier keywords
 * up to the call operators. Most entries are built with the shorthand macros
 * defined just before this table; the few that need a pair of powers no macro
 * produces are written out in full.
 */
12971pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12972 // rescue (written out in full: its right power is PM_BINDING_POWER_COMPOSITION, which no shorthand macro produces)
12973 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12974
12975 // if unless until while
12976 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12977 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12978 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12979 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12980
12981 // and or
12982 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12983 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12984
12985 // => in
12986 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12987 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12988
12989 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12990 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12991 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12992 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12993 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12994 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12995 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12996 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12997 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12998 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12999 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13000 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13001 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
13002 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13003 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13004
13005 // ?:
13006 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13007
13008 // .. ...
13009 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13010 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
 // The unary forms of .. and ... use the || power, not the range power.
13011 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13012 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13013
13014 // ||
13015 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13016
13017 // &&
13018 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13019
13020 // != !~ == === =~ <=>
13021 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13022 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13023 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13024 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13025 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13026 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13027
13028 // > >= < <=
13029 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13030 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13031 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13032 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13033
13034 // ^ |
13035 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13036 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13037
13038 // &
13039 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13040
13041 // >> <<
13042 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13043 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13044
13045 // - +
13046 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13047 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13048
13049 // % / *
13050 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13051 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13052 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13053 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13054
13055 // -@
13056 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
 // Written out in full: the right power is PM_BINDING_POWER_MAX.
13057 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13058
13059 // **
13060 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
 // The unary form of ** uses the unary power, not the exponent power.
13061 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13062
13063 // ! ~ +@
13064 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13065 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13066 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13067
13068 // [
13069 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13070
13071 // :: . &.
13072 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13073 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13074 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13075};
13076
// The helper macros below exist only to build the binding powers table above,
// so remove all of them once the table has been defined.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
13081
13085static inline bool
13086match1(const pm_parser_t *parser, pm_token_type_t type) {
13087 return parser->current.type == type;
13088}
13089
13093static inline bool
13094match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13095 return match1(parser, type1) || match1(parser, type2);
13096}
13097
13101static inline bool
13102match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13103 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13104}
13105
13109static inline bool
13110match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13111 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13112}
13113
13117static inline bool
13118match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13119 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13120}
13121
13125static inline bool
13126match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13127 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13128}
13129
13136static bool
13137accept1(pm_parser_t *parser, pm_token_type_t type) {
13138 if (match1(parser, type)) {
13139 parser_lex(parser);
13140 return true;
13141 }
13142 return false;
13143}
13144
13149static inline bool
13150accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13151 if (match2(parser, type1, type2)) {
13152 parser_lex(parser);
13153 return true;
13154 }
13155 return false;
13156}
13157
13169static void
13170expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13171 if (accept1(parser, type)) return;
13172
13173 const uint8_t *location = parser->previous.end;
13174 pm_parser_err(parser, location, location, diag_id);
13175
13176 parser->previous.start = location;
13177 parser->previous.type = PM_TOKEN_MISSING;
13178}
13179
13184static void
13185expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13186 if (accept2(parser, type1, type2)) return;
13187
13188 const uint8_t *location = parser->previous.end;
13189 pm_parser_err(parser, location, location, diag_id);
13190
13191 parser->previous.start = location;
13192 parser->previous.type = PM_TOKEN_MISSING;
13193}
13194
13199static void
13200expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13201 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13202 parser_lex(parser);
13203 } else {
13204 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13205 parser->previous.start = parser->previous.end;
13206 parser->previous.type = PM_TOKEN_MISSING;
13207 }
13208}
13209
13210static pm_node_t *
13211parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13212
13217static pm_node_t *
13218parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13219 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13220 pm_assert_value_expression(parser, node);
13221 return node;
13222}
13223
13242static inline bool
13243token_begins_expression_p(pm_token_type_t type) {
13244 switch (type) {
13245 case PM_TOKEN_EQUAL_GREATER:
13246 case PM_TOKEN_KEYWORD_IN:
13247 // We need to special case this because it is a binary operator that
13248 // should not be marked as beginning an expression.
13249 return false;
13250 case PM_TOKEN_BRACE_RIGHT:
13251 case PM_TOKEN_BRACKET_RIGHT:
13252 case PM_TOKEN_COLON:
13253 case PM_TOKEN_COMMA:
13254 case PM_TOKEN_EMBEXPR_END:
13255 case PM_TOKEN_EOF:
13256 case PM_TOKEN_LAMBDA_BEGIN:
13257 case PM_TOKEN_KEYWORD_DO:
13258 case PM_TOKEN_KEYWORD_DO_LOOP:
13259 case PM_TOKEN_KEYWORD_END:
13260 case PM_TOKEN_KEYWORD_ELSE:
13261 case PM_TOKEN_KEYWORD_ELSIF:
13262 case PM_TOKEN_KEYWORD_ENSURE:
13263 case PM_TOKEN_KEYWORD_THEN:
13264 case PM_TOKEN_KEYWORD_RESCUE:
13265 case PM_TOKEN_KEYWORD_WHEN:
13266 case PM_TOKEN_NEWLINE:
13267 case PM_TOKEN_PARENTHESIS_RIGHT:
13268 case PM_TOKEN_SEMICOLON:
13269 // The reason we need this short-circuit is because we're using the
13270 // binding powers table to tell us if the subsequent token could
13271 // potentially be the start of an expression. If there _is_ a binding
13272 // power for one of these tokens, then we should remove it from this list
13273 // and let it be handled by the default case below.
13274 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13275 return false;
13276 case PM_TOKEN_UAMPERSAND:
13277 // This is a special case because this unary operator cannot appear
13278 // as a general operator, it only appears in certain circumstances.
13279 return false;
13280 case PM_TOKEN_UCOLON_COLON:
13281 case PM_TOKEN_UMINUS:
13282 case PM_TOKEN_UMINUS_NUM:
13283 case PM_TOKEN_UPLUS:
13284 case PM_TOKEN_BANG:
13285 case PM_TOKEN_TILDE:
13286 case PM_TOKEN_UDOT_DOT:
13287 case PM_TOKEN_UDOT_DOT_DOT:
13288 // These unary tokens actually do have binding power associated with them
13289 // so that we can correctly place them into the precedence order. But we
13290 // want them to be marked as beginning an expression, so we need to
13291 // special case them here.
13292 return true;
13293 default:
13294 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13295 }
13296}
13297
13302static pm_node_t *
13303parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13304 if (accept1(parser, PM_TOKEN_USTAR)) {
13305 pm_token_t operator = parser->previous;
13306 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13307 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13308 }
13309
13310 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13311}
13312
13317static void
13318parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13319 // The method name needs to change. If we previously had
13320 // foo, we now need foo=. In this case we'll allocate a new
13321 // owned string, copy the previous method name in, and
13322 // append an =.
13323 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13324 size_t length = constant->length;
13325 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13326 if (name == NULL) return;
13327
13328 memcpy(name, constant->start, length);
13329 name[length] = '=';
13330
13331 // Now switch the name to the new string.
13332 // This silences clang analyzer warning about leak of memory pointed by `name`.
13333 // NOLINTNEXTLINE(clang-analyzer-*)
13334 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13335}
13336
13343static pm_node_t *
13344parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13345 switch (PM_NODE_TYPE(target)) {
13346 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13347 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13348 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13349 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13350 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13351 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13352 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13353 default: break;
13354 }
13355
13356 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13357 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13358
13359 pm_node_destroy(parser, target);
13360 return (pm_node_t *) result;
13361}
13362
13368static void
13369parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13370 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13371
13372 for (size_t index = 0; index < implicit_parameters->size; index++) {
13373 if (implicit_parameters->nodes[index] == node) {
13374 // If the node is not the last one in the list, we need to shift the
13375 // remaining nodes down to fill the gap. This is extremely unlikely
13376 // to happen.
13377 if (index != implicit_parameters->size - 1) {
13378 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13379 }
13380
13381 implicit_parameters->size--;
13382 break;
13383 }
13384 }
13385}
13386
13395static pm_node_t *
13396parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13397 switch (PM_NODE_TYPE(target)) {
13398 case PM_MISSING_NODE:
13399 return target;
13400 case PM_SOURCE_ENCODING_NODE:
13401 case PM_FALSE_NODE:
13402 case PM_SOURCE_FILE_NODE:
13403 case PM_SOURCE_LINE_NODE:
13404 case PM_NIL_NODE:
13405 case PM_SELF_NODE:
13406 case PM_TRUE_NODE: {
13407 // In these special cases, we have specific error messages and we
13408 // will replace them with local variable writes.
13409 return parse_unwriteable_target(parser, target);
13410 }
13411 case PM_CLASS_VARIABLE_READ_NODE:
13413 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
13414 return target;
13415 case PM_CONSTANT_PATH_NODE:
13416 if (context_def_p(parser)) {
13417 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13418 }
13419
13421 target->type = PM_CONSTANT_PATH_TARGET_NODE;
13422
13423 return target;
13424 case PM_CONSTANT_READ_NODE:
13425 if (context_def_p(parser)) {
13426 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13427 }
13428
13429 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13430 target->type = PM_CONSTANT_TARGET_NODE;
13431
13432 return target;
13433 case PM_BACK_REFERENCE_READ_NODE:
13434 case PM_NUMBERED_REFERENCE_READ_NODE:
13435 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13436 return target;
13437 case PM_GLOBAL_VARIABLE_READ_NODE:
13439 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13440 return target;
13441 case PM_LOCAL_VARIABLE_READ_NODE: {
13442 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13443 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13444 parse_target_implicit_parameter(parser, target);
13445 }
13446
13447 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13448 uint32_t name = cast->name;
13449 uint32_t depth = cast->depth;
13450 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13451
13453 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
13454
13455 return target;
13456 }
13457 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13458 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13459 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13460
13461 parse_target_implicit_parameter(parser, target);
13462 pm_node_destroy(parser, target);
13463
13464 return node;
13465 }
13466 case PM_INSTANCE_VARIABLE_READ_NODE:
13468 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13469 return target;
13470 case PM_MULTI_TARGET_NODE:
13471 if (splat_parent) {
13472 // Multi target is not accepted in all positions. If this is one
13473 // of them, then we need to add an error.
13474 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13475 }
13476
13477 return target;
13478 case PM_SPLAT_NODE: {
13479 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13480
13481 if (splat->expression != NULL) {
13482 splat->expression = parse_target(parser, splat->expression, multiple, true);
13483 }
13484
13485 return (pm_node_t *) splat;
13486 }
13487 case PM_CALL_NODE: {
13488 pm_call_node_t *call = (pm_call_node_t *) target;
13489
13490 // If we have no arguments to the call node and we need this to be a
13491 // target then this is either a method call or a local variable
13492 // write.
13493 if (
13494 (call->message_loc.start != NULL) &&
13495 (call->message_loc.end[-1] != '!') &&
13496 (call->message_loc.end[-1] != '?') &&
13497 (call->opening_loc.start == NULL) &&
13498 (call->arguments == NULL) &&
13499 (call->block == NULL)
13500 ) {
13501 if (call->receiver == NULL) {
13502 // When we get here, we have a local variable write, because it
13503 // was previously marked as a method call but now we have an =.
13504 // This looks like:
13505 //
13506 // foo = 1
13507 //
13508 // When it was parsed in the prefix position, foo was seen as a
13509 // method call with no receiver and no arguments. Now we have an
13510 // =, so we know it's a local variable write.
13511 const pm_location_t message_loc = call->message_loc;
13512
13513 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13514 pm_node_destroy(parser, target);
13515
13516 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13517 }
13518
13519 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13520 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13521 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13522 }
13523
13524 parse_write_name(parser, &call->name);
13525 return (pm_node_t *) pm_call_target_node_create(parser, call);
13526 }
13527 }
13528
13529 // If there is no call operator and the message is "[]" then this is
13530 // an aref expression, and we can transform it into an aset
13531 // expression.
13532 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13533 return (pm_node_t *) pm_index_target_node_create(parser, call);
13534 }
13535 }
13537 default:
13538 // In this case we have a node that we don't know how to convert
13539 // into a target. We need to treat it as an error. For now, we'll
13540 // mark it as an error and just skip right past it.
13541 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13542 return target;
13543 }
13544}
13545
13550static pm_node_t *
13551parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13552 pm_node_t *result = parse_target(parser, target, multiple, false);
13553
13554 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13555 // parens after the targets.
13556 if (
13557 !match1(parser, PM_TOKEN_EQUAL) &&
13558 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13559 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13560 ) {
13561 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13562 }
13563
13564 return result;
13565}
13566
13571static pm_node_t *
13572parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13573 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13574
13575 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13576 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13577 }
13578
13579 return write;
13580}
13581
13585static pm_node_t *
13586parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13587 switch (PM_NODE_TYPE(target)) {
13588 case PM_MISSING_NODE:
13589 pm_node_destroy(parser, value);
13590 return target;
13591 case PM_CLASS_VARIABLE_READ_NODE: {
13592 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13593 pm_node_destroy(parser, target);
13594 return (pm_node_t *) node;
13595 }
13596 case PM_CONSTANT_PATH_NODE: {
13597 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13598
13599 if (context_def_p(parser)) {
13600 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13601 }
13602
13603 return parse_shareable_constant_write(parser, node);
13604 }
13605 case PM_CONSTANT_READ_NODE: {
13606 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13607
13608 if (context_def_p(parser)) {
13609 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13610 }
13611
13612 pm_node_destroy(parser, target);
13613 return parse_shareable_constant_write(parser, node);
13614 }
13615 case PM_BACK_REFERENCE_READ_NODE:
13616 case PM_NUMBERED_REFERENCE_READ_NODE:
13617 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13619 case PM_GLOBAL_VARIABLE_READ_NODE: {
13620 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13621 pm_node_destroy(parser, target);
13622 return (pm_node_t *) node;
13623 }
13624 case PM_LOCAL_VARIABLE_READ_NODE: {
13626
13627 pm_constant_id_t name = local_read->name;
13628 pm_location_t name_loc = target->location;
13629
13630 uint32_t depth = local_read->depth;
13631 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13632
13633 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13634 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13635 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13636 parse_target_implicit_parameter(parser, target);
13637 }
13638
13639 pm_locals_unread(&scope->locals, name);
13640 pm_node_destroy(parser, target);
13641
13642 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13643 }
13644 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13645 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13646 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13647
13648 parse_target_implicit_parameter(parser, target);
13649 pm_node_destroy(parser, target);
13650
13651 return node;
13652 }
13653 case PM_INSTANCE_VARIABLE_READ_NODE: {
13654 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13655 pm_node_destroy(parser, target);
13656 return write_node;
13657 }
13658 case PM_MULTI_TARGET_NODE:
13659 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13660 case PM_SPLAT_NODE: {
13661 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13662
13663 if (splat->expression != NULL) {
13664 splat->expression = parse_write(parser, splat->expression, operator, value);
13665 }
13666
13667 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13668 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13669
13670 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13671 }
13672 case PM_CALL_NODE: {
13673 pm_call_node_t *call = (pm_call_node_t *) target;
13674
13675 // If we have no arguments to the call node and we need this to be a
13676 // target then this is either a method call or a local variable
13677 // write.
13678 if (
13679 (call->message_loc.start != NULL) &&
13680 (call->message_loc.end[-1] != '!') &&
13681 (call->message_loc.end[-1] != '?') &&
13682 (call->opening_loc.start == NULL) &&
13683 (call->arguments == NULL) &&
13684 (call->block == NULL)
13685 ) {
13686 if (call->receiver == NULL) {
13687 // When we get here, we have a local variable write, because it
13688 // was previously marked as a method call but now we have an =.
13689 // This looks like:
13690 //
13691 // foo = 1
13692 //
13693 // When it was parsed in the prefix position, foo was seen as a
13694 // method call with no receiver and no arguments. Now we have an
13695 // =, so we know it's a local variable write.
13696 const pm_location_t message = call->message_loc;
13697
13698 pm_parser_local_add_location(parser, message.start, message.end, 0);
13699 pm_node_destroy(parser, target);
13700
13701 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13702 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13703
13704 pm_refute_numbered_parameter(parser, message.start, message.end);
13705 return target;
13706 }
13707
13708 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13709 // When we get here, we have a method call, because it was
13710 // previously marked as a method call but now we have an =. This
13711 // looks like:
13712 //
13713 // foo.bar = 1
13714 //
13715 // When it was parsed in the prefix position, foo.bar was seen as a
13716 // method call with no arguments. Now we have an =, so we know it's
13717 // a method call with an argument. In this case we will create the
13718 // arguments node, parse the argument, and add it to the list.
13719 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13720 call->arguments = arguments;
13721
13722 pm_arguments_node_arguments_append(arguments, value);
13723 call->base.location.end = arguments->base.location.end;
13724
13725 parse_write_name(parser, &call->name);
13726 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13727
13728 return (pm_node_t *) call;
13729 }
13730 }
13731
13732 // If there is no call operator and the message is "[]" then this is
13733 // an aref expression, and we can transform it into an aset
13734 // expression.
13735 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13736 if (call->arguments == NULL) {
13737 call->arguments = pm_arguments_node_create(parser);
13738 }
13739
13740 pm_arguments_node_arguments_append(call->arguments, value);
13741 target->location.end = value->location.end;
13742
13743 // Replace the name with "[]=".
13744 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13745
13746 // Ensure that the arguments for []= don't contain keywords
13747 pm_index_arguments_check(parser, call->arguments, call->block);
13748 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13749
13750 return target;
13751 }
13752
13753 // If there are arguments on the call node, then it can't be a method
13754 // call ending with = or a local variable write, so it must be a
13755 // syntax error. In this case we'll fall through to our default
13756 // handling. We need to free the value that we parsed because there
13757 // is no way for us to attach it to the tree at this point.
13758 pm_node_destroy(parser, value);
13759 }
13761 default:
13762 // In this case we have a node that we don't know how to convert into a
13763 // target. We need to treat it as an error. For now, we'll mark it as an
13764 // error and just skip right past it.
13765 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13766 return target;
13767 }
13768}
13769
13776static pm_node_t *
13777parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13778 switch (PM_NODE_TYPE(target)) {
13779 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13780 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13781 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13782 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13783 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13784 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13785 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13786 default: break;
13787 }
13788
13789 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13790 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13791
13792 pm_node_destroy(parser, target);
13793 return (pm_node_t *) result;
13794}
13795
13806static pm_node_t *
13807parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13808 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13809
13810 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13811 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13812
13813 while (accept1(parser, PM_TOKEN_COMMA)) {
13814 if (accept1(parser, PM_TOKEN_USTAR)) {
13815 // Here we have a splat operator. It can have a name or be
13816 // anonymous. It can be the final target or be in the middle if
13817 // there haven't been any others yet.
13818 if (has_rest) {
13819 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13820 }
13821
13822 pm_token_t star_operator = parser->previous;
13823 pm_node_t *name = NULL;
13824
13825 if (token_begins_expression_p(parser->current.type)) {
13826 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13827 name = parse_target(parser, name, true, true);
13828 }
13829
13830 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13831 pm_multi_target_node_targets_append(parser, result, splat);
13832 has_rest = true;
13833 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13834 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13835 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13836 target = parse_target(parser, target, true, false);
13837
13838 pm_multi_target_node_targets_append(parser, result, target);
13839 context_pop(parser);
13840 } else if (token_begins_expression_p(parser->current.type)) {
13841 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13842 target = parse_target(parser, target, true, false);
13843
13844 pm_multi_target_node_targets_append(parser, result, target);
13845 } else if (!match1(parser, PM_TOKEN_EOF)) {
13846 // If we get here, then we have a trailing , in a multi target node.
13847 // We'll add an implicit rest node to represent this.
13848 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13849 pm_multi_target_node_targets_append(parser, result, rest);
13850 break;
13851 }
13852 }
13853
13854 return (pm_node_t *) result;
13855}
13856
13861static pm_node_t *
13862parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13863 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13864 accept1(parser, PM_TOKEN_NEWLINE);
13865
13866 // Ensure that we have either an = or a ) after the targets.
13867 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13868 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13869 }
13870
13871 return result;
13872}
13873
/**
 * Parse a list of statements that belongs to the given context.
 *
 * Leading newlines and semicolons are skipped first. If the current token then
 * already terminates the context, NULL is returned and no node is created.
 * Otherwise a statements node is created, the context is pushed, and
 * expressions are parsed and appended one at a time until the context's
 * terminator is reached or the parser enters error recovery. The context is
 * popped again and pm_void_statements_check is run over the collected
 * statements before the node is returned.
 */
13877static pm_statements_node_t *
13878parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13879 // First, skip past any optional terminators that might be at the beginning
13880 // of the statements.
13881 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13882
13883 // If we have a terminator, then we can just return NULL.
13884 if (context_terminator(context, &parser->current)) return NULL;
13885
13886 pm_statements_node_t *statements = pm_statements_node_create(parser);
13887
13888 // At this point we know we have at least one statement, and that it
13889 // immediately follows the current token.
13890 context_push(parser, context);
13891
13892 while (true) {
13893 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13894 pm_statements_node_body_append(parser, statements, node, true);
13895
13896 // If we're recovering from a syntax error, then we need to stop parsing
13897 // the statements now.
13898 if (parser->recovering) {
13899 // If this is the level of context where the recovery has happened,
13900 // then we can mark the parser as done recovering.
13901 if (context_terminator(context, &parser->current)) parser->recovering = false;
13902 break;
13903 }
13904
13905 // If we have a terminator, then we will parse all consecutive
13906 // terminators and then continue parsing the statements list.
13907 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13908 // If we have a terminator, then we will continue parsing the
13909 // statements list.
13910 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13911 if (context_terminator(context, &parser->current)) break;
13912
13913 // Now we can continue parsing the list of statements.
13914 continue;
13915 }
13916
13917 // At this point we have a list of statements that are not terminated by
13918 // a newline or semicolon. At this point we need to check if we're at
13919 // the end of the statements list. If we are, then we should break out
13920 // of the loop.
13921 if (context_terminator(context, &parser->current)) break;
13922
13923 // At this point, we have a syntax error, because the statement was not
13924 // terminated by a newline or semicolon, and we're not at the end of the
13925 // statements list. Ideally we should scan forward to determine if we
13926 // should insert a missing terminator or break out of parsing the
13927 // statements list at this point.
13928 //
13929 // We don't have that yet, so instead we'll do a more naive approach. If
13930 // we were unable to parse an expression, then we will skip past this
13931 // token and continue parsing the statements list. Otherwise we'll add
13932 // an error and continue parsing the statements list.
13933 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
      // Skip the token that could not be parsed so the loop makes progress.
13934 parser_lex(parser);
13935
13936 // If we are at the end of the file, then we need to stop parsing
13937 // the statements entirely at this point. Mark the parser as
13938 // recovering, as we know that EOF closes the top-level context, and
13939 // then break out of the loop.
13940 if (match1(parser, PM_TOKEN_EOF)) {
13941 parser->recovering = true;
13942 break;
13943 }
13944
13945 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13946 if (context_terminator(context, &parser->current)) break;
13947 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13948 // This is an inlined version of accept1 because the error that we
13949 // want to add has varargs. If this happens again, we should
13950 // probably extract a helper function.
13951 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
      // Turn the previous token into an empty PM_TOKEN_MISSING token that
      // sits at its own end, standing in for the absent terminator.
13952 parser->previous.start = parser->previous.end;
13953 parser->previous.type = PM_TOKEN_MISSING;
13954 }
13955 }
13956
13957 context_pop(parser);
      // last_value is handed to pm_void_statements_check below.
      //
      // NOTE(review): no `case` label precedes the two statements at the top
      // of this switch, so as written they can never execute and last_value
      // is always true. The embedded line numbering also jumps from 13959 to
      // 13962, which suggests the case labels were lost from this copy of the
      // file -- confirm against the upstream source before relying on this.
13958 bool last_value = true;
13959 switch (context) {
13962 last_value = false;
13963 break;
13964 default:
13965 break;
13966 }
13967 pm_void_statements_check(parser, statements, last_value);
13968
13969 return statements;
13970}
13971
13976static void
13977pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13978 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13979
13980 if (duplicated != NULL) {
13981 pm_buffer_t buffer = { 0 };
13982 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13983
13984 pm_diagnostic_list_append_format(
13985 &parser->warning_list,
13986 duplicated->location.start,
13987 duplicated->location.end,
13988 PM_WARN_DUPLICATED_HASH_KEY,
13989 (int) pm_buffer_length(&buffer),
13990 pm_buffer_value(&buffer),
13991 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13992 );
13993
13994 pm_buffer_free(&buffer);
13995 }
13996}
13997
14002static void
14003pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14004 pm_node_t *previous;
14005
14006 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14007 pm_diagnostic_list_append_format(
14008 &parser->warning_list,
14009 node->location.start,
14010 node->location.end,
14011 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14012 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14013 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14014 );
14015 }
14016}
14017
14021static bool
14022parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14023 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
14024 bool contains_keyword_splat = false;
14025
14026 while (true) {
14027 pm_node_t *element;
14028
14029 switch (parser->current.type) {
14030 case PM_TOKEN_USTAR_STAR: {
14031 parser_lex(parser);
14032 pm_token_t operator = parser->previous;
14033 pm_node_t *value = NULL;
14034
14035 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14036 // If we're about to parse a nested hash that is being
14037 // pushed into this hash directly with **, then we want the
14038 // inner hash to share the static literals with the outer
14039 // hash.
14040 parser->current_hash_keys = literals;
14041 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14042 } else if (token_begins_expression_p(parser->current.type)) {
14043 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14044 } else {
14045 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14046 }
14047
14048 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14049 contains_keyword_splat = true;
14050 break;
14051 }
14052 case PM_TOKEN_LABEL: {
14053 pm_token_t label = parser->current;
14054 parser_lex(parser);
14055
14056 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14057 pm_hash_key_static_literals_add(parser, literals, key);
14058
14059 pm_token_t operator = not_provided(parser);
14060 pm_node_t *value = NULL;
14061
14062 if (token_begins_expression_p(parser->current.type)) {
14063 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14064 } else {
14065 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14066 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14067 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14068 } else {
14069 int depth = -1;
14070 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14071
14072 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14073 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14074 } else {
14075 depth = pm_parser_local_depth(parser, &identifier);
14076 }
14077
14078 if (depth == -1) {
14079 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14080 } else {
14081 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14082 }
14083 }
14084
14085 value->location.end++;
14086 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14087 }
14088
14089 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14090 break;
14091 }
14092 default: {
14093 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14094
14095 // Hash keys that are strings are automatically frozen. We will
14096 // mark that here.
14097 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14098 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14099 }
14100
14101 pm_hash_key_static_literals_add(parser, literals, key);
14102
14103 pm_token_t operator;
14104 if (pm_symbol_node_label_p(key)) {
14105 operator = not_provided(parser);
14106 } else {
14107 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14108 operator = parser->previous;
14109 }
14110
14111 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14112 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14113 break;
14114 }
14115 }
14116
14117 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14118 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14119 } else {
14120 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14121 }
14122
14123 // If there's no comma after the element, then we're done.
14124 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14125
14126 // If the next element starts with a label or a **, then we know we have
14127 // another element in the hash, so we'll continue parsing.
14128 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14129
14130 // Otherwise we need to check if the subsequent token begins an expression.
14131 // If it does, then we'll continue parsing.
14132 if (token_begins_expression_p(parser->current.type)) continue;
14133
14134 // Otherwise by default we will exit out of this loop.
14135 break;
14136 }
14137
14138 return contains_keyword_splat;
14139}
14140
14144static inline void
14145parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14146 if (arguments->arguments == NULL) {
14147 arguments->arguments = pm_arguments_node_create(parser);
14148 }
14149
14150 pm_arguments_node_arguments_append(arguments->arguments, argument);
14151}
14152
/**
 * Parse a comma-separated list of call arguments into `arguments`.
 *
 * Positional arguments, splats, bare keyword hashes and the forwarding `...`
 * are appended through parse_arguments_append. The first block argument
 * (`&expr` or a bare `&`) is stored in arguments->block. Flags describing
 * keywords, keyword splats and forwarding are set on the arguments node as
 * the corresponding arguments are found.
 *
 * `accepts_forwarding` controls whether a leading `...` is given special
 * treatment (argument forwarding, or a range without a left-hand side).
 * `terminator` is the token that closes the list; PM_TOKEN_EOF means that no
 * specific closing token is expected. `depth` is the caller's recursion depth
 * and every nested parse is invoked with `depth + 1`.
 *
 * Parsing stops when an argument could not be parsed, when the parser is
 * recovering, when no comma follows an argument, or when the terminator
 * follows a trailing comma.
 */
14156static void
14157parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14158 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14159
14160 // First we need to check if the next token is one that could be the start
14161 // of an argument. If it's not, then we can just return.
      //
      // That is the case when the current token is the terminator or EOF, when
      // it has a left binding power that is set and lower than
      // PM_BINDING_POWER_RANGE, or when it terminates the current context.
14162 if (
14163 match2(parser, terminator, PM_TOKEN_EOF) ||
14164 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14165 context_terminator(parser->current_context->context, &parser->current)
14166 ) {
14167 return;
14168 }
14169
      // State carried across iterations of the argument loop below.
14170 bool parsed_first_argument = false;
14171 bool parsed_bare_hash = false;
14172 bool parsed_block_argument = false;
14173 bool parsed_forwarding_arguments = false;
14174
14175 while (!match1(parser, PM_TOKEN_EOF)) {
      // Anything that comes after a forwarding ... is reported as an error,
      // but it is still parsed below.
14176 if (parsed_forwarding_arguments) {
14177 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14178 }
14179
14180 pm_node_t *argument = NULL;
14181
14182 switch (parser->current.type) {
      // A ** or a label starts a bare keyword hash; the whole hash is parsed
      // by parse_assocs and appended as a single argument.
14183 case PM_TOKEN_USTAR_STAR:
14184 case PM_TOKEN_LABEL: {
14185 if (parsed_bare_hash) {
14186 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14187 }
14188
14189 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14190 argument = (pm_node_t *) hash;
14191
14192 pm_static_literals_t hash_keys = { 0 };
14193 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14194
14195 parse_arguments_append(parser, arguments, argument);
14196
14197 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14198 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14199 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14200
14201 pm_static_literals_free(&hash_keys);
14202 parsed_bare_hash = true;
14203
14204 break;
14205 }
      // A block argument: & followed by an expression, or a bare & that is
      // checked against the enclosing scope for block forwarding.
14206 case PM_TOKEN_UAMPERSAND: {
14207 parser_lex(parser);
14208 pm_token_t operator = parser->previous;
14209 pm_node_t *expression = NULL;
14210
14211 if (token_begins_expression_p(parser->current.type)) {
14212 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14213 } else {
14214 pm_parser_scope_forwarding_block_check(parser, &operator);
14215 }
14216
14217 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
      // Only the first block argument goes into arguments->block. Any further
      // one is appended to the regular argument list instead of replacing it.
14218 if (parsed_block_argument) {
14219 parse_arguments_append(parser, arguments, argument);
14220 } else {
14221 arguments->block = argument;
14222 }
14223
14224 if (match1(parser, PM_TOKEN_COMMA)) {
14225 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14226 }
14227
14228 parsed_block_argument = true;
14229 break;
14230 }
      // A splat: * followed by an expression, or a bare * directly followed
      // by ), a comma, a semicolon or ], which is checked against the
      // enclosing scope for positional forwarding.
14231 case PM_TOKEN_USTAR: {
14232 parser_lex(parser);
14233 pm_token_t operator = parser->previous;
14234
14235 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14236 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14237 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14238 if (parsed_bare_hash) {
14239 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14240 }
14241 } else {
14242 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14243
14244 if (parsed_bare_hash) {
14245 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14246 }
14247
14248 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14249 }
14250
14251 parse_arguments_append(parser, arguments, argument);
14252 break;
14253 }
14254 case PM_TOKEN_UDOT_DOT_DOT: {
14255 if (accepts_forwarding) {
14256 parser_lex(parser);
14257
14258 if (token_begins_expression_p(parser->current.type)) {
14259 // If the token begins an expression then this ... was
14260 // not actually argument forwarding but was instead a
14261 // range.
14262 pm_token_t operator = parser->previous;
14263 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14264
14265 // If we parse a range, we need to validate that we
14266 // didn't accidentally violate the nonassoc rules of the
14267 // ... operator.
14268 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14269 pm_range_node_t *range = (pm_range_node_t *) right;
14270 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14271 }
14272
14273 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14274 } else {
14275 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14276 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14277 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14278 }
14279
14280 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14281 parse_arguments_append(parser, arguments, argument);
14282 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14283 arguments->has_forwarding = true;
14284 parsed_forwarding_arguments = true;
14285 break;
14286 }
14287 }
14288 }
      // Intentional fall through: only the forwarding branch above ends in a
      // break. When forwarding is not accepted, `argument` is still NULL here
      // and the ... is parsed as a regular expression below; when a range was
      // parsed, `argument` already holds it and is reused below.
14290 default: {
14291 if (argument == NULL) {
14292 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14293 }
14294
14295 bool contains_keywords = false;
14296 bool contains_keyword_splat = false;
14297
      // If the argument is a label-style symbol or is followed by =>, it is
      // the first key of a bare keyword hash rather than a positional
      // argument.
14298 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14299 if (parsed_bare_hash) {
14300 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14301 }
14302
14303 pm_token_t operator;
14304 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14305 operator = parser->previous;
14306 } else {
14307 operator = not_provided(parser);
14308 }
14309
14310 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14311 contains_keywords = true;
14312
14313 // Create the set of static literals for this hash.
14314 pm_static_literals_t hash_keys = { 0 };
14315 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14316
14317 // Finish parsing the one we are part way through.
14318 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14319 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14320
14321 pm_keyword_hash_node_elements_append(bare_hash, argument);
14322 argument = (pm_node_t *) bare_hash;
14323
14324 // Then parse more if we have a comma
14325 if (accept1(parser, PM_TOKEN_COMMA) && (
14326 token_begins_expression_p(parser->current.type) ||
14327 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14328 )) {
14329 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14330 }
14331
14332 pm_static_literals_free(&hash_keys);
14333 parsed_bare_hash = true;
14334 }
14335
14336 parse_arguments_append(parser, arguments, argument);
14337
14338 pm_node_flags_t flags = 0;
14339 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14340 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14341 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14342
14343 break;
14344 }
14345 }
14346
14347 parsed_first_argument = true;
14348
14349 // If parsing the argument failed, we need to stop parsing arguments.
14350 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14351
14352 // If the terminator of these arguments is not EOF, then we have a
14353 // specific token we're looking for. In that case we can accept a
14354 // newline here because it is not functioning as a statement terminator.
14355 bool accepted_newline = false;
14356 if (terminator != PM_TOKEN_EOF) {
14357 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14358 }
14359
14360 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14361 // If we previously were on a comma and we just parsed a bare hash,
14362 // then we want to continue parsing arguments. This is because the
14363 // comma was grabbed up by the hash parser.
14364 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14365 // If there was a comma, then we need to check if we also accepted a
14366 // newline. If we did, then this is a syntax error.
14367 if (accepted_newline) {
14368 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14369 }
14370 } else {
14371 // If there is no comma at the end of the argument list then we're
14372 // done parsing arguments and can break out of this loop.
14373 break;
14374 }
14375
14376 // If we hit the terminator, then that means we have a trailing comma so
14377 // we can accept that output as well.
14378 if (match1(parser, terminator)) break;
14379 }
14380}
14381
14393parse_required_destructured_parameter(pm_parser_t *parser) {
14394 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14395
14396 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14397 pm_multi_target_node_opening_set(node, &parser->previous);
14398
14399 do {
14400 pm_node_t *param;
14401
14402 // If we get here then we have a trailing comma, which isn't allowed in
14403 // the grammar. In other places, multi targets _do_ allow trailing
14404 // commas, so here we'll assume this is a mistake of the user not
14405 // knowing it's not allowed here.
14406 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14407 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14408 pm_multi_target_node_targets_append(parser, node, param);
14409 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14410 break;
14411 }
14412
14413 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14414 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14415 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14416 pm_token_t star = parser->previous;
14417 pm_node_t *value = NULL;
14418
14419 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14420 pm_token_t name = parser->previous;
14421 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14422 if (pm_parser_parameter_name_check(parser, &name)) {
14423 pm_node_flag_set_repeated_parameter(value);
14424 }
14425 pm_parser_local_add_token(parser, &name, 1);
14426 }
14427
14428 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14429 } else {
14430 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14431 pm_token_t name = parser->previous;
14432
14433 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14434 if (pm_parser_parameter_name_check(parser, &name)) {
14435 pm_node_flag_set_repeated_parameter(param);
14436 }
14437 pm_parser_local_add_token(parser, &name, 1);
14438 }
14439
14440 pm_multi_target_node_targets_append(parser, node, param);
14441 } while (accept1(parser, PM_TOKEN_COMMA));
14442
14443 accept1(parser, PM_TOKEN_NEWLINE);
14444 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14445 pm_multi_target_node_closing_set(node, &parser->previous);
14446
14447 return node;
14448}
14449
/**
 * The ordering state that is tracked while a parameter list is parsed.
 *
 * The values are compared numerically by update_parameter_state: parsing
 * starts at PM_PARAMETERS_ORDER_NONE, the state is only ever lowered, and a
 * parameter whose state is greater than the current one is rejected as being
 * out of order. The numeric values are therefore significant and are spelled
 * out explicitly.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** Once this state is reached, any further ordered parameter is an error. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** State mapped to the ** tokens. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** State mapped to label tokens. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** Sits between the keyword states and PM_PARAMETERS_ORDER_AFTER_OPTIONAL. */
    PM_PARAMETERS_ORDER_REST = 4,

    /**
     * State mapped to the * tokens, and also the state entered when a named
     * parameter follows an optional one.
     */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** State mapped to the = token. */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** State mapped to identifiers and opening parentheses. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** The initial state, before any parameter has been seen. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14465
14469static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14470 [0] = PM_PARAMETERS_NO_CHANGE,
14471 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14472 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14473 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14474 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14475 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14476 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14477 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14478 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14479 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14480 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14481 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14482};
14483
14491static bool
14492update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14493 pm_parameters_order_t state = parameters_ordering[token->type];
14494 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14495
14496 // If we see another ordered argument after a optional argument
14497 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14498 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14499 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14500 return true;
14501 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14502 return true;
14503 }
14504
14505 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14506 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14507 return false;
14508 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14509 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14510 return false;
14511 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14512 // We know what transition we failed on, so we can provide a better error here.
14513 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14514 return false;
14515 }
14516
14517 if (state < *current) *current = state;
14518 return true;
14519}
14520
14524static pm_parameters_node_t *
14525parse_parameters(
14526 pm_parser_t *parser,
14527 pm_binding_power_t binding_power,
14528 bool uses_parentheses,
14529 bool allows_trailing_comma,
14530 bool allows_forwarding_parameters,
14531 bool accepts_blocks_in_defaults,
14532 bool in_block,
14533 uint16_t depth
14534) {
14535 pm_do_loop_stack_push(parser, false);
14536
14537 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14538 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14539
14540 while (true) {
14541 bool parsing = true;
14542
14543 switch (parser->current.type) {
14544 case PM_TOKEN_PARENTHESIS_LEFT: {
14545 update_parameter_state(parser, &parser->current, &order);
14546 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14547
14548 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14549 pm_parameters_node_requireds_append(params, param);
14550 } else {
14551 pm_parameters_node_posts_append(params, param);
14552 }
14553 break;
14554 }
14555 case PM_TOKEN_UAMPERSAND:
14556 case PM_TOKEN_AMPERSAND: {
14557 update_parameter_state(parser, &parser->current, &order);
14558 parser_lex(parser);
14559
14560 pm_token_t operator = parser->previous;
14561 pm_token_t name;
14562
14563 bool repeated = false;
14564 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14565 name = parser->previous;
14566 repeated = pm_parser_parameter_name_check(parser, &name);
14567 pm_parser_local_add_token(parser, &name, 1);
14568 } else {
14569 name = not_provided(parser);
14570 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14571 }
14572
14573 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14574 if (repeated) {
14575 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14576 }
14577 if (params->block == NULL) {
14578 pm_parameters_node_block_set(params, param);
14579 } else {
14580 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14581 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14582 }
14583
14584 break;
14585 }
14586 case PM_TOKEN_UDOT_DOT_DOT: {
14587 if (!allows_forwarding_parameters) {
14588 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14589 }
14590
14591 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14592 parser_lex(parser);
14593
14594 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14595 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14596
14597 if (params->keyword_rest != NULL) {
14598 // If we already have a keyword rest parameter, then we replace it with the
14599 // forwarding parameter and move the keyword rest parameter to the posts list.
14600 pm_node_t *keyword_rest = params->keyword_rest;
14601 pm_parameters_node_posts_append(params, keyword_rest);
14602 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14603 params->keyword_rest = NULL;
14604 }
14605
14606 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14607 break;
14608 }
14609 case PM_TOKEN_CLASS_VARIABLE:
14610 case PM_TOKEN_IDENTIFIER:
14611 case PM_TOKEN_CONSTANT:
14612 case PM_TOKEN_INSTANCE_VARIABLE:
14613 case PM_TOKEN_GLOBAL_VARIABLE:
14614 case PM_TOKEN_METHOD_NAME: {
14615 parser_lex(parser);
14616 switch (parser->previous.type) {
14617 case PM_TOKEN_CONSTANT:
14618 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14619 break;
14620 case PM_TOKEN_INSTANCE_VARIABLE:
14621 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14622 break;
14623 case PM_TOKEN_GLOBAL_VARIABLE:
14624 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14625 break;
14626 case PM_TOKEN_CLASS_VARIABLE:
14627 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14628 break;
14629 case PM_TOKEN_METHOD_NAME:
14630 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14631 break;
14632 default: break;
14633 }
14634
14635 if (parser->current.type == PM_TOKEN_EQUAL) {
14636 update_parameter_state(parser, &parser->current, &order);
14637 } else {
14638 update_parameter_state(parser, &parser->previous, &order);
14639 }
14640
14641 pm_token_t name = parser->previous;
14642 bool repeated = pm_parser_parameter_name_check(parser, &name);
14643 pm_parser_local_add_token(parser, &name, 1);
14644
14645 if (match1(parser, PM_TOKEN_EQUAL)) {
14646 pm_token_t operator = parser->current;
14647 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14648 parser_lex(parser);
14649
14650 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14651 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14652
14653 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14654 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14655 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14656
14657 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14658
14659 if (repeated) {
14660 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14661 }
14662 pm_parameters_node_optionals_append(params, param);
14663
14664 // If the value of the parameter increased the number of
14665 // reads of that parameter, then we need to warn that we
14666 // have a circular definition.
14667 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14668 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14669 }
14670
14671 context_pop(parser);
14672
14673 // If parsing the value of the parameter resulted in error recovery,
14674 // then we can put a missing node in its place and stop parsing the
14675 // parameters entirely now.
14676 if (parser->recovering) {
14677 parsing = false;
14678 break;
14679 }
14680 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14681 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14682 if (repeated) {
14683 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14684 }
14685 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14686 } else {
14687 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14688 if (repeated) {
14689 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14690 }
14691 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14692 }
14693
14694 break;
14695 }
14696 case PM_TOKEN_LABEL: {
14697 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14698 update_parameter_state(parser, &parser->current, &order);
14699
14700 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14701 parser_lex(parser);
14702
14703 pm_token_t name = parser->previous;
14704 pm_token_t local = name;
14705 local.end -= 1;
14706
14707 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14708 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14709 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14710 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14711 }
14712
14713 bool repeated = pm_parser_parameter_name_check(parser, &local);
14714 pm_parser_local_add_token(parser, &local, 1);
14715
14716 switch (parser->current.type) {
14717 case PM_TOKEN_COMMA:
14718 case PM_TOKEN_PARENTHESIS_RIGHT:
14719 case PM_TOKEN_PIPE: {
14720 context_pop(parser);
14721
14722 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14723 if (repeated) {
14724 pm_node_flag_set_repeated_parameter(param);
14725 }
14726
14727 pm_parameters_node_keywords_append(params, param);
14728 break;
14729 }
14730 case PM_TOKEN_SEMICOLON:
14731 case PM_TOKEN_NEWLINE: {
14732 context_pop(parser);
14733
14734 if (uses_parentheses) {
14735 parsing = false;
14736 break;
14737 }
14738
14739 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14740 if (repeated) {
14741 pm_node_flag_set_repeated_parameter(param);
14742 }
14743
14744 pm_parameters_node_keywords_append(params, param);
14745 break;
14746 }
14747 default: {
14748 pm_node_t *param;
14749
14750 if (token_begins_expression_p(parser->current.type)) {
14751 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14752 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14753
14754 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14755 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14756 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14757
14758 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14759 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14760 }
14761
14762 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14763 }
14764 else {
14765 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14766 }
14767
14768 if (repeated) {
14769 pm_node_flag_set_repeated_parameter(param);
14770 }
14771
14772 context_pop(parser);
14773 pm_parameters_node_keywords_append(params, param);
14774
14775 // If parsing the value of the parameter resulted in error recovery,
14776 // then we can put a missing node in its place and stop parsing the
14777 // parameters entirely now.
14778 if (parser->recovering) {
14779 parsing = false;
14780 break;
14781 }
14782 }
14783 }
14784
14785 parser->in_keyword_arg = false;
14786 break;
14787 }
14788 case PM_TOKEN_USTAR:
14789 case PM_TOKEN_STAR: {
14790 update_parameter_state(parser, &parser->current, &order);
14791 parser_lex(parser);
14792
14793 pm_token_t operator = parser->previous;
14794 pm_token_t name;
14795 bool repeated = false;
14796
14797 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14798 name = parser->previous;
14799 repeated = pm_parser_parameter_name_check(parser, &name);
14800 pm_parser_local_add_token(parser, &name, 1);
14801 } else {
14802 name = not_provided(parser);
14803 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14804 }
14805
14806 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14807 if (repeated) {
14808 pm_node_flag_set_repeated_parameter(param);
14809 }
14810
14811 if (params->rest == NULL) {
14812 pm_parameters_node_rest_set(params, param);
14813 } else {
14814 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14815 pm_parameters_node_posts_append(params, param);
14816 }
14817
14818 break;
14819 }
14820 case PM_TOKEN_STAR_STAR:
14821 case PM_TOKEN_USTAR_STAR: {
14822 pm_parameters_order_t previous_order = order;
14823 update_parameter_state(parser, &parser->current, &order);
14824 parser_lex(parser);
14825
14826 pm_token_t operator = parser->previous;
14827 pm_node_t *param;
14828
14829 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14830 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14831 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14832 }
14833
14834 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14835 } else {
14836 pm_token_t name;
14837
14838 bool repeated = false;
14839 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14840 name = parser->previous;
14841 repeated = pm_parser_parameter_name_check(parser, &name);
14842 pm_parser_local_add_token(parser, &name, 1);
14843 } else {
14844 name = not_provided(parser);
14845 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14846 }
14847
14848 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14849 if (repeated) {
14850 pm_node_flag_set_repeated_parameter(param);
14851 }
14852 }
14853
14854 if (params->keyword_rest == NULL) {
14855 pm_parameters_node_keyword_rest_set(params, param);
14856 } else {
14857 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14858 pm_parameters_node_posts_append(params, param);
14859 }
14860
14861 break;
14862 }
14863 default:
14864 if (parser->previous.type == PM_TOKEN_COMMA) {
14865 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14866 // If we get here, then we have a trailing comma in a
14867 // block parameter list.
14868 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14869
14870 if (params->rest == NULL) {
14871 pm_parameters_node_rest_set(params, param);
14872 } else {
14873 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14874 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14875 }
14876 } else {
14877 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14878 }
14879 }
14880
14881 parsing = false;
14882 break;
14883 }
14884
14885 // If we hit some kind of issue while parsing the parameter, this would
14886 // have been set to false. In that case, we need to break out of the
14887 // loop.
14888 if (!parsing) break;
14889
14890 bool accepted_newline = false;
14891 if (uses_parentheses) {
14892 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14893 }
14894
14895 if (accept1(parser, PM_TOKEN_COMMA)) {
14896 // If there was a comma, but we also accepted a newline, then this
14897 // is a syntax error.
14898 if (accepted_newline) {
14899 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14900 }
14901 } else {
14902 // If there was no comma, then we're done parsing parameters.
14903 break;
14904 }
14905 }
14906
14907 pm_do_loop_stack_pop(parser);
14908
14909 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14910 if (params->base.location.start == params->base.location.end) {
14911 pm_node_destroy(parser, (pm_node_t *) params);
14912 return NULL;
14913 }
14914
14915 return params;
14916}
14917
14922static size_t
14923token_newline_index(const pm_parser_t *parser) {
14924 if (parser->heredoc_end == NULL) {
14925 // This is the common case. In this case we can look at the previously
14926 // recorded newline in the newline list and subtract from the current
14927 // offset.
14928 return parser->newline_list.size - 1;
14929 } else {
14930 // This is unlikely. This is the case that we have already parsed the
14931 // start of a heredoc, so we cannot rely on looking at the previous
14932 // offset of the newline list, and instead must go through the whole
14933 // process of a binary search for the line number.
14934 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14935 }
14936}
14937
14942static int64_t
14943token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14944 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14945 const uint8_t *end = token->start;
14946
14947 // Skip over the BOM if it is present.
14948 if (
14949 newline_index == 0 &&
14950 parser->start[0] == 0xef &&
14951 parser->start[1] == 0xbb &&
14952 parser->start[2] == 0xbf
14953 ) cursor += 3;
14954
14955 int64_t column = 0;
14956 for (; cursor < end; cursor++) {
14957 switch (*cursor) {
14958 case '\t':
14959 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14960 break;
14961 case ' ':
14962 column++;
14963 break;
14964 default:
14965 column++;
14966 if (break_on_non_space) return -1;
14967 break;
14968 }
14969 }
14970
14971 return column;
14972}
14973
14978static void
14979parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14980 // If these warnings are disabled (unlikely), then we can just return.
14981 if (!parser->warn_mismatched_indentation) return;
14982
14983 // If the tokens are on the same line, we do not warn.
14984 size_t closing_newline_index = token_newline_index(parser);
14985 if (opening_newline_index == closing_newline_index) return;
14986
14987 // If the opening token has anything other than spaces or tabs before it,
14988 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14989 // and the `if` immediately follows an `else` keyword.
14990 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14991 if (!if_after_else && (opening_column == -1)) return;
14992
14993 // Get a reference to the closing token off the current parser. This assumes
14994 // that the caller has placed this in the correct position.
14995 pm_token_t *closing_token = &parser->current;
14996
14997 // If the tokens are at the same indentation, we do not warn.
14998 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14999 if ((closing_column == -1) || (opening_column == closing_column)) return;
15000
15001 // If the closing column is greater than the opening column and we are
15002 // allowing indentation, then we do not warn.
15003 if (allow_indent && (closing_column > opening_column)) return;
15004
15005 // Otherwise, add a warning.
15006 PM_PARSER_WARN_FORMAT(
15007 parser,
15008 closing_token->start,
15009 closing_token->end,
15010 PM_WARN_INDENTATION_MISMATCH,
15011 (int) (closing_token->end - closing_token->start),
15012 (const char *) closing_token->start,
15013 (int) (opening_token->end - opening_token->start),
15014 (const char *) opening_token->start,
15015 ((int32_t) opening_newline_index) + parser->start_line
15016 );
15017}
15018
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * parse_rescues uses this to choose the context in which the statements of
 * each clause are parsed.
 */
typedef enum {
    /** An explicit begin statement. */
    PM_RESCUES_BEGIN = 1,

    /** A block. */
    PM_RESCUES_BLOCK = 2,

    /** A class body. */
    PM_RESCUES_CLASS = 3,

    /** A method definition. */
    PM_RESCUES_DEF = 4,

    /** A lambda. */
    PM_RESCUES_LAMBDA = 5,

    /** A module body. */
    PM_RESCUES_MODULE = 6,

    /** A singleton class body. */
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
15028
15033static inline void
15034parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15035 pm_rescue_node_t *current = NULL;
15036
15037 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15038 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15039 parser_lex(parser);
15040
15041 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15042
15043 switch (parser->current.type) {
15044 case PM_TOKEN_EQUAL_GREATER: {
15045 // Here we have an immediate => after the rescue keyword, in which case
15046 // we're going to have an empty list of exceptions to rescue (which
15047 // implies StandardError).
15048 parser_lex(parser);
15049 pm_rescue_node_operator_set(rescue, &parser->previous);
15050
15051 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15052 reference = parse_target(parser, reference, false, false);
15053
15054 pm_rescue_node_reference_set(rescue, reference);
15055 break;
15056 }
15057 case PM_TOKEN_NEWLINE:
15058 case PM_TOKEN_SEMICOLON:
15059 case PM_TOKEN_KEYWORD_THEN:
15060 // Here we have a terminator for the rescue keyword, in which
15061 // case we're going to just continue on.
15062 break;
15063 default: {
15064 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15065 // Here we have something that could be an exception expression, so
15066 // we'll attempt to parse it here and any others delimited by commas.
15067
15068 do {
15069 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15070 pm_rescue_node_exceptions_append(rescue, expression);
15071
15072 // If we hit a newline, then this is the end of the rescue expression. We
15073 // can continue on to parse the statements.
15074 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15075
15076 // If we hit a `=>` then we're going to parse the exception variable. Once
15077 // we've done that, we'll break out of the loop and parse the statements.
15078 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15079 pm_rescue_node_operator_set(rescue, &parser->previous);
15080
15081 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15082 reference = parse_target(parser, reference, false, false);
15083
15084 pm_rescue_node_reference_set(rescue, reference);
15085 break;
15086 }
15087 } while (accept1(parser, PM_TOKEN_COMMA));
15088 }
15089 }
15090 }
15091
15092 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15093 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15094 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15095 }
15096 } else {
15097 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15098 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15099 }
15100
15101 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
15102 pm_accepts_block_stack_push(parser, true);
15103 pm_context_t context;
15104
15105 switch (type) {
15106 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15107 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15108 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15109 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15110 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15111 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15112 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15113 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15114 }
15115
15116 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15117 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15118
15119 pm_accepts_block_stack_pop(parser);
15120 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15121 }
15122
15123 if (current == NULL) {
15124 pm_begin_node_rescue_clause_set(parent_node, rescue);
15125 } else {
15126 pm_rescue_node_subsequent_set(current, rescue);
15127 }
15128
15129 current = rescue;
15130 }
15131
15132 // The end node locations on rescue nodes will not be set correctly
15133 // since we won't know the end until we've found all subsequent
15134 // clauses. This sets the end location on all rescues once we know it.
15135 if (current != NULL) {
15136 const uint8_t *end_to_set = current->base.location.end;
15137 pm_rescue_node_t *clause = parent_node->rescue_clause;
15138
15139 while (clause != NULL) {
15140 clause->base.location.end = end_to_set;
15141 clause = clause->subsequent;
15142 }
15143 }
15144
15145 pm_token_t else_keyword;
15146 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15147 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15148 opening_newline_index = token_newline_index(parser);
15149
15150 else_keyword = parser->current;
15151 opening = &else_keyword;
15152
15153 parser_lex(parser);
15154 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15155
15156 pm_statements_node_t *else_statements = NULL;
15157 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15158 pm_accepts_block_stack_push(parser, true);
15159 pm_context_t context;
15160
15161 switch (type) {
15162 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15163 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15164 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15165 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15166 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15167 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15168 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15169 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15170 }
15171
15172 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15173 pm_accepts_block_stack_pop(parser);
15174
15175 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15176 }
15177
15178 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15179 pm_begin_node_else_clause_set(parent_node, else_clause);
15180
15181 // If we don't have a `current` rescue node, then this is a dangling
15182 // else, and it's an error.
15183 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15184 }
15185
15186 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15187 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15188 pm_token_t ensure_keyword = parser->current;
15189
15190 parser_lex(parser);
15191 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15192
15193 pm_statements_node_t *ensure_statements = NULL;
15194 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15195 pm_accepts_block_stack_push(parser, true);
15196 pm_context_t context;
15197
15198 switch (type) {
15199 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15200 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15201 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15202 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15203 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15204 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15205 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15206 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15207 }
15208
15209 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15210 pm_accepts_block_stack_pop(parser);
15211
15212 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15213 }
15214
15215 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15216 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15217 }
15218
15219 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15220 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15221 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15222 } else {
15223 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15224 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15225 }
15226}
15227
15232static pm_begin_node_t *
15233parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15234 pm_token_t begin_keyword = not_provided(parser);
15235 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15236
15237 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15238 node->base.location.start = start;
15239
15240 return node;
15241}
15242
15247parse_block_parameters(
15248 pm_parser_t *parser,
15249 bool allows_trailing_comma,
15250 const pm_token_t *opening,
15251 bool is_lambda_literal,
15252 bool accepts_blocks_in_defaults,
15253 uint16_t depth
15254) {
15255 pm_parameters_node_t *parameters = NULL;
15256 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15257 parameters = parse_parameters(
15258 parser,
15259 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15260 false,
15261 allows_trailing_comma,
15262 false,
15263 accepts_blocks_in_defaults,
15264 true,
15265 (uint16_t) (depth + 1)
15266 );
15267 }
15268
15269 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15270 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15271 accept1(parser, PM_TOKEN_NEWLINE);
15272
15273 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15274 do {
15275 switch (parser->current.type) {
15276 case PM_TOKEN_CONSTANT:
15277 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15278 parser_lex(parser);
15279 break;
15280 case PM_TOKEN_INSTANCE_VARIABLE:
15281 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15282 parser_lex(parser);
15283 break;
15284 case PM_TOKEN_GLOBAL_VARIABLE:
15285 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15286 parser_lex(parser);
15287 break;
15288 case PM_TOKEN_CLASS_VARIABLE:
15289 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15290 parser_lex(parser);
15291 break;
15292 default:
15293 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15294 break;
15295 }
15296
15297 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15298 pm_parser_local_add_token(parser, &parser->previous, 1);
15299
15300 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15301 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15302
15303 pm_block_parameters_node_append_local(block_parameters, local);
15304 } while (accept1(parser, PM_TOKEN_COMMA));
15305 }
15306 }
15307
15308 return block_parameters;
15309}
15310
15315static bool
15316outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15317 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15318 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15319 }
15320
15321 return false;
15322}
15323
/**
 * The names of the numbered parameters, `_1` through `_9`. The entry at index
 * `i` is the name of parameter number `i + 1`.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
15332
15338static pm_node_t *
15339parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15340 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15341
15342 // If we have ordinary parameters, then we will return them as the set of
15343 // parameters.
15344 if (parameters != NULL) {
15345 // If we also have implicit parameters, then this is an error.
15346 if (implicit_parameters->size > 0) {
15347 pm_node_t *node = implicit_parameters->nodes[0];
15348
15349 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15350 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15351 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15352 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15353 } else {
15354 assert(false && "unreachable");
15355 }
15356 }
15357
15358 return parameters;
15359 }
15360
15361 // If we don't have any implicit parameters, then the set of parameters is
15362 // NULL.
15363 if (implicit_parameters->size == 0) {
15364 return NULL;
15365 }
15366
15367 // If we don't have ordinary parameters, then we now must validate our set
15368 // of implicit parameters. We can only have numbered parameters or it, but
15369 // they cannot be mixed.
15370 uint8_t numbered_parameter = 0;
15371 bool it_parameter = false;
15372
15373 for (size_t index = 0; index < implicit_parameters->size; index++) {
15374 pm_node_t *node = implicit_parameters->nodes[index];
15375
15376 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15377 if (it_parameter) {
15378 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15379 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15380 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15381 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15382 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15383 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15384 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15385 } else {
15386 assert(false && "unreachable");
15387 }
15388 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15389 if (numbered_parameter > 0) {
15390 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15391 } else {
15392 it_parameter = true;
15393 }
15394 }
15395 }
15396
15397 if (numbered_parameter > 0) {
15398 // Go through the parent scopes and mark them as being disallowed from
15399 // using numbered parameters because this inner scope is using them.
15400 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15401 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15402 }
15403
15404 const pm_location_t location = { .start = opening->start, .end = closing->end };
15405 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15406 }
15407
15408 if (it_parameter) {
15409 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15410 }
15411
15412 return NULL;
15413}
15414
15418static pm_block_node_t *
15419parse_block(pm_parser_t *parser, uint16_t depth) {
15420 pm_token_t opening = parser->previous;
15421 accept1(parser, PM_TOKEN_NEWLINE);
15422
15423 pm_accepts_block_stack_push(parser, true);
15424 pm_parser_scope_push(parser, false);
15425
15426 pm_block_parameters_node_t *block_parameters = NULL;
15427
15428 if (accept1(parser, PM_TOKEN_PIPE)) {
15429 pm_token_t block_parameters_opening = parser->previous;
15430 if (match1(parser, PM_TOKEN_PIPE)) {
15431 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15432 parser->command_start = true;
15433 parser_lex(parser);
15434 } else {
15435 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15436 accept1(parser, PM_TOKEN_NEWLINE);
15437 parser->command_start = true;
15438 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15439 }
15440
15441 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15442 }
15443
15444 accept1(parser, PM_TOKEN_NEWLINE);
15445 pm_node_t *statements = NULL;
15446
15447 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15448 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15449 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15450 }
15451
15452 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15453 } else {
15454 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15455 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
15456 pm_accepts_block_stack_push(parser, true);
15457 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15458 pm_accepts_block_stack_pop(parser);
15459 }
15460
15461 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15462 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15463 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15464 }
15465 }
15466
15467 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15468 }
15469
15470 pm_constant_id_list_t locals;
15471 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15472 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15473
15474 pm_parser_scope_pop(parser);
15475 pm_accepts_block_stack_pop(parser);
15476
15477 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15478}
15479
15485static bool
15486parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15487 bool found = false;
15488
15489 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15490 found |= true;
15491 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15492
15493 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15494 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15495 } else {
15496 pm_accepts_block_stack_push(parser, true);
15497 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15498
15499 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15500 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15501 parser->previous.start = parser->previous.end;
15502 parser->previous.type = PM_TOKEN_MISSING;
15503 }
15504
15505 pm_accepts_block_stack_pop(parser);
15506 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15507 }
15508 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15509 found |= true;
15510 pm_accepts_block_stack_push(parser, false);
15511
15512 // If we get here, then the subsequent token cannot be used as an infix
15513 // operator. In this case we assume the subsequent token is part of an
15514 // argument to this method call.
15515 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15516
15517 // If we have done with the arguments and still not consumed the comma,
15518 // then we have a trailing comma where we need to check whether it is
15519 // allowed or not.
15520 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15521 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15522 }
15523
15524 pm_accepts_block_stack_pop(parser);
15525 }
15526
15527 // If we're at the end of the arguments, we can now check if there is a block
15528 // node that starts with a {. If there is, then we can parse it and add it to
15529 // the arguments.
15530 if (accepts_block) {
15531 pm_block_node_t *block = NULL;
15532
15533 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15534 found |= true;
15535 block = parse_block(parser, (uint16_t) (depth + 1));
15536 pm_arguments_validate_block(parser, arguments, block);
15537 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15538 found |= true;
15539 block = parse_block(parser, (uint16_t) (depth + 1));
15540 }
15541
15542 if (block != NULL) {
15543 if (arguments->block == NULL && !arguments->has_forwarding) {
15544 arguments->block = (pm_node_t *) block;
15545 } else {
15546 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15547
15548 if (arguments->block != NULL) {
15549 if (arguments->arguments == NULL) {
15550 arguments->arguments = pm_arguments_node_create(parser);
15551 }
15552 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15553 }
15554 arguments->block = (pm_node_t *) block;
15555 }
15556 }
15557 }
15558
15559 return found;
15560}
15561
/**
 * Check whether the given node (a return) appears somewhere it is permitted,
 * and record PM_ERR_RETURN_INVALID against it if not.
 *
 * The parser's context stack is walked from the innermost context outwards.
 * Some contexts are transparent and the walk simply continues past them, some
 * make the return invalid immediately, and some make it valid immediately. If
 * the walk runs off the end of the stack, the return is invalid only if a
 * singleton class context was passed on the way.
 *
 * NOTE(review): only the context values listed in this switch are handled
 * here; confirm against the full pm_context_t definition that every value has
 * a case.
 */
15566static void
15567parse_return(pm_parser_t *parser, pm_node_t *node) {
// Set once a singleton class context has been walked through.
15568 bool in_sclass = false;
15569 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15570 switch (context_node->context) {
15574 case PM_CONTEXT_BEGIN:
15575 case PM_CONTEXT_CASE_IN:
15578 case PM_CONTEXT_DEFINED:
15579 case PM_CONTEXT_ELSE:
15580 case PM_CONTEXT_ELSIF:
15581 case PM_CONTEXT_EMBEXPR:
15583 case PM_CONTEXT_FOR:
15584 case PM_CONTEXT_IF:
15586 case PM_CONTEXT_MAIN:
15588 case PM_CONTEXT_PARENS:
15589 case PM_CONTEXT_POSTEXE:
15591 case PM_CONTEXT_PREEXE:
15593 case PM_CONTEXT_TERNARY:
15594 case PM_CONTEXT_UNLESS:
15595 case PM_CONTEXT_UNTIL:
15596 case PM_CONTEXT_WHILE:
15597 // Keep iterating up the lists of contexts, because returns can
15598 // see through these.
15599 continue;
15603 case PM_CONTEXT_SCLASS:
// A singleton class does not decide the outcome by itself: remember it
// and keep walking. It only matters if no enclosing context settles
// the question first.
15604 in_sclass = true;
15605 continue;
15609 case PM_CONTEXT_CLASS:
15613 case PM_CONTEXT_MODULE:
15614 // These contexts are invalid for a return.
15615 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15616 return;
15626 case PM_CONTEXT_DEF:
15632 // These contexts are valid for a return, and we should not
15633 // continue to loop.
15634 return;
15635 case PM_CONTEXT_NONE:
15636 // This case should never happen.
15637 assert(false && "unreachable");
15638 break;
15639 }
15640 }
// The whole stack was walked without reaching a context that settles the
// question. The return is then invalid only if it sits inside a singleton
// class.
15641 if (in_sclass) {
15642 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15643 }
15644}
15645
15650static void
15651parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15652 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15653 switch (context_node->context) {
15659 case PM_CONTEXT_DEFINED:
15660 case PM_CONTEXT_FOR:
15667 case PM_CONTEXT_POSTEXE:
15668 case PM_CONTEXT_UNTIL:
15669 case PM_CONTEXT_WHILE:
15670 // These are the good cases. We're allowed to have a block exit
15671 // in these contexts.
15672 return;
15673 case PM_CONTEXT_DEF:
15678 case PM_CONTEXT_MAIN:
15679 case PM_CONTEXT_PREEXE:
15680 case PM_CONTEXT_SCLASS:
15684 // These are the bad cases. We're not allowed to have a block
15685 // exit in these contexts.
15686 //
15687 // If we get here, then we're about to mark this block exit
15688 // as invalid. However, it could later _become_ valid if we
15689 // find a trailing while/until on the expression. In this
15690 // case instead of adding the error here, we'll add the
15691 // block exit to the list of exits for the expression, and
15692 // the node parsing will handle validating it instead.
15693 assert(parser->current_block_exits != NULL);
15694 pm_node_list_append(parser->current_block_exits, node);
15695 return;
15699 case PM_CONTEXT_BEGIN:
15700 case PM_CONTEXT_CASE_IN:
15705 case PM_CONTEXT_CLASS:
15707 case PM_CONTEXT_ELSE:
15708 case PM_CONTEXT_ELSIF:
15709 case PM_CONTEXT_EMBEXPR:
15711 case PM_CONTEXT_IF:
15715 case PM_CONTEXT_MODULE:
15717 case PM_CONTEXT_PARENS:
15720 case PM_CONTEXT_TERNARY:
15721 case PM_CONTEXT_UNLESS:
15722 // In these contexts we should continue walking up the list of
15723 // contexts.
15724 break;
15725 case PM_CONTEXT_NONE:
15726 // This case should never happen.
15727 assert(false && "unreachable");
15728 break;
15729 }
15730 }
15731}
15732
15737static pm_node_list_t *
15738push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15739 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15740 parser->current_block_exits = current_block_exits;
15741 return previous_block_exits;
15742}
15743
15749static void
15750flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15751 pm_node_t *block_exit;
15752 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15753 const char *type;
15754
15755 switch (PM_NODE_TYPE(block_exit)) {
15756 case PM_BREAK_NODE: type = "break"; break;
15757 case PM_NEXT_NODE: type = "next"; break;
15758 case PM_REDO_NODE: type = "redo"; break;
15759 default: assert(false && "unreachable"); type = ""; break;
15760 }
15761
15762 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15763 }
15764
15765 parser->current_block_exits = previous_block_exits;
15766}
15767
15772static void
15773pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15774 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15775 // If we matched a trailing while/until, then all of the block exits in
15776 // the contained list are valid. In this case we do not need to do
15777 // anything.
15778 parser->current_block_exits = previous_block_exits;
15779 } else if (previous_block_exits != NULL) {
15780 // If we did not matching a trailing while/until, then all of the block
15781 // exits contained in the list are invalid for this specific context.
15782 // However, they could still become valid in a higher level context if
15783 // there is another list above this one. In this case we'll push all of
15784 // the block exits up to the previous list.
15785 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15786 parser->current_block_exits = previous_block_exits;
15787 } else {
15788 // If we did not match a trailing while/until and this was the last
15789 // chance to do so, then all of the block exits in the list are invalid
15790 // and we need to add an error for each of them.
15791 flush_block_exits(parser, previous_block_exits);
15792 }
15793}
15794
15795static inline pm_node_t *
15796parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15797 context_push(parser, PM_CONTEXT_PREDICATE);
15798 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15799 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15800
15801 // Predicates are closed by a term, a "then", or a term and then a "then".
15802 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15803
15804 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15805 predicate_closed = true;
15806 *then_keyword = parser->previous;
15807 }
15808
15809 if (!predicate_closed) {
15810 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15811 }
15812
15813 context_pop(parser);
15814 return predicate;
15815}
15816
15817static inline pm_node_t *
15818parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15819 pm_node_list_t current_block_exits = { 0 };
15820 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15821
15822 pm_token_t keyword = parser->previous;
15823 pm_token_t then_keyword = not_provided(parser);
15824
15825 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15826 pm_statements_node_t *statements = NULL;
15827
15828 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15829 pm_accepts_block_stack_push(parser, true);
15830 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15831 pm_accepts_block_stack_pop(parser);
15832 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15833 }
15834
15835 pm_token_t end_keyword = not_provided(parser);
15836 pm_node_t *parent = NULL;
15837
15838 switch (context) {
15839 case PM_CONTEXT_IF:
15840 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15841 break;
15842 case PM_CONTEXT_UNLESS:
15843 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15844 break;
15845 default:
15846 assert(false && "unreachable");
15847 break;
15848 }
15849
15850 pm_node_t *current = parent;
15851
15852 // Parse any number of elsif clauses. This will form a linked list of if
15853 // nodes pointing to each other from the top.
15854 if (context == PM_CONTEXT_IF) {
15855 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15856 if (parser_end_of_line_p(parser)) {
15857 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15858 }
15859
15860 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15861 pm_token_t elsif_keyword = parser->current;
15862 parser_lex(parser);
15863
15864 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15865 pm_accepts_block_stack_push(parser, true);
15866
15867 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15868 pm_accepts_block_stack_pop(parser);
15869 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15870
15871 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15872 ((pm_if_node_t *) current)->subsequent = elsif;
15873 current = elsif;
15874 }
15875 }
15876
15877 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15878 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15879 opening_newline_index = token_newline_index(parser);
15880
15881 parser_lex(parser);
15882 pm_token_t else_keyword = parser->previous;
15883
15884 pm_accepts_block_stack_push(parser, true);
15885 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15886 pm_accepts_block_stack_pop(parser);
15887
15888 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15889 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15890 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15891
15892 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15893
15894 switch (context) {
15895 case PM_CONTEXT_IF:
15896 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15897 break;
15898 case PM_CONTEXT_UNLESS:
15899 ((pm_unless_node_t *) parent)->else_clause = else_node;
15900 break;
15901 default:
15902 assert(false && "unreachable");
15903 break;
15904 }
15905 } else {
15906 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15907 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15908 }
15909
15910 // Set the appropriate end location for all of the nodes in the subtree.
15911 switch (context) {
15912 case PM_CONTEXT_IF: {
15913 pm_node_t *current = parent;
15914 bool recursing = true;
15915
15916 while (recursing) {
15917 switch (PM_NODE_TYPE(current)) {
15918 case PM_IF_NODE:
15919 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15920 current = ((pm_if_node_t *) current)->subsequent;
15921 recursing = current != NULL;
15922 break;
15923 case PM_ELSE_NODE:
15924 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15925 recursing = false;
15926 break;
15927 default: {
15928 recursing = false;
15929 break;
15930 }
15931 }
15932 }
15933 break;
15934 }
15935 case PM_CONTEXT_UNLESS:
15936 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15937 break;
15938 default:
15939 assert(false && "unreachable");
15940 break;
15941 }
15942
15943 pop_block_exits(parser, previous_block_exits);
15944 pm_node_list_free(&current_block_exits);
15945
15946 return parent;
15947}
15948
/**
 * A list of case labels covering the keyword token types below. The first
 * entry has no leading `case` and the last has no trailing colon, so the macro
 * is written in a switch as `case PM_CASE_KEYWORD:`.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15964
/**
 * A list of case labels covering the operator token types below. The first
 * entry has no leading `case` and the last has no trailing colon, so the macro
 * is written in a switch as `case PM_CASE_OPERATOR:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15977
/**
 * A list of case labels covering the token types below (numeric literals, the
 * opening tokens of several literal forms, and a handful of keywords). The
 * first entry has no leading `case` and the last has no trailing colon, so the
 * macro is meant to be written in a switch as `case PM_CASE_PRIMITIVE:`.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
15992
/**
 * A list of case labels covering the token types below. The first entry has no
 * leading `case` and the last has no trailing colon, so the macro is meant to
 * be written in a switch as `case PM_CASE_PARAMETER:`.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
16001
/**
 * A list of case labels covering the node types below. Unlike the token lists
 * above, these are node type constants. The first entry has no leading `case`
 * and the last has no trailing colon, so the macro is meant to be written in a
 * switch as `case PM_CASE_WRITABLE:`.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
16010
16011// Assert here that the flags are the same so that we can safely switch the type
16012// of the node without having to move the flags.
16013PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16014
16019static inline pm_node_flags_t
16020parse_unescaped_encoding(const pm_parser_t *parser) {
16021 if (parser->explicit_encoding != NULL) {
16023 // If the there's an explicit encoding and it's using a UTF-8 escape
16024 // sequence, then mark the string as UTF-8.
16025 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
16026 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16027 // If there's a non-UTF-8 escape sequence being used, then the
16028 // string uses the source encoding, unless the source is marked as
16029 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16030 // order to keep the string valid.
16031 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
16032 }
16033 }
16034 return 0;
16035}
16036
16041static pm_node_t *
16042parse_string_part(pm_parser_t *parser, uint16_t depth) {
16043 switch (parser->current.type) {
16044 // Here the lexer has returned to us plain string content. In this case
16045 // we'll create a string node that has no opening or closing and return that
16046 // as the part. These kinds of parts look like:
16047 //
16048 // "aaa #{bbb} #@ccc ddd"
16049 // ^^^^ ^ ^^^^
16050 case PM_TOKEN_STRING_CONTENT: {
16051 pm_token_t opening = not_provided(parser);
16052 pm_token_t closing = not_provided(parser);
16053
16054 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16055 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16056
16057 parser_lex(parser);
16058 return node;
16059 }
16060 // Here the lexer has returned the beginning of an embedded expression. In
16061 // that case we'll parse the inner statements and return that as the part.
16062 // These kinds of parts look like:
16063 //
16064 // "aaa #{bbb} #@ccc ddd"
16065 // ^^^^^^
16066 case PM_TOKEN_EMBEXPR_BEGIN: {
16067 // Ruby disallows seeing encoding around interpolation in strings,
16068 // even though it is known at parse time.
16069 parser->explicit_encoding = NULL;
16070
16071 pm_lex_state_t state = parser->lex_state;
16072 int brace_nesting = parser->brace_nesting;
16073
16074 parser->brace_nesting = 0;
16075 lex_state_set(parser, PM_LEX_STATE_BEG);
16076 parser_lex(parser);
16077
16078 pm_token_t opening = parser->previous;
16079 pm_statements_node_t *statements = NULL;
16080
16081 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16082 pm_accepts_block_stack_push(parser, true);
16083 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16084 pm_accepts_block_stack_pop(parser);
16085 }
16086
16087 parser->brace_nesting = brace_nesting;
16088 lex_state_set(parser, state);
16089
16090 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16091 pm_token_t closing = parser->previous;
16092
16093 // If this set of embedded statements only contains a single
16094 // statement, then Ruby does not consider it as a possible statement
16095 // that could emit a line event.
16096 if (statements != NULL && statements->body.size == 1) {
16097 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16098 }
16099
16100 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16101 }
16102
16103 // Here the lexer has returned the beginning of an embedded variable.
16104 // In that case we'll parse the variable and create an appropriate node
16105 // for it and then return that node. These kinds of parts look like:
16106 //
16107 // "aaa #{bbb} #@ccc ddd"
16108 // ^^^^^
16109 case PM_TOKEN_EMBVAR: {
16110 // Ruby disallows seeing encoding around interpolation in strings,
16111 // even though it is known at parse time.
16112 parser->explicit_encoding = NULL;
16113
16114 lex_state_set(parser, PM_LEX_STATE_BEG);
16115 parser_lex(parser);
16116
16117 pm_token_t operator = parser->previous;
16118 pm_node_t *variable;
16119
16120 switch (parser->current.type) {
16121 // In this case a back reference is being interpolated. We'll
16122 // create a global variable read node.
16123 case PM_TOKEN_BACK_REFERENCE:
16124 parser_lex(parser);
16125 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16126 break;
16127 // In this case an nth reference is being interpolated. We'll
16128 // create a global variable read node.
16129 case PM_TOKEN_NUMBERED_REFERENCE:
16130 parser_lex(parser);
16131 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16132 break;
16133 // In this case a global variable is being interpolated. We'll
16134 // create a global variable read node.
16135 case PM_TOKEN_GLOBAL_VARIABLE:
16136 parser_lex(parser);
16137 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16138 break;
16139 // In this case an instance variable is being interpolated.
16140 // We'll create an instance variable read node.
16141 case PM_TOKEN_INSTANCE_VARIABLE:
16142 parser_lex(parser);
16143 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16144 break;
16145 // In this case a class variable is being interpolated. We'll
16146 // create a class variable read node.
16147 case PM_TOKEN_CLASS_VARIABLE:
16148 parser_lex(parser);
16149 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16150 break;
16151 // We can hit here if we got an invalid token. In that case
16152 // we'll not attempt to lex this token and instead just return a
16153 // missing node.
16154 default:
16155 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16156 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16157 break;
16158 }
16159
16160 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16161 }
16162 default:
16163 parser_lex(parser);
16164 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16165 return NULL;
16166 }
16167}
16168
16174static const uint8_t *
16175parse_operator_symbol_name(const pm_token_t *name) {
16176 switch (name->type) {
16177 case PM_TOKEN_TILDE:
16178 case PM_TOKEN_BANG:
16179 if (name->end[-1] == '@') return name->end - 1;
16181 default:
16182 return name->end;
16183 }
16184}
16185
16186static pm_node_t *
16187parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16188 pm_token_t closing = not_provided(parser);
16189 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16190
16191 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16192
16193 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16194 parser_lex(parser);
16195
16196 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16197 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16198
16199 return (pm_node_t *) symbol;
16200}
16201
16207static pm_node_t *
16208parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16209 const pm_token_t opening = parser->previous;
16210
16211 if (lex_mode->mode != PM_LEX_STRING) {
16212 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16213
16214 switch (parser->current.type) {
16215 case PM_CASE_OPERATOR:
16216 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16217 case PM_TOKEN_IDENTIFIER:
16218 case PM_TOKEN_CONSTANT:
16219 case PM_TOKEN_INSTANCE_VARIABLE:
16220 case PM_TOKEN_METHOD_NAME:
16221 case PM_TOKEN_CLASS_VARIABLE:
16222 case PM_TOKEN_GLOBAL_VARIABLE:
16223 case PM_TOKEN_NUMBERED_REFERENCE:
16224 case PM_TOKEN_BACK_REFERENCE:
16225 case PM_CASE_KEYWORD:
16226 parser_lex(parser);
16227 break;
16228 default:
16229 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16230 break;
16231 }
16232
16233 pm_token_t closing = not_provided(parser);
16234 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16235
16236 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16237 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16238
16239 return (pm_node_t *) symbol;
16240 }
16241
16242 if (lex_mode->as.string.interpolation) {
16243 // If we have the end of the symbol, then we can return an empty symbol.
16244 if (match1(parser, PM_TOKEN_STRING_END)) {
16245 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16246 parser_lex(parser);
16247
16248 pm_token_t content = not_provided(parser);
16249 pm_token_t closing = parser->previous;
16250 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16251 }
16252
16253 // Now we can parse the first part of the symbol.
16254 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16255
16256 // If we got a string part, then it's possible that we could transform
16257 // what looks like an interpolated symbol into a regular symbol.
16258 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16259 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16260 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16261
16262 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16263 }
16264
16265 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16266 if (part) pm_interpolated_symbol_node_append(symbol, part);
16267
16268 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16269 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16270 pm_interpolated_symbol_node_append(symbol, part);
16271 }
16272 }
16273
16274 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16275 if (match1(parser, PM_TOKEN_EOF)) {
16276 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16277 } else {
16278 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16279 }
16280
16281 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16282 return (pm_node_t *) symbol;
16283 }
16284
16285 pm_token_t content;
16286 pm_string_t unescaped;
16287
16288 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16289 content = parser->current;
16290 unescaped = parser->current_string;
16291 parser_lex(parser);
16292
16293 // If we have two string contents in a row, then the content of this
16294 // symbol is split because of heredoc contents. This looks like:
16295 //
16296 // <<A; :'a
16297 // A
16298 // b'
16299 //
16300 // In this case, the best way we have to represent this is as an
16301 // interpolated string node, so that's what we'll do here.
16302 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16303 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16304 pm_token_t bounds = not_provided(parser);
16305
16306 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16307 pm_interpolated_symbol_node_append(symbol, part);
16308
16309 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16310 pm_interpolated_symbol_node_append(symbol, part);
16311
16312 if (next_state != PM_LEX_STATE_NONE) {
16313 lex_state_set(parser, next_state);
16314 }
16315
16316 parser_lex(parser);
16317 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16318
16319 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16320 return (pm_node_t *) symbol;
16321 }
16322 } else {
16323 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16324 pm_string_shared_init(&unescaped, content.start, content.end);
16325 }
16326
16327 if (next_state != PM_LEX_STATE_NONE) {
16328 lex_state_set(parser, next_state);
16329 }
16330
16331 if (match1(parser, PM_TOKEN_EOF)) {
16332 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16333 } else {
16334 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16335 }
16336
16337 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16338}
16339
16344static inline pm_node_t *
16345parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16346 switch (parser->current.type) {
16347 case PM_CASE_OPERATOR: {
16348 const pm_token_t opening = not_provided(parser);
16349 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16350 }
16351 case PM_CASE_KEYWORD:
16352 case PM_TOKEN_CONSTANT:
16353 case PM_TOKEN_IDENTIFIER:
16354 case PM_TOKEN_METHOD_NAME: {
16355 parser_lex(parser);
16356
16357 pm_token_t opening = not_provided(parser);
16358 pm_token_t closing = not_provided(parser);
16359 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16360
16361 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16362 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16363
16364 return (pm_node_t *) symbol;
16365 }
16366 case PM_TOKEN_SYMBOL_BEGIN: {
16367 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16368 parser_lex(parser);
16369
16370 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16371 }
16372 default:
16373 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16374 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16375 }
16376}
16377
16384static inline pm_node_t *
16385parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16386 switch (parser->current.type) {
16387 case PM_CASE_OPERATOR: {
16388 const pm_token_t opening = not_provided(parser);
16389 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16390 }
16391 case PM_CASE_KEYWORD:
16392 case PM_TOKEN_CONSTANT:
16393 case PM_TOKEN_IDENTIFIER:
16394 case PM_TOKEN_METHOD_NAME: {
16395 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16396 parser_lex(parser);
16397
16398 pm_token_t opening = not_provided(parser);
16399 pm_token_t closing = not_provided(parser);
16400 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16401
16402 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16403 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16404
16405 return (pm_node_t *) symbol;
16406 }
16407 case PM_TOKEN_SYMBOL_BEGIN: {
16408 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16409 parser_lex(parser);
16410
16411 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16412 }
16413 case PM_TOKEN_BACK_REFERENCE:
16414 parser_lex(parser);
16415 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16416 case PM_TOKEN_NUMBERED_REFERENCE:
16417 parser_lex(parser);
16418 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16419 case PM_TOKEN_GLOBAL_VARIABLE:
16420 parser_lex(parser);
16421 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16422 default:
16423 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16424 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16425 }
16426}
16427
16432static pm_node_t *
16433parse_variable(pm_parser_t *parser) {
16434 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16435 int depth;
16436 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16437
16438 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16439 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16440 }
16441
16442 pm_scope_t *current_scope = parser->current_scope;
16443 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16444 if (is_numbered_param) {
16445 // When you use a numbered parameter, it implies the existence of
16446 // all of the locals that exist before it. For example, referencing
16447 // _2 means that _1 must exist. Therefore here we loop through all
16448 // of the possibilities and add them into the constant pool.
16449 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16450 for (uint8_t number = 1; number <= maximum; number++) {
16451 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16452 }
16453
16454 if (!match1(parser, PM_TOKEN_EQUAL)) {
16455 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16456 }
16457
16458 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16459 pm_node_list_append(&current_scope->implicit_parameters, node);
16460
16461 return node;
16462 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16463 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16464 pm_node_list_append(&current_scope->implicit_parameters, node);
16465
16466 return node;
16467 }
16468 }
16469
16470 return NULL;
16471}
16472
16476static pm_node_t *
16477parse_variable_call(pm_parser_t *parser) {
16478 pm_node_flags_t flags = 0;
16479
16480 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16481 pm_node_t *node = parse_variable(parser);
16482 if (node != NULL) return node;
16483 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
16484 }
16485
16486 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16487 pm_node_flag_set((pm_node_t *)node, flags);
16488
16489 return (pm_node_t *) node;
16490}
16491
16497static inline pm_token_t
16498parse_method_definition_name(pm_parser_t *parser) {
16499 switch (parser->current.type) {
16500 case PM_CASE_KEYWORD:
16501 case PM_TOKEN_CONSTANT:
16502 case PM_TOKEN_METHOD_NAME:
16503 parser_lex(parser);
16504 return parser->previous;
16505 case PM_TOKEN_IDENTIFIER:
16506 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16507 parser_lex(parser);
16508 return parser->previous;
16509 case PM_CASE_OPERATOR:
16510 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16511 parser_lex(parser);
16512 return parser->previous;
16513 default:
16514 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16515 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16516 }
16517}
16518
16519static void
16520parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16521 // Get a reference to the string struct that is being held by the string
16522 // node. This is the value we're going to actually manipulate.
16523 pm_string_ensure_owned(string);
16524
16525 // Now get the bounds of the existing string. We'll use this as a
16526 // destination to move bytes into. We'll also use it for bounds checking
16527 // since we don't require that these strings be null terminated.
16528 size_t dest_length = pm_string_length(string);
16529 const uint8_t *source_cursor = (uint8_t *) string->source;
16530 const uint8_t *source_end = source_cursor + dest_length;
16531
16532 // We're going to move bytes backward in the string when we get leading
16533 // whitespace, so we'll maintain a pointer to the current position in the
16534 // string that we're writing to.
16535 size_t trimmed_whitespace = 0;
16536
16537 // While we haven't reached the amount of common whitespace that we need to
16538 // trim and we haven't reached the end of the string, we'll keep trimming
16539 // whitespace. Trimming in this context means skipping over these bytes such
16540 // that they aren't copied into the new string.
16541 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16542 if (*source_cursor == '\t') {
16543 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16544 if (trimmed_whitespace > common_whitespace) break;
16545 } else {
16546 trimmed_whitespace++;
16547 }
16548
16549 source_cursor++;
16550 dest_length--;
16551 }
16552
16553 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16554 string->length = dest_length;
16555}
16556
16560static void
16561parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16562 // The next node should be dedented if it's the first node in the list or if
16563 // it follows a string node.
16564 bool dedent_next = true;
16565
16566 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16567 // keep around two indices: a read and a write. If we end up trimming all of
16568 // the whitespace from a node, then we'll drop it from the list entirely.
16569 size_t write_index = 0;
16570
16571 pm_node_t *node;
16572 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16573 // We're not manipulating child nodes that aren't strings. In this case
16574 // we'll skip past it and indicate that the subsequent node should not
16575 // be dedented.
16576 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16577 nodes->nodes[write_index++] = node;
16578 dedent_next = false;
16579 continue;
16580 }
16581
16582 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16583 if (dedent_next) {
16584 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16585 }
16586
16587 if (string_node->unescaped.length == 0) {
16588 pm_node_destroy(parser, node);
16589 } else {
16590 nodes->nodes[write_index++] = node;
16591 }
16592
16593 // We always dedent the next node if it follows a string node.
16594 dedent_next = true;
16595 }
16596
16597 nodes->size = write_index;
16598}
16599
16603static pm_token_t
16604parse_strings_empty_content(const uint8_t *location) {
16605 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16606}
16607
16611static inline pm_node_t *
16612parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16613 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16614 bool concating = false;
16615
16616 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16617 pm_node_t *node = NULL;
16618
16619 // Here we have found a string literal. We'll parse it and add it to
16620 // the list of strings.
16621 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16622 assert(lex_mode->mode == PM_LEX_STRING);
16623 bool lex_interpolation = lex_mode->as.string.interpolation;
16624 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16625
16626 pm_token_t opening = parser->current;
16627 parser_lex(parser);
16628
16629 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16630 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16631 // If we get here, then we have an end immediately after a
16632 // start. In that case we'll create an empty content token and
16633 // return an uninterpolated string.
16634 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16635 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16636
16637 pm_string_shared_init(&string->unescaped, content.start, content.end);
16638 node = (pm_node_t *) string;
16639 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16640 // If we get here, then we have an end of a label immediately
16641 // after a start. In that case we'll create an empty symbol
16642 // node.
16643 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16644 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16645
16646 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16647 node = (pm_node_t *) symbol;
16648
16649 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16650 } else if (!lex_interpolation) {
16651 // If we don't accept interpolation then we expect the string to
16652 // start with a single string content node.
16653 pm_string_t unescaped;
16654 pm_token_t content;
16655
16656 if (match1(parser, PM_TOKEN_EOF)) {
16657 unescaped = PM_STRING_EMPTY;
16658 content = not_provided(parser);
16659 } else {
16660 unescaped = parser->current_string;
16661 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16662 content = parser->previous;
16663 }
16664
16665 // It is unfortunately possible to have multiple string content
16666 // nodes in a row in the case that there's heredoc content in
16667 // the middle of the string, like this cursed example:
16668 //
16669 // <<-END+'b
16670 // a
16671 // END
16672 // c'+'d'
16673 //
16674 // In that case we need to switch to an interpolated string to
16675 // be able to contain all of the parts.
16676 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16677 pm_node_list_t parts = { 0 };
16678
16679 pm_token_t delimiters = not_provided(parser);
16680 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16681 pm_node_list_append(&parts, part);
16682
16683 do {
16684 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16685 pm_node_list_append(&parts, part);
16686 parser_lex(parser);
16687 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16688
16689 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16690 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16691
16692 pm_node_list_free(&parts);
16693 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16694 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16695 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16696 } else if (match1(parser, PM_TOKEN_EOF)) {
16697 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16698 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16699 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16700 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16701 } else {
16702 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16703 parser->previous.start = parser->previous.end;
16704 parser->previous.type = PM_TOKEN_MISSING;
16705 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16706 }
16707 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16708 // In this case we've hit string content so we know the string
16709 // at least has something in it. We'll need to check if the
16710 // following token is the end (in which case we can return a
16711 // plain string) or if it's not then it has interpolation.
16712 pm_token_t content = parser->current;
16713 pm_string_t unescaped = parser->current_string;
16714 parser_lex(parser);
16715
16716 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16717 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16718 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16719
16720 // Kind of odd behavior, but basically if we have an
16721 // unterminated string and it ends in a newline, we back up one
16722 // character so that the error message is on the last line of
16723 // content in the string.
16724 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16725 const uint8_t *location = parser->previous.end;
16726 if (location > parser->start && location[-1] == '\n') location--;
16727 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16728
16729 parser->previous.start = parser->previous.end;
16730 parser->previous.type = PM_TOKEN_MISSING;
16731 }
16732 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16733 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16734 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16735 } else {
16736 // If we get here, then we have interpolation so we'll need
16737 // to create a string or symbol node with interpolation.
16738 pm_node_list_t parts = { 0 };
16739 pm_token_t string_opening = not_provided(parser);
16740 pm_token_t string_closing = not_provided(parser);
16741
16742 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16743 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16744 pm_node_list_append(&parts, part);
16745
16746 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16747 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16748 pm_node_list_append(&parts, part);
16749 }
16750 }
16751
16752 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16753 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16754 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16755 } else if (match1(parser, PM_TOKEN_EOF)) {
16756 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16757 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16758 } else {
16759 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16760 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16761 }
16762
16763 pm_node_list_free(&parts);
16764 }
16765 } else {
16766 // If we get here, then the first part of the string is not plain
16767 // string content, in which case we need to parse the string as an
16768 // interpolated string.
16769 pm_node_list_t parts = { 0 };
16770 pm_node_t *part;
16771
16772 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16773 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16774 pm_node_list_append(&parts, part);
16775 }
16776 }
16777
16778 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16779 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16780 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16781 } else if (match1(parser, PM_TOKEN_EOF)) {
16782 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16783 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16784 } else {
16785 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16786 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16787 }
16788
16789 pm_node_list_free(&parts);
16790 }
16791
16792 if (current == NULL) {
16793 // If the node we just parsed is a symbol node, then we can't
16794 // concatenate it with anything else, so we can now return that
16795 // node.
16796 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16797 return node;
16798 }
16799
16800 // If we don't already have a node, then it's fine and we can just
16801 // set the result to be the node we just parsed.
16802 current = node;
16803 } else {
16804 // Otherwise we need to check the type of the node we just parsed.
16805 // If it cannot be concatenated with the previous node, then we'll
16806 // need to add a syntax error.
16807 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16808 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16809 }
16810
16811 // If we haven't already created our container for concatenation,
16812 // we'll do that now.
16813 if (!concating) {
16814 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16815 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16816 }
16817
16818 concating = true;
16819 pm_token_t bounds = not_provided(parser);
16820
16821 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16822 pm_interpolated_string_node_append(container, current);
16823 current = (pm_node_t *) container;
16824 }
16825
16826 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16827 }
16828 }
16829
16830 return current;
16831}
16832
16833#define PM_PARSE_PATTERN_SINGLE 0
16834#define PM_PARSE_PATTERN_TOP 1
16835#define PM_PARSE_PATTERN_MULTI 2
16836
16837static pm_node_t *
16838parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16839
16845static void
16846parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16847 // Skip this capture if it starts with an underscore.
16848 if (*location->start == '_') return;
16849
16850 if (pm_constant_id_list_includes(captures, capture)) {
16851 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16852 } else {
16853 pm_constant_id_list_append(captures, capture);
16854 }
16855}
16856
16860static pm_node_t *
16861parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16862 // Now, if there are any :: operators that follow, parse them as constant
16863 // path nodes.
16864 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16865 pm_token_t delimiter = parser->previous;
16866 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16867 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16868 }
16869
16870 // If there is a [ or ( that follows, then this is part of a larger pattern
16871 // expression. We'll parse the inner pattern here, then modify the returned
16872 // inner pattern with our constant path attached.
16873 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16874 return node;
16875 }
16876
16877 pm_token_t opening;
16878 pm_token_t closing;
16879 pm_node_t *inner = NULL;
16880
16881 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16882 opening = parser->previous;
16883 accept1(parser, PM_TOKEN_NEWLINE);
16884
16885 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16886 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16887 accept1(parser, PM_TOKEN_NEWLINE);
16888 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16889 }
16890
16891 closing = parser->previous;
16892 } else {
16893 parser_lex(parser);
16894 opening = parser->previous;
16895 accept1(parser, PM_TOKEN_NEWLINE);
16896
16897 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16898 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16899 accept1(parser, PM_TOKEN_NEWLINE);
16900 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16901 }
16902
16903 closing = parser->previous;
16904 }
16905
16906 if (!inner) {
16907 // If there was no inner pattern, then we have something like Foo() or
16908 // Foo[]. In that case we'll create an array pattern with no requireds.
16909 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16910 }
16911
16912 // Now that we have the inner pattern, check to see if it's an array, find,
16913 // or hash pattern. If it is, then we'll attach our constant path to it if
16914 // it doesn't already have a constant. If it's not one of those node types
16915 // or it does have a constant, then we'll create an array pattern.
16916 switch (PM_NODE_TYPE(inner)) {
16917 case PM_ARRAY_PATTERN_NODE: {
16918 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16919
16920 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16921 pattern_node->base.location.start = node->location.start;
16922 pattern_node->base.location.end = closing.end;
16923
16924 pattern_node->constant = node;
16925 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16926 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16927
16928 return (pm_node_t *) pattern_node;
16929 }
16930
16931 break;
16932 }
16933 case PM_FIND_PATTERN_NODE: {
16934 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16935
16936 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16937 pattern_node->base.location.start = node->location.start;
16938 pattern_node->base.location.end = closing.end;
16939
16940 pattern_node->constant = node;
16941 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16942 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16943
16944 return (pm_node_t *) pattern_node;
16945 }
16946
16947 break;
16948 }
16949 case PM_HASH_PATTERN_NODE: {
16950 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16951
16952 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16953 pattern_node->base.location.start = node->location.start;
16954 pattern_node->base.location.end = closing.end;
16955
16956 pattern_node->constant = node;
16957 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16958 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16959
16960 return (pm_node_t *) pattern_node;
16961 }
16962
16963 break;
16964 }
16965 default:
16966 break;
16967 }
16968
16969 // If we got here, then we didn't return one of the inner patterns by
16970 // attaching its constant. In this case we'll create an array pattern and
16971 // attach our constant to it.
16972 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16973 pm_array_pattern_node_requireds_append(pattern_node, inner);
16974 return (pm_node_t *) pattern_node;
16975}
16976
16980static pm_splat_node_t *
16981parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16982 assert(parser->previous.type == PM_TOKEN_USTAR);
16983 pm_token_t operator = parser->previous;
16984 pm_node_t *name = NULL;
16985
16986 // Rest patterns don't necessarily have a name associated with them. So we
16987 // will check for that here. If they do, then we'll add it to the local
16988 // table since this pattern will cause it to become a local variable.
16989 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16990 pm_token_t identifier = parser->previous;
16991 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16992
16993 int depth;
16994 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16995 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16996 }
16997
16998 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16999 name = (pm_node_t *) pm_local_variable_target_node_create(
17000 parser,
17001 &PM_LOCATION_TOKEN_VALUE(&identifier),
17002 constant_id,
17003 (uint32_t) (depth == -1 ? 0 : depth)
17004 );
17005 }
17006
17007 // Finally we can return the created node.
17008 return pm_splat_node_create(parser, &operator, name);
17009}
17010
17014static pm_node_t *
17015parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17016 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17017 parser_lex(parser);
17018
17019 pm_token_t operator = parser->previous;
17020 pm_node_t *value = NULL;
17021
17022 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17023 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17024 }
17025
17026 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17027 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17028
17029 int depth;
17030 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17031 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17032 }
17033
17034 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17035 value = (pm_node_t *) pm_local_variable_target_node_create(
17036 parser,
17037 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17038 constant_id,
17039 (uint32_t) (depth == -1 ? 0 : depth)
17040 );
17041 }
17042
17043 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17044}
17045
17050static bool
17051pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17052 ptrdiff_t length = end - start;
17053 if (length == 0) return false;
17054
17055 // First ensure that it starts with a valid identifier starting character.
17056 size_t width = char_is_identifier_start(parser, start, end - start);
17057 if (width == 0) return false;
17058
17059 // Next, ensure that it's not an uppercase character.
17060 if (parser->encoding_changed) {
17061 if (parser->encoding->isupper_char(start, length)) return false;
17062 } else {
17063 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17064 }
17065
17066 // Next, iterate through all of the bytes of the string to ensure that they
17067 // are all valid identifier characters.
17068 const uint8_t *cursor = start + width;
17069 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
17070 return cursor == end;
17071}
17072
17077static pm_node_t *
17078parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17079 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17080
17081 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17082 int depth = -1;
17083
17084 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17085 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17086 } else {
17087 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17088
17089 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17090 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17091 }
17092 }
17093
17094 if (depth == -1) {
17095 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17096 }
17097
17098 parse_pattern_capture(parser, captures, constant_id, value_loc);
17099 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17100 parser,
17101 value_loc,
17102 constant_id,
17103 (uint32_t) (depth == -1 ? 0 : depth)
17104 );
17105
17106 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17107}
17108
17113static void
17114parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17115 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17116 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17117 }
17118}
17119
17124parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17125 pm_node_list_t assocs = { 0 };
17126 pm_static_literals_t keys = { 0 };
17127 pm_node_t *rest = NULL;
17128
17129 switch (PM_NODE_TYPE(first_node)) {
17130 case PM_ASSOC_SPLAT_NODE:
17131 case PM_NO_KEYWORDS_PARAMETER_NODE:
17132 rest = first_node;
17133 break;
17134 case PM_SYMBOL_NODE: {
17135 if (pm_symbol_node_label_p(first_node)) {
17136 parse_pattern_hash_key(parser, &keys, first_node);
17137 pm_node_t *value;
17138
17139 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17140 // Otherwise, we will create an implicit local variable
17141 // target for the value.
17142 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17143 } else {
17144 // Here we have a value for the first assoc in the list, so
17145 // we will parse it now.
17146 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17147 }
17148
17149 pm_token_t operator = not_provided(parser);
17150 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17151
17152 pm_node_list_append(&assocs, assoc);
17153 break;
17154 }
17155 }
17157 default: {
17158 // If we get anything else, then this is an error. For this we'll
17159 // create a missing node for the value and create an assoc node for
17160 // the first node in the list.
17161 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17162 pm_parser_err_node(parser, first_node, diag_id);
17163
17164 pm_token_t operator = not_provided(parser);
17165 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17166 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17167
17168 pm_node_list_append(&assocs, assoc);
17169 break;
17170 }
17171 }
17172
17173 // If there are any other assocs, then we'll parse them now.
17174 while (accept1(parser, PM_TOKEN_COMMA)) {
17175 // Here we need to break to support trailing commas.
17176 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17177 // Trailing commas are not allowed to follow a rest pattern.
17178 if (rest != NULL) {
17179 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17180 }
17181
17182 break;
17183 }
17184
17185 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17186 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17187
17188 if (rest == NULL) {
17189 rest = assoc;
17190 } else {
17191 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17192 pm_node_list_append(&assocs, assoc);
17193 }
17194 } else {
17195 pm_node_t *key;
17196
17197 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17198 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17199
17200 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
17201 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17202 } else if (!pm_symbol_node_label_p(key)) {
17203 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17204 }
17205 } else {
17206 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17207 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17208 }
17209
17210 parse_pattern_hash_key(parser, &keys, key);
17211 pm_node_t *value = NULL;
17212
17213 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17214 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17215 } else {
17216 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17217 }
17218
17219 pm_token_t operator = not_provided(parser);
17220 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17221
17222 if (rest != NULL) {
17223 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17224 }
17225
17226 pm_node_list_append(&assocs, assoc);
17227 }
17228 }
17229
17230 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17231 xfree(assocs.nodes);
17232
17233 pm_static_literals_free(&keys);
17234 return node;
17235}
17236
17240static pm_node_t *
17241parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17242 switch (parser->current.type) {
17243 case PM_TOKEN_IDENTIFIER:
17244 case PM_TOKEN_METHOD_NAME: {
17245 parser_lex(parser);
17246 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17247
17248 int depth;
17249 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17250 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17251 }
17252
17253 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17254 return (pm_node_t *) pm_local_variable_target_node_create(
17255 parser,
17256 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17257 constant_id,
17258 (uint32_t) (depth == -1 ? 0 : depth)
17259 );
17260 }
17261 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17262 pm_token_t opening = parser->current;
17263 parser_lex(parser);
17264
17265 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17266 // If we have an empty array pattern, then we'll just return a new
17267 // array pattern node.
17268 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17269 }
17270
17271 // Otherwise, we'll parse the inner pattern, then deal with it depending
17272 // on the type it returns.
17273 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17274
17275 accept1(parser, PM_TOKEN_NEWLINE);
17276 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17277 pm_token_t closing = parser->previous;
17278
17279 switch (PM_NODE_TYPE(inner)) {
17280 case PM_ARRAY_PATTERN_NODE: {
17281 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17282 if (pattern_node->opening_loc.start == NULL) {
17283 pattern_node->base.location.start = opening.start;
17284 pattern_node->base.location.end = closing.end;
17285
17286 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17287 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17288
17289 return (pm_node_t *) pattern_node;
17290 }
17291
17292 break;
17293 }
17294 case PM_FIND_PATTERN_NODE: {
17295 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17296 if (pattern_node->opening_loc.start == NULL) {
17297 pattern_node->base.location.start = opening.start;
17298 pattern_node->base.location.end = closing.end;
17299
17300 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17301 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17302
17303 return (pm_node_t *) pattern_node;
17304 }
17305
17306 break;
17307 }
17308 default:
17309 break;
17310 }
17311
17312 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17313 pm_array_pattern_node_requireds_append(node, inner);
17314 return (pm_node_t *) node;
17315 }
17316 case PM_TOKEN_BRACE_LEFT: {
17317 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17318 parser->pattern_matching_newlines = false;
17319
17321 pm_token_t opening = parser->current;
17322 parser_lex(parser);
17323
17324 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17325 // If we have an empty hash pattern, then we'll just return a new hash
17326 // pattern node.
17327 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17328 } else {
17329 pm_node_t *first_node;
17330
17331 switch (parser->current.type) {
17332 case PM_TOKEN_LABEL:
17333 parser_lex(parser);
17334 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17335 break;
17336 case PM_TOKEN_USTAR_STAR:
17337 first_node = parse_pattern_keyword_rest(parser, captures);
17338 break;
17339 case PM_TOKEN_STRING_BEGIN:
17340 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17341 break;
17342 default: {
17343 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17344 parser_lex(parser);
17345
17346 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17347 break;
17348 }
17349 }
17350
17351 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17352
17353 accept1(parser, PM_TOKEN_NEWLINE);
17354 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17355 pm_token_t closing = parser->previous;
17356
17357 node->base.location.start = opening.start;
17358 node->base.location.end = closing.end;
17359
17360 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17361 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17362 }
17363
17364 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17365 return (pm_node_t *) node;
17366 }
17367 case PM_TOKEN_UDOT_DOT:
17368 case PM_TOKEN_UDOT_DOT_DOT: {
17369 pm_token_t operator = parser->current;
17370 parser_lex(parser);
17371
17372 // Since we have a unary range operator, we need to parse the subsequent
17373 // expression as the right side of the range.
17374 switch (parser->current.type) {
17375 case PM_CASE_PRIMITIVE: {
17376 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17377 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17378 }
17379 default: {
17380 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17381 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17382 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17383 }
17384 }
17385 }
17386 case PM_CASE_PRIMITIVE: {
17387 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17388
17389 // If we found a label, we need to immediately return to the caller.
17390 if (pm_symbol_node_label_p(node)) return node;
17391
17392 // Call nodes (arithmetic operations) are not allowed in patterns
17393 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17394 pm_parser_err_node(parser, node, diag_id);
17395 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
17396 pm_node_destroy(parser, node);
17397 return (pm_node_t *) missing_node;
17398 }
17399
17400 // Now that we have a primitive, we need to check if it's part of a range.
17401 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17402 pm_token_t operator = parser->previous;
17403
17404 // Now that we have the operator, we need to check if this is followed
17405 // by another expression. If it is, then we will create a full range
17406 // node. Otherwise, we'll create an endless range.
17407 switch (parser->current.type) {
17408 case PM_CASE_PRIMITIVE: {
17409 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17410 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17411 }
17412 default:
17413 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17414 }
17415 }
17416
17417 return node;
17418 }
17419 case PM_TOKEN_CARET: {
17420 parser_lex(parser);
17421 pm_token_t operator = parser->previous;
17422
17423 // At this point we have a pin operator. We need to check the subsequent
17424 // expression to determine if it's a variable or an expression.
17425 switch (parser->current.type) {
17426 case PM_TOKEN_IDENTIFIER: {
17427 parser_lex(parser);
17428 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17429
17430 if (variable == NULL) {
17431 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17432 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17433 }
17434
17435 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17436 }
17437 case PM_TOKEN_INSTANCE_VARIABLE: {
17438 parser_lex(parser);
17439 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17440
17441 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17442 }
17443 case PM_TOKEN_CLASS_VARIABLE: {
17444 parser_lex(parser);
17445 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17446
17447 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17448 }
17449 case PM_TOKEN_GLOBAL_VARIABLE: {
17450 parser_lex(parser);
17451 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17452
17453 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17454 }
17455 case PM_TOKEN_NUMBERED_REFERENCE: {
17456 parser_lex(parser);
17457 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17458
17459 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17460 }
17461 case PM_TOKEN_BACK_REFERENCE: {
17462 parser_lex(parser);
17463 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17464
17465 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17466 }
17467 case PM_TOKEN_PARENTHESIS_LEFT: {
17468 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17469 parser->pattern_matching_newlines = false;
17470
17471 pm_token_t lparen = parser->current;
17472 parser_lex(parser);
17473
17474 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17475 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17476
17477 accept1(parser, PM_TOKEN_NEWLINE);
17478 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17479 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17480 }
17481 default: {
17482 // If we get here, then we have a pin operator followed by something
17483 // not understood. We'll create a missing node and return that.
17484 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17485 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17486 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17487 }
17488 }
17489 }
17490 case PM_TOKEN_UCOLON_COLON: {
17491 pm_token_t delimiter = parser->current;
17492 parser_lex(parser);
17493
17494 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17495 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17496
17497 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17498 }
17499 case PM_TOKEN_CONSTANT: {
17500 pm_token_t constant = parser->current;
17501 parser_lex(parser);
17502
17503 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17504 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17505 }
17506 default:
17507 pm_parser_err_current(parser, diag_id);
17508 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17509 }
17510}
17511
17516static pm_node_t *
17517parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17518 pm_node_t *node = first_node;
17519
17520 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
17521 pm_token_t operator = parser->previous;
17522
17523 switch (parser->current.type) {
17524 case PM_TOKEN_IDENTIFIER:
17525 case PM_TOKEN_BRACKET_LEFT_ARRAY:
17526 case PM_TOKEN_BRACE_LEFT:
17527 case PM_TOKEN_CARET:
17528 case PM_TOKEN_CONSTANT:
17529 case PM_TOKEN_UCOLON_COLON:
17530 case PM_TOKEN_UDOT_DOT:
17531 case PM_TOKEN_UDOT_DOT_DOT:
17532 case PM_CASE_PRIMITIVE: {
17533 if (node == NULL) {
17534 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17535 } else {
17536 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17537 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17538 }
17539
17540 break;
17541 }
17542 case PM_TOKEN_PARENTHESIS_LEFT:
17543 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17544 pm_token_t opening = parser->current;
17545 parser_lex(parser);
17546
17547 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17548 accept1(parser, PM_TOKEN_NEWLINE);
17549 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17550 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
17551
17552 if (node == NULL) {
17553 node = right;
17554 } else {
17555 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17556 }
17557
17558 break;
17559 }
17560 default: {
17561 pm_parser_err_current(parser, diag_id);
17562 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17563
17564 if (node == NULL) {
17565 node = right;
17566 } else {
17567 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17568 }
17569
17570 break;
17571 }
17572 }
17573 }
17574
17575 // If we have an =>, then we are assigning this pattern to a variable.
17576 // In this case we should create an assignment node.
17577 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17578 pm_token_t operator = parser->previous;
17579 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17580
17581 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17582 int depth;
17583
17584 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17585 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17586 }
17587
17588 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17589 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17590 parser,
17591 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17592 constant_id,
17593 (uint32_t) (depth == -1 ? 0 : depth)
17594 );
17595
17596 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17597 }
17598
17599 return node;
17600}
17601
// Parse a pattern starting at the parser's current token and return the
// resulting node.
//
// `flags` is tested as a bitmask: PM_PARSE_PATTERN_TOP suppresses the
// PM_ERR_PATTERN_HASH_IMPLICIT error for brace-less hash patterns, and
// PM_PARSE_PATTERN_MULTI allows a comma-separated list that becomes an array
// or find pattern. Either flag allows a leading `*` rest pattern. `diag_id` is
// the diagnostic forwarded to the primitive parsers for the first pattern, and
// every nested parse call receives depth + 1.
17605static pm_node_t *
17606parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17607 pm_node_t *node = NULL;
17608
// leading_rest records that the pattern started with a `*` rest; trailing_rest
// records that a rest (explicit or implicit via trailing comma) was seen later
// in a comma-separated list.
17609 bool leading_rest = false;
17610 bool trailing_rest = false;
17611
17612 switch (parser->current.type) {
17613 case PM_TOKEN_LABEL: {
// A leading label starts a brace-less hash pattern.
17614 parser_lex(parser);
17615 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17616 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17617
17618 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17619 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17620 }
17621
17622 return node;
17623 }
17624 case PM_TOKEN_USTAR_STAR: {
// A leading `**` is parsed as a keyword rest and then used as the first
// element of a brace-less hash pattern.
17625 node = parse_pattern_keyword_rest(parser, captures);
17626 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17627
17628 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17629 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17630 }
17631
17632 return node;
17633 }
17634 case PM_TOKEN_STRING_BEGIN: {
17635 // We need special handling for string beginnings because they could
17636 // be dynamic symbols leading to hash patterns.
17637 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17638
17639 if (pm_symbol_node_label_p(node)) {
17640 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17641
17642 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17643 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17644 }
17645
17646 return node;
17647 }
17648
// Not a label: continue with the already parsed node as the first operand.
17649 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17650 break;
17651 }
17652 case PM_TOKEN_USTAR: {
// A leading `*` is only treated as a rest pattern when TOP or MULTI is set.
17653 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17654 parser_lex(parser);
17655 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17656 leading_rest = true;
17657 break;
17658 }
17659 }
// When neither flag is set, control falls through into the default case below.
17661 default:
17662 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17663 break;
17664 }
17665
17666 // If we got a dynamic label symbol, then we need to treat it like the
17667 // beginning of a hash pattern.
17668 if (pm_symbol_node_label_p(node)) {
17669 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17670 }
17671
17672 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17673 // If we have a comma, then we are now parsing either an array pattern
17674 // or a find pattern. We need to parse all of the patterns, put them
17675 // into a big list, and then determine which type of node we have.
17676 pm_node_list_t nodes = { 0 };
17677 pm_node_list_append(&nodes, node);
17678
17679 // Gather up all of the patterns into the list.
17680 while (accept1(parser, PM_TOKEN_COMMA)) {
17681 // Break early here in case we have a trailing comma.
17682 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
// The trailing comma itself is recorded as an implicit rest node.
17683 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17684 pm_node_list_append(&nodes, node);
17685 trailing_rest = true;
17686 break;
17687 }
17688
17689 if (accept1(parser, PM_TOKEN_USTAR)) {
17690 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17691
17692 // If we have already parsed a splat pattern, then this is an
17693 // error. We will continue to parse the rest of the patterns,
17694 // but we will indicate it as an error.
17695 if (trailing_rest) {
17696 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17697 }
17698
17699 trailing_rest = true;
17700 } else {
17701 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17702 }
17703
17704 pm_node_list_append(&nodes, node);
17705 }
17706
17707 // If the first pattern and the last pattern are rest patterns, then we
17708 // will call this a find pattern, regardless of how many rest patterns
17709 // are in between because we know we already added the appropriate
17710 // errors. Otherwise we will create an array pattern.
17711 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17712 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17713
// Only the two rest patterns were collected, with nothing between them.
17714 if (nodes.size == 2) {
17715 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17716 }
17717 } else {
17718 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17719
17720 if (leading_rest && trailing_rest) {
17721 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17722 }
17723 }
17724
// Only the list's backing array is released here, after the pattern node has
// been built from the list.
17725 xfree(nodes.nodes);
17726 } else if (leading_rest) {
17727 // Otherwise, if we parsed a single splat pattern, then we know we have
17728 // an array pattern, so we can go ahead and create that node.
17729 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17730 }
17731
17732 return node;
17733}
17734
17740static inline void
17741parse_negative_numeric(pm_node_t *node) {
17742 switch (PM_NODE_TYPE(node)) {
17743 case PM_INTEGER_NODE: {
17744 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17745 cast->base.location.start--;
17746 cast->value.negative = true;
17747 break;
17748 }
17749 case PM_FLOAT_NODE: {
17750 pm_float_node_t *cast = (pm_float_node_t *) node;
17751 cast->base.location.start--;
17752 cast->value = -cast->value;
17753 break;
17754 }
17755 case PM_RATIONAL_NODE: {
17756 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17757 cast->base.location.start--;
17758 cast->numerator.negative = true;
17759 break;
17760 }
17761 case PM_IMAGINARY_NODE:
17762 node->location.start--;
17763 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17764 break;
17765 default:
17766 assert(false && "unreachable");
17767 break;
17768 }
17769}
17770
17776static void
17777pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17778 switch (diag_id) {
17779 case PM_ERR_HASH_KEY: {
17780 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17781 break;
17782 }
17783 case PM_ERR_HASH_VALUE:
17784 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17785 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17786 break;
17787 }
17788 case PM_ERR_UNARY_RECEIVER: {
17789 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17790 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17791 break;
17792 }
17793 case PM_ERR_UNARY_DISALLOWED:
17794 case PM_ERR_EXPECT_ARGUMENT: {
17795 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17796 break;
17797 }
17798 default:
17799 pm_parser_err_previous(parser, diag_id);
17800 break;
17801 }
17802}
17803
17807static void
17808parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17809#define CONTEXT_NONE 0
17810#define CONTEXT_THROUGH_ENSURE 1
17811#define CONTEXT_THROUGH_ELSE 2
17812
17813 pm_context_node_t *context_node = parser->current_context;
17814 int context = CONTEXT_NONE;
17815
17816 while (context_node != NULL) {
17817 switch (context_node->context) {
17825 case PM_CONTEXT_DEFINED:
17827 // These are the good cases. We're allowed to have a retry here.
17828 return;
17829 case PM_CONTEXT_CLASS:
17830 case PM_CONTEXT_DEF:
17832 case PM_CONTEXT_MAIN:
17833 case PM_CONTEXT_MODULE:
17834 case PM_CONTEXT_PREEXE:
17835 case PM_CONTEXT_SCLASS:
17836 // These are the bad cases. We're not allowed to have a retry in
17837 // these contexts.
17838 if (context == CONTEXT_NONE) {
17839 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17840 } else if (context == CONTEXT_THROUGH_ENSURE) {
17841 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17842 } else if (context == CONTEXT_THROUGH_ELSE) {
17843 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17844 }
17845 return;
17853 // These are also bad cases, but with a more specific error
17854 // message indicating the else.
17855 context = CONTEXT_THROUGH_ELSE;
17856 break;
17864 // These are also bad cases, but with a more specific error
17865 // message indicating the ensure.
17866 context = CONTEXT_THROUGH_ENSURE;
17867 break;
17868 case PM_CONTEXT_NONE:
17869 // This case should never happen.
17870 assert(false && "unreachable");
17871 break;
17872 case PM_CONTEXT_BEGIN:
17875 case PM_CONTEXT_CASE_IN:
17878 case PM_CONTEXT_ELSE:
17879 case PM_CONTEXT_ELSIF:
17880 case PM_CONTEXT_EMBEXPR:
17882 case PM_CONTEXT_FOR:
17883 case PM_CONTEXT_IF:
17888 case PM_CONTEXT_PARENS:
17889 case PM_CONTEXT_POSTEXE:
17891 case PM_CONTEXT_TERNARY:
17892 case PM_CONTEXT_UNLESS:
17893 case PM_CONTEXT_UNTIL:
17894 case PM_CONTEXT_WHILE:
17895 // In these contexts we should continue walking up the list of
17896 // contexts.
17897 break;
17898 }
17899
17900 context_node = context_node->prev;
17901 }
17902
17903#undef CONTEXT_NONE
17904#undef CONTEXT_ENSURE
17905#undef CONTEXT_ELSE
17906}
17907
// Walk up the context stack, starting at the parser's current context, and
// add PM_ERR_INVALID_YIELD on the given node if a yield is not permitted
// there. The walk stops at the first context that either allows or forbids
// the yield.
// NOTE(review): several case labels of this switch do not appear in this
// view; confirm the full label lists against the complete file.
17911static void
17912parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17913 pm_context_node_t *context_node = parser->current_context;
17914
17915 while (context_node != NULL) {
17916 switch (context_node->context) {
17917 case PM_CONTEXT_DEF:
17919 case PM_CONTEXT_DEFINED:
17923 // These are the good cases. We're allowed to have a yield
17924 // in these contexts.
17925 return;
17926 case PM_CONTEXT_CLASS:
17930 case PM_CONTEXT_MAIN:
17931 case PM_CONTEXT_MODULE:
17935 case PM_CONTEXT_SCLASS:
17939 // These are the bad cases. We're not allowed to have a yield in
17940 // these contexts.
17941 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17942 return;
17943 case PM_CONTEXT_NONE:
17944 // This case should never happen.
17945 assert(false && "unreachable");
17946 break;
17947 case PM_CONTEXT_BEGIN:
17956 case PM_CONTEXT_CASE_IN:
17959 case PM_CONTEXT_ELSE:
17960 case PM_CONTEXT_ELSIF:
17961 case PM_CONTEXT_EMBEXPR:
17963 case PM_CONTEXT_FOR:
17964 case PM_CONTEXT_IF:
17972 case PM_CONTEXT_PARENS:
17973 case PM_CONTEXT_POSTEXE:
17975 case PM_CONTEXT_PREEXE:
17977 case PM_CONTEXT_TERNARY:
17978 case PM_CONTEXT_UNLESS:
17979 case PM_CONTEXT_UNTIL:
17980 case PM_CONTEXT_WHILE:
17981 // In these contexts we should continue walking up the list of
17982 // contexts.
17983 break;
17984 }
17985
// Move one level up the context stack and check again.
17986 context_node = context_node->prev;
17987 }
17988}
17989
// Data handed to the regular expression error callback.
// NOTE(review): some members, the closing brace and the typedef name of this
// struct do not appear in this view (the code that uses it also reads
// `parser` and `shared` members); confirm against the complete file.
17994typedef struct {
17997
// Start of the range that errors are reported on when the start/end pointers
// passed to the callback are not used.
17999 const uint8_t *start;
18000
// End of that same range.
18002 const uint8_t *end;
18003
18012
// Error callback passed to pm_regexp_parse. Adds a PM_ERR_REGEXP_PARSE_ERROR
// diagnostic, formatted with `message`, to the parser stored in the callback
// data.
// NOTE(review): the declaration of `callback_data` does not appear in this
// view; presumably it is obtained from the `data` argument, confirm against
// the complete file.
18017static void
18018parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
18020 pm_location_t location;
18021
// When the `shared` flag is set, the start/end passed to this callback are
// used directly as the error location; otherwise the error is reported on the
// start/end stored in the callback data.
18022 if (callback_data->shared) {
18023 location = (pm_location_t) { .start = start, .end = end };
18024 } else {
18025 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18026 }
18027
18028 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18029}
18030
// Run pm_regexp_parse over the unescaped source of the given regular
// expression node, passing parse_regular_expression_error as the error
// callback.
// NOTE(review): the line that declares `error_data` and opens its
// initializer does not appear in this view; confirm against the complete file.
18034static void
18035parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18036 const pm_string_t *unescaped = &node->unescaped;
// The callback data carries the parser, the node's own location, and whether
// the unescaped string is of type PM_STRING_SHARED.
18038 .parser = parser,
18039 .start = node->base.location.start,
18040 .end = node->base.location.end,
18041 .shared = unescaped->type == PM_STRING_SHARED
18042 };
18043
// The node's EXTENDED flag is passed along; the two arguments before the
// error callback are passed as NULL.
18044 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18045}
18046
18050static inline pm_node_t *
18051parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18052 switch (parser->current.type) {
18053 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
18054 parser_lex(parser);
18055
18056 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18057 pm_accepts_block_stack_push(parser, true);
18058 bool parsed_bare_hash = false;
18059
18060 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18061 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18062
18063 // Handle the case where we don't have a comma and we have a
18064 // newline followed by a right bracket.
18065 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18066 break;
18067 }
18068
18069 // Ensure that we have a comma between elements in the array.
18070 if (array->elements.size > 0) {
18071 if (accept1(parser, PM_TOKEN_COMMA)) {
18072 // If there was a comma but we also accepts a newline,
18073 // then this is a syntax error.
18074 if (accepted_newline) {
18075 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18076 }
18077 } else {
18078 // If there was no comma, then we need to add a syntax
18079 // error.
18080 const uint8_t *location = parser->previous.end;
18081 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18082
18083 parser->previous.start = location;
18084 parser->previous.type = PM_TOKEN_MISSING;
18085 }
18086 }
18087
18088 // If we have a right bracket immediately following a comma,
18089 // this is allowed since it's a trailing comma. In this case we
18090 // can break out of the loop.
18091 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18092
18093 pm_node_t *element;
18094
18095 if (accept1(parser, PM_TOKEN_USTAR)) {
18096 pm_token_t operator = parser->previous;
18097 pm_node_t *expression = NULL;
18098
18099 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18100 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18101 } else {
18102 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18103 }
18104
18105 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18106 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18107 if (parsed_bare_hash) {
18108 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18109 }
18110
18111 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18112 pm_static_literals_t hash_keys = { 0 };
18113
18114 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
18115 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18116 }
18117
18118 pm_static_literals_free(&hash_keys);
18119 parsed_bare_hash = true;
18120 } else {
18121 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18122
18123 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18124 if (parsed_bare_hash) {
18125 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18126 }
18127
18128 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18129 pm_static_literals_t hash_keys = { 0 };
18130 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18131
18132 pm_token_t operator;
18133 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18134 operator = parser->previous;
18135 } else {
18136 operator = not_provided(parser);
18137 }
18138
18139 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18140 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18141 pm_keyword_hash_node_elements_append(hash, assoc);
18142
18143 element = (pm_node_t *) hash;
18144 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18145 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18146 }
18147
18148 pm_static_literals_free(&hash_keys);
18149 parsed_bare_hash = true;
18150 }
18151 }
18152
18153 pm_array_node_elements_append(array, element);
18154 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18155 }
18156
18157 accept1(parser, PM_TOKEN_NEWLINE);
18158
18159 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18160 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18161 parser->previous.start = parser->previous.end;
18162 parser->previous.type = PM_TOKEN_MISSING;
18163 }
18164
18165 pm_array_node_close_set(array, &parser->previous);
18166 pm_accepts_block_stack_pop(parser);
18167
18168 return (pm_node_t *) array;
18169 }
18170 case PM_TOKEN_PARENTHESIS_LEFT:
18171 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
18172 pm_token_t opening = parser->current;
18173 pm_node_flags_t flags = 0;
18174
18175 pm_node_list_t current_block_exits = { 0 };
18176 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18177
18178 parser_lex(parser);
18179 while (true) {
18180 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18181 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18182 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18183 break;
18184 }
18185 }
18186
18187 // If this is the end of the file or we match a right parenthesis, then
18188 // we have an empty parentheses node, and we can immediately return.
18189 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18190 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18191
18192 pop_block_exits(parser, previous_block_exits);
18193 pm_node_list_free(&current_block_exits);
18194
18195 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
18196 }
18197
18198 // Otherwise, we're going to parse the first statement in the list
18199 // of statements within the parentheses.
18200 pm_accepts_block_stack_push(parser, true);
18201 context_push(parser, PM_CONTEXT_PARENS);
18202 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18203 context_pop(parser);
18204
18205 // Determine if this statement is followed by a terminator. In the
18206 // case of a single statement, this is fine. But in the case of
18207 // multiple statements it's required.
18208 bool terminator_found = false;
18209
18210 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18211 terminator_found = true;
18212 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18213 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18214 terminator_found = true;
18215 }
18216
18217 if (terminator_found) {
18218 while (true) {
18219 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18220 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18221 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18222 break;
18223 }
18224 }
18225 }
18226
18227 // If we hit a right parenthesis, then we're done parsing the
18228 // parentheses node, and we can check which kind of node we should
18229 // return.
18230 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18231 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
18232 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18233 }
18234
18235 parser_lex(parser);
18236 pm_accepts_block_stack_pop(parser);
18237
18238 pop_block_exits(parser, previous_block_exits);
18239 pm_node_list_free(&current_block_exits);
18240
18241 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18242 // If we have a single statement and are ending on a right
18243 // parenthesis, then we need to check if this is possibly a
18244 // multiple target node.
18245 pm_multi_target_node_t *multi_target;
18246
18247 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18248 multi_target = (pm_multi_target_node_t *) statement;
18249 } else {
18250 multi_target = pm_multi_target_node_create(parser);
18251 pm_multi_target_node_targets_append(parser, multi_target, statement);
18252 }
18253
18254 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18255 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18256
18257 multi_target->lparen_loc = lparen_loc;
18258 multi_target->rparen_loc = rparen_loc;
18259 multi_target->base.location.start = lparen_loc.start;
18260 multi_target->base.location.end = rparen_loc.end;
18261
18262 pm_node_t *result;
18263 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18264 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18265 accept1(parser, PM_TOKEN_NEWLINE);
18266 } else {
18267 result = (pm_node_t *) multi_target;
18268 }
18269
18270 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18271 // All set, this is explicitly allowed by the parent
18272 // context.
18273 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18274 // All set, we're inside a for loop and we're parsing
18275 // multiple targets.
18276 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18277 // Multi targets are not allowed when it's not a
18278 // statement level.
18279 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18280 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18281 // Multi targets must be followed by an equal sign in
18282 // order to be valid (or a right parenthesis if they are
18283 // nested).
18284 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18285 }
18286
18287 return result;
18288 }
18289
18290 // If we have a single statement and are ending on a right parenthesis
18291 // and we didn't return a multiple assignment node, then we can return a
18292 // regular parentheses node now.
18293 pm_statements_node_t *statements = pm_statements_node_create(parser);
18294 pm_statements_node_body_append(parser, statements, statement, true);
18295
18296 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18297 }
18298
18299 // If we have more than one statement in the set of parentheses,
18300 // then we are going to parse all of them as a list of statements.
18301 // We'll do that here.
18302 context_push(parser, PM_CONTEXT_PARENS);
18303 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18304
18305 pm_statements_node_t *statements = pm_statements_node_create(parser);
18306 pm_statements_node_body_append(parser, statements, statement, true);
18307
18308 // If we didn't find a terminator and we didn't find a right
18309 // parenthesis, then this is a syntax error.
18310 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18311 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18312 }
18313
18314 // Parse each statement within the parentheses.
18315 while (true) {
18316 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18317 pm_statements_node_body_append(parser, statements, node, true);
18318
18319 // If we're recovering from a syntax error, then we need to stop
18320 // parsing the statements now.
18321 if (parser->recovering) {
18322 // If this is the level of context where the recovery has
18323 // happened, then we can mark the parser as done recovering.
18324 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18325 break;
18326 }
18327
18328 // If we couldn't parse an expression at all, then we need to
18329 // bail out of the loop.
18330 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18331
18332 // If we successfully parsed a statement, then we are going to
18333 // need terminator to delimit them.
18334 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18335 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18336 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18337 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18338 break;
18339 } else if (!match1(parser, PM_TOKEN_EOF)) {
18340 // If we're at the end of the file, then we're going to add
18341 // an error after this for the ) anyway.
18342 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18343 }
18344 }
18345
18346 context_pop(parser);
18347 pm_accepts_block_stack_pop(parser);
18348 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18349
18350 // When we're parsing multi targets, we allow them to be followed by
18351 // a right parenthesis if they are at the statement level. This is
18352 // only possible if they are the final statement in a parentheses.
18353 // We need to explicitly reject that here.
18354 {
18355 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18356
18357 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18358 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18359 pm_multi_target_node_targets_append(parser, multi_target, statement);
18360
18361 statement = (pm_node_t *) multi_target;
18362 statements->body.nodes[statements->body.size - 1] = statement;
18363 }
18364
18365 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18366 const uint8_t *offset = statement->location.end;
18367 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18368 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18369
18370 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18371 statements->body.nodes[statements->body.size - 1] = statement;
18372
18373 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18374 }
18375 }
18376
18377 pop_block_exits(parser, previous_block_exits);
18378 pm_node_list_free(&current_block_exits);
18379
18380 pm_void_statements_check(parser, statements, true);
18381 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18382 }
18383 case PM_TOKEN_BRACE_LEFT: {
18384 // If we were passed a current_hash_keys via the parser, then that
18385 // means we're already parsing a hash and we want to share the set
18386 // of hash keys with this inner hash we're about to parse for the
18387 // sake of warnings. We'll set it to NULL after we grab it to make
18388 // sure subsequent expressions don't use it. Effectively this is a
18389 // way of getting around passing it to every call to
18390 // parse_expression.
18391 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18392 parser->current_hash_keys = NULL;
18393
18394 pm_accepts_block_stack_push(parser, true);
18395 parser_lex(parser);
18396
18397 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18398
18399 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18400 if (current_hash_keys != NULL) {
18401 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18402 } else {
18403 pm_static_literals_t hash_keys = { 0 };
18404 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18405 pm_static_literals_free(&hash_keys);
18406 }
18407
18408 accept1(parser, PM_TOKEN_NEWLINE);
18409 }
18410
18411 pm_accepts_block_stack_pop(parser);
18412 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18413 pm_hash_node_closing_loc_set(node, &parser->previous);
18414
18415 return (pm_node_t *) node;
18416 }
18417 case PM_TOKEN_CHARACTER_LITERAL: {
18418 parser_lex(parser);
18419
18420 pm_token_t opening = parser->previous;
18421 opening.type = PM_TOKEN_STRING_BEGIN;
18422 opening.end = opening.start + 1;
18423
18424 pm_token_t content = parser->previous;
18425 content.type = PM_TOKEN_STRING_CONTENT;
18426 content.start = content.start + 1;
18427
18428 pm_token_t closing = not_provided(parser);
18429 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18430 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18431
18432 // Characters can be followed by strings in which case they are
18433 // automatically concatenated.
18434 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18435 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18436 }
18437
18438 return node;
18439 }
18440 case PM_TOKEN_CLASS_VARIABLE: {
18441 parser_lex(parser);
18442 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18443
18444 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18445 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18446 }
18447
18448 return node;
18449 }
18450 case PM_TOKEN_CONSTANT: {
18451 parser_lex(parser);
18452 pm_token_t constant = parser->previous;
18453
18454 // If a constant is immediately followed by parentheses, then this is in
18455 // fact a method call, not a constant read.
18456 if (
18457 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18458 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18459 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18460 match1(parser, PM_TOKEN_BRACE_LEFT)
18461 ) {
18462 pm_arguments_t arguments = { 0 };
18463 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18464 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18465 }
18466
18467 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18468
18469 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18470 // If we get here, then we have a comma immediately following a
18471 // constant, so we're going to parse this as a multiple assignment.
18472 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18473 }
18474
18475 return node;
18476 }
18477 case PM_TOKEN_UCOLON_COLON: {
18478 parser_lex(parser);
18479 pm_token_t delimiter = parser->previous;
18480
18481 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18482 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18483
18484 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18485 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18486 }
18487
18488 return node;
18489 }
18490 case PM_TOKEN_UDOT_DOT:
18491 case PM_TOKEN_UDOT_DOT_DOT: {
18492 pm_token_t operator = parser->current;
18493 parser_lex(parser);
18494
18495 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18496
18497 // Unary .. and ... are special because these are non-associative
18498 // operators that can also be unary operators. In this case we need
18499 // to explicitly reject code that has a .. or ... that follows this
18500 // expression.
18501 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18502 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18503 }
18504
18505 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18506 }
18507 case PM_TOKEN_FLOAT:
18508 parser_lex(parser);
18509 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18510 case PM_TOKEN_FLOAT_IMAGINARY:
18511 parser_lex(parser);
18512 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18513 case PM_TOKEN_FLOAT_RATIONAL:
18514 parser_lex(parser);
18515 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18516 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
18517 parser_lex(parser);
18518 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18519 case PM_TOKEN_NUMBERED_REFERENCE: {
18520 parser_lex(parser);
18521 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18522
18523 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18524 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18525 }
18526
18527 return node;
18528 }
18529 case PM_TOKEN_GLOBAL_VARIABLE: {
18530 parser_lex(parser);
18531 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18532
18533 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18534 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18535 }
18536
18537 return node;
18538 }
18539 case PM_TOKEN_BACK_REFERENCE: {
18540 parser_lex(parser);
18541 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18542
18543 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18544 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18545 }
18546
18547 return node;
18548 }
18549 case PM_TOKEN_IDENTIFIER:
18550 case PM_TOKEN_METHOD_NAME: {
18551 parser_lex(parser);
18552 pm_token_t identifier = parser->previous;
18553 pm_node_t *node = parse_variable_call(parser);
18554
18555 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18556 // If parse_variable_call returned with a call node, then we
18557 // know the identifier is not in the local table. In that case
18558 // we need to check if there are arguments following the
18559 // identifier.
18560 pm_call_node_t *call = (pm_call_node_t *) node;
18561 pm_arguments_t arguments = { 0 };
18562
18563 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18564 // Since we found arguments, we need to turn off the
18565 // variable call bit in the flags.
18566 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18567
18568 call->opening_loc = arguments.opening_loc;
18569 call->arguments = arguments.arguments;
18570 call->closing_loc = arguments.closing_loc;
18571 call->block = arguments.block;
18572
18573 if (arguments.block != NULL) {
18574 call->base.location.end = arguments.block->location.end;
18575 } else if (arguments.closing_loc.start == NULL) {
18576 if (arguments.arguments != NULL) {
18577 call->base.location.end = arguments.arguments->base.location.end;
18578 } else {
18579 call->base.location.end = call->message_loc.end;
18580 }
18581 } else {
18582 call->base.location.end = arguments.closing_loc.end;
18583 }
18584 }
18585 } else {
18586 // Otherwise, we know the identifier is in the local table. This
18587 // can still be a method call if it is followed by arguments or
18588 // a block, so we need to check for that here.
18589 if (
18590 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18591 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18592 match1(parser, PM_TOKEN_BRACE_LEFT)
18593 ) {
18594 pm_arguments_t arguments = { 0 };
18595 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18596 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18597
18598 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
18599 // If we're about to convert an 'it' implicit local
18600 // variable read into a method call, we need to remove
18601 // it from the list of implicit local variables.
18602 parse_target_implicit_parameter(parser, node);
18603 } else {
18604 // Otherwise, we're about to convert a regular local
18605 // variable read into a method call, in which case we
18606 // need to indicate that this was not a read for the
18607 // purposes of warnings.
18608 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
18609
18610 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18611 parse_target_implicit_parameter(parser, node);
18612 } else {
18614 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18615 }
18616 }
18617
18618 pm_node_destroy(parser, node);
18619 return (pm_node_t *) fcall;
18620 }
18621 }
18622
18623 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18624 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18625 }
18626
18627 return node;
18628 }
18629 case PM_TOKEN_HEREDOC_START: {
18630 // Here we have found a heredoc. We'll parse it and add it to the
18631 // list of strings.
18632 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18633 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18634
18635 size_t common_whitespace = (size_t) -1;
18636 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18637
18638 parser_lex(parser);
18639 pm_token_t opening = parser->previous;
18640
18641 pm_node_t *node;
18642 pm_node_t *part;
18643
18644 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18645 // If we get here, then we have an empty heredoc. We'll create
18646 // an empty content token and return an empty string node.
18647 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18648 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18649
18650 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18651 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18652 } else {
18653 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18654 }
18655
18656 node->location.end = opening.end;
18657 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18658 // If we get here, then we tried to find something in the
18659 // heredoc but couldn't actually parse anything, so we'll just
18660 // return a missing node.
18661 //
18662 // parse_string_part handles its own errors, so there is no need
18663 // for us to add one here.
18664 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18665 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18666 // If we get here, then the part that we parsed was plain string
18667 // content and we're at the end of the heredoc, so we can return
18668 // just a string node with the heredoc opening and closing as
18669 // its opening and closing.
18670 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18671 pm_string_node_t *cast = (pm_string_node_t *) part;
18672
18673 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18674 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18675 cast->base.location = cast->opening_loc;
18676
18677 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18678 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18679 cast->base.type = PM_X_STRING_NODE;
18680 }
18681
18682 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18683 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18684 }
18685
18686 node = (pm_node_t *) cast;
18687 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18688 } else {
18689 // If we get here, then we have multiple parts in the heredoc,
18690 // so we'll need to create an interpolated string node to hold
18691 // them all.
18692 pm_node_list_t parts = { 0 };
18693 pm_node_list_append(&parts, part);
18694
18695 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18696 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18697 pm_node_list_append(&parts, part);
18698 }
18699 }
18700
18701 // Now that we have all of the parts, create the correct type of
18702 // interpolated node.
18703 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18704 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18705 cast->parts = parts;
18706
18707 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18708 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18709
18710 cast->base.location = cast->opening_loc;
18711 node = (pm_node_t *) cast;
18712 } else {
18713 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18714 pm_node_list_free(&parts);
18715
18716 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18717 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18718
18719 cast->base.location = cast->opening_loc;
18720 node = (pm_node_t *) cast;
18721 }
18722
18723 // If this is a heredoc that is indented with a ~, then we need
18724 // to dedent each line by the common leading whitespace.
18725 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18726 pm_node_list_t *nodes;
18727 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18728 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18729 } else {
18730 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18731 }
18732
18733 parse_heredoc_dedent(parser, nodes, common_whitespace);
18734 }
18735 }
18736
18737 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18738 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18739 }
18740
18741 return node;
18742 }
18743 case PM_TOKEN_INSTANCE_VARIABLE: {
18744 parser_lex(parser);
18745 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18746
18747 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18748 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18749 }
18750
18751 return node;
18752 }
18753 case PM_TOKEN_INTEGER: {
18754 pm_node_flags_t base = parser->integer_base;
18755 parser_lex(parser);
18756 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18757 }
18758 case PM_TOKEN_INTEGER_IMAGINARY: {
18759 pm_node_flags_t base = parser->integer_base;
18760 parser_lex(parser);
18761 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18762 }
18763 case PM_TOKEN_INTEGER_RATIONAL: {
18764 pm_node_flags_t base = parser->integer_base;
18765 parser_lex(parser);
18766 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18767 }
18768 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18769 pm_node_flags_t base = parser->integer_base;
18770 parser_lex(parser);
18771 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18772 }
18773 case PM_TOKEN_KEYWORD___ENCODING__:
18774 parser_lex(parser);
18775 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18776 case PM_TOKEN_KEYWORD___FILE__:
18777 parser_lex(parser);
18778 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18779 case PM_TOKEN_KEYWORD___LINE__:
18780 parser_lex(parser);
18781 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18782 case PM_TOKEN_KEYWORD_ALIAS: {
18783 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18784 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18785 }
18786
18787 parser_lex(parser);
18788 pm_token_t keyword = parser->previous;
18789
18790 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18791 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18792
18793 switch (PM_NODE_TYPE(new_name)) {
18794 case PM_BACK_REFERENCE_READ_NODE:
18795 case PM_NUMBERED_REFERENCE_READ_NODE:
18796 case PM_GLOBAL_VARIABLE_READ_NODE: {
18797 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18798 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18799 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18800 }
18801 } else {
18802 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18803 }
18804
18805 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18806 }
18807 case PM_SYMBOL_NODE:
18808 case PM_INTERPOLATED_SYMBOL_NODE: {
18809 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18810 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18811 }
18812 }
18814 default:
18815 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18816 }
18817 }
18818 case PM_TOKEN_KEYWORD_CASE: {
18819 size_t opening_newline_index = token_newline_index(parser);
18820 parser_lex(parser);
18821
18822 pm_token_t case_keyword = parser->previous;
18823 pm_node_t *predicate = NULL;
18824
18825 pm_node_list_t current_block_exits = { 0 };
18826 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18827
18828 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18829 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18830 predicate = NULL;
18831 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18832 predicate = NULL;
18833 } else if (!token_begins_expression_p(parser->current.type)) {
18834 predicate = NULL;
18835 } else {
18836 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18837 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18838 }
18839
18840 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18841 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18842 parser_lex(parser);
18843
18844 pop_block_exits(parser, previous_block_exits);
18845 pm_node_list_free(&current_block_exits);
18846
18847 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18848 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18849 }
18850
18851 // At this point we can create a case node, though we don't yet know
18852 // if it is a case-in or case-when node.
18853 pm_token_t end_keyword = not_provided(parser);
18854 pm_node_t *node;
18855
18856 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18857 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18858 pm_static_literals_t literals = { 0 };
18859
18860 // At this point we've seen a when keyword, so we know this is a
18861 // case-when node. We will continue to parse the when nodes
18862 // until we hit the end of the list.
18863 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18864 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18865 parser_lex(parser);
18866
18867 pm_token_t when_keyword = parser->previous;
18868 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18869
18870 do {
18871 if (accept1(parser, PM_TOKEN_USTAR)) {
18872 pm_token_t operator = parser->previous;
18873 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18874
18875 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18876 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18877
18878 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18879 } else {
18880 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18881 pm_when_node_conditions_append(when_node, condition);
18882
18883 // If we found a missing node, then this is a syntax
18884 // error and we should stop looping.
18885 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18886
18887 // If this is a string node, then we need to mark it
18888 // as frozen because when clause strings are frozen.
18889 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18890 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18891 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18892 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18893 }
18894
18895 pm_when_clause_static_literals_add(parser, &literals, condition);
18896 }
18897 } while (accept1(parser, PM_TOKEN_COMMA));
18898
18899 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18900 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18901 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18902 }
18903 } else {
18904 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18905 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18906 }
18907
18908 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18909 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18910 if (statements != NULL) {
18911 pm_when_node_statements_set(when_node, statements);
18912 }
18913 }
18914
18915 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18916 }
18917
18918 // If we didn't parse any conditions (in or when) then we need
18919 // to indicate that we have an error.
18920 if (case_node->conditions.size == 0) {
18921 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18922 }
18923
18924 pm_static_literals_free(&literals);
18925 node = (pm_node_t *) case_node;
18926 } else {
18927 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18928
18929 // If this is a case-match node (i.e., it is a pattern matching
18930 // case statement) then we must have a predicate.
18931 if (predicate == NULL) {
18932 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18933 }
18934
18935 // At this point we expect that we're parsing a case-in node. We
18936 // will continue to parse the in nodes until we hit the end of
18937 // the list.
18938 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18939 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18940
18941 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18942 parser->pattern_matching_newlines = true;
18943
18944 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18945 parser->command_start = false;
18946 parser_lex(parser);
18947
18948 pm_token_t in_keyword = parser->previous;
18949
18950 pm_constant_id_list_t captures = { 0 };
18951 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18952
18953 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18954 pm_constant_id_list_free(&captures);
18955
18956 // Since we're in the top-level of the case-in node we need
18957 // to check for guard clauses in the form of `if` or
18958 // `unless` statements.
18959 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18960 pm_token_t keyword = parser->previous;
18961 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18962 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18963 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18964 pm_token_t keyword = parser->previous;
18965 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18966 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18967 }
18968
18969 // Now we need to check for the terminator of the in node's
18970 // pattern. It can be a newline or semicolon optionally
18971 // followed by a `then` keyword.
18972 pm_token_t then_keyword;
18973 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18974 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18975 then_keyword = parser->previous;
18976 } else {
18977 then_keyword = not_provided(parser);
18978 }
18979 } else {
18980 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18981 then_keyword = parser->previous;
18982 }
18983
18984 // Now we can actually parse the statements associated with
18985 // the in node.
18986 pm_statements_node_t *statements;
18987 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18988 statements = NULL;
18989 } else {
18990 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18991 }
18992
18993 // Now that we have the full pattern and statements, we can
18994 // create the node and attach it to the case node.
18995 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
18996 pm_case_match_node_condition_append(case_node, condition);
18997 }
18998
18999 // If we didn't parse any conditions (in or when) then we need
19000 // to indicate that we have an error.
19001 if (case_node->conditions.size == 0) {
19002 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19003 }
19004
19005 node = (pm_node_t *) case_node;
19006 }
19007
19008 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19009 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
19010 pm_token_t else_keyword = parser->previous;
19011 pm_else_node_t *else_node;
19012
19013 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19014 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
19015 } else {
19016 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
19017 }
19018
19019 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19020 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
19021 } else {
19022 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
19023 }
19024 }
19025
19026 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
19027 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
19028
19029 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19030 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
19031 } else {
19032 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
19033 }
19034
19035 pop_block_exits(parser, previous_block_exits);
19036 pm_node_list_free(&current_block_exits);
19037
19038 return node;
19039 }
19040 case PM_TOKEN_KEYWORD_BEGIN: {
      // Parses a `begin` ... `end` expression and returns it as a begin
      // node. The body statements are optional; whatever clauses follow the
      // body are handed to parse_rescues before the closing `end` is
      // required.
19041 size_t opening_newline_index = token_newline_index(parser);
19042 parser_lex(parser);
19043
      // The keyword token is captured right after lexing past it so that it
      // can be attached to the node (and passed to parse_rescues) later.
19044 pm_token_t begin_keyword = parser->previous;
19045 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19046
      // A fresh block-exit list is pushed for the duration of this arm. The
      // value returned by push_block_exits is handed back to
      // pop_block_exits below, and the local list is freed before returning.
19047 pm_node_list_t current_block_exits = { 0 };
19048 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19049 pm_statements_node_t *begin_statements = NULL;
19050
      // Only parse a body if the next token is not one of the keywords that
      // would immediately start a clause or close the expression. Otherwise
      // begin_statements stays NULL.
19051 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19052 pm_accepts_block_stack_push(parser, true);
19053 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19054 pm_accepts_block_stack_pop(parser);
19055 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19056 }
19057
19058 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19059 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19060 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19061
      // Now that the `end` keyword has been consumed (or reported missing),
      // extend the node's location to it and record the keyword itself.
19062 begin_node->base.location.end = parser->previous.end;
19063 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19064
19065 pop_block_exits(parser, previous_block_exits);
19066 pm_node_list_free(&current_block_exits);
19067
19068 return (pm_node_t *) begin_node;
19069 }
19070 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
      // Parses a brace-delimited pre-execution block (the upcase `BEGIN`
      // keyword followed by `{ ... }`) and returns a pre-execution node.
19071 pm_node_list_t current_block_exits = { 0 };
19072 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19073
      // The keyword is only accepted at statement binding power; anywhere
      // else an error is recorded on the current token, but parsing goes on.
19074 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19075 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19076 }
19077
19078 parser_lex(parser);
19079 pm_token_t keyword = parser->previous;
19080
19081 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19082 pm_token_t opening = parser->previous;
19083 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19084
19085 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
      // After the closing brace, the enclosing context must be either the
      // main context or another pre-execution context; any other context is
      // reported against the keyword token.
19086 pm_context_t context = parser->current_context->context;
19087 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19088 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19089 }
19090
      // NOTE(review): this arm calls flush_block_exits where several sibling
      // arms call pop_block_exits; the difference between the two helpers is
      // not visible here.
19091 flush_block_exits(parser, previous_block_exits);
19092 pm_node_list_free(&current_block_exits);
19093
      // parser->previous is the closing brace token consumed (or
      // synthesized) by the expect1 call above.
19094 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19095 }
19096 case PM_TOKEN_KEYWORD_BREAK:
19097 case PM_TOKEN_KEYWORD_NEXT:
19098 case PM_TOKEN_KEYWORD_RETURN: {
      // Shared handling for the break, next and return keywords: consume
      // the keyword, optionally parse an argument list, then build the node
      // that matches the keyword that was consumed.
19099 parser_lex(parser);
19100
19101 pm_token_t keyword = parser->previous;
19102 pm_arguments_t arguments = { 0 };
19103
      // Arguments are only attempted when the next token can begin an
      // expression or is one of the two unary star tokens.
19104 if (
19105 token_begins_expression_p(parser->current.type) ||
19106 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19107 ) {
      // This local deliberately has the same name as the `binding_power`
      // read by other arms of the switch and shadows it inside this block.
      // It holds the left binding power of the upcoming token.
19108 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19109
      // Tokens whose left binding power is set but below the range level
      // are not treated as the start of an argument list.
19110 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19111 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19112 }
19113 }
19114
      // arguments.arguments stays NULL when no argument list was parsed
      // (the struct was zero-initialized above).
19115 switch (keyword.type) {
19116 case PM_TOKEN_KEYWORD_BREAK: {
19117 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
      // The block-exit check is skipped when parsing a partial script.
19118 if (!parser->partial_script) parse_block_exit(parser, node);
19119 return node;
19120 }
19121 case PM_TOKEN_KEYWORD_NEXT: {
19122 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19123 if (!parser->partial_script) parse_block_exit(parser, node);
19124 return node;
19125 }
19126 case PM_TOKEN_KEYWORD_RETURN: {
19127 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
      // Unlike break/next, return is always passed to parse_return,
      // regardless of partial_script.
19128 parse_return(parser, node);
19129 return node;
19130 }
19131 default:
      // The outer case labels admit only the three keywords handled above.
      // If assertions are compiled out, a missing node spanning the keyword
      // is returned instead.
19132 assert(false && "unreachable");
19133 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19134 }
19135 }
19136 case PM_TOKEN_KEYWORD_SUPER: {
      // Parses the super keyword and its optional argument list, returning
      // either a forwarding-super node or a regular super node.
19137 parser_lex(parser);
19138
19139 pm_token_t keyword = parser->previous;
19140 pm_arguments_t arguments = { 0 };
19141 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19142
      // A forwarding super is produced only when all of the following hold:
      // no opening location was recorded for the argument list, no
      // arguments were parsed, and the block slot is either empty or holds a
      // PM_BLOCK_NODE. Any other kind of node in the block slot yields a
      // regular super node.
19143 if (
19144 arguments.opening_loc.start == NULL &&
19145 arguments.arguments == NULL &&
19146 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19147 ) {
19148 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19149 }
19150
19151 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19152 }
19153 case PM_TOKEN_KEYWORD_YIELD: {
      // Parses the yield keyword and its optional argument list and returns
      // a yield node.
19154 parser_lex(parser);
19155
19156 pm_token_t keyword = parser->previous;
19157 pm_arguments_t arguments = { 0 };
19158 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19159
19160 // It's possible that we've parsed a block argument through our
19161 // call to parse_arguments_list. If we found one, we should mark it
19162 // as invalid and destroy it, as we don't have a place for it on the
19163 // yield node.
19164 if (arguments.block != NULL) {
      // The error is recorded before the node is destroyed, because the
      // error call is given the node itself.
19165 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19166 pm_node_destroy(parser, arguments.block);
19167 arguments.block = NULL;
19168 }
19169
19170 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
      // The parse_yield check is skipped both when parsing an eval and when
      // parsing a partial script.
19171 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19172
19173 return node;
19174 }
19175 case PM_TOKEN_KEYWORD_CLASS: {
      // Parses a class definition. Two forms are handled:
      //   * the keyword followed by `<<` and an expression, which returns a
      //     singleton class node, and
      //   * the keyword followed by a constant path and an optional
      //     `<` superclass, which returns a class node.
      // Both forms push a parser scope for the body and pop it after the
      // locals have been ordered.
19176 size_t opening_newline_index = token_newline_index(parser);
19177 parser_lex(parser);
19178
19179 pm_token_t class_keyword = parser->previous;
      // Pushed once here; each of the two return paths below pops it.
19180 pm_do_loop_stack_push(parser, false);
19181
19182 pm_node_list_t current_block_exits = { 0 };
19183 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19184
      // Singleton class form.
19185 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19186 pm_token_t operator = parser->previous;
19187 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19188
19189 pm_parser_scope_push(parser, true);
      // The delimiter is only checked here with match2, not consumed; a
      // missing newline/semicolon is reported against the current token.
19190 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19191 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19192 }
19193
19194 pm_node_t *statements = NULL;
19195 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19196 pm_accepts_block_stack_push(parser, true);
19197 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19198 pm_accepts_block_stack_pop(parser);
19199 }
19200
      // If a rescue or ensure keyword follows the body, the statements are
      // replaced by the result of parse_rescues_implicit_begin. Otherwise
      // only the indentation of the upcoming token is checked against the
      // class keyword.
19201 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19202 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19203 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19204 } else {
19205 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19206 }
19207
19208 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19209
      // The locals must be read from the current scope before that scope is
      // popped on the next lines.
19210 pm_constant_id_list_t locals;
19211 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19212
19213 pm_parser_scope_pop(parser);
19214 pm_do_loop_stack_pop(parser);
19215
      // NOTE(review): the singleton form calls flush_block_exits while the
      // regular form below calls pop_block_exits; the difference between
      // the helpers is not visible here.
19216 flush_block_exits(parser, previous_block_exits);
19217 pm_node_list_free(&current_block_exits);
19218
19219 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19220 }
19221
      // Regular class form. `name` is whatever token was consumed last while
      // parsing the constant path; an error is recorded if it is not a
      // constant token.
19222 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19223 pm_token_t name = parser->previous;
19224 if (name.type != PM_TOKEN_CONSTANT) {
19225 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19226 }
19227
19228 pm_token_t inheritance_operator;
19229 pm_node_t *superclass;
19230
      // Optional superclass. The lex state and command_start flag are set
      // before lexing past the `<` token, so they are in effect when the
      // first token of the superclass expression is lexed.
19231 if (match1(parser, PM_TOKEN_LESS)) {
19232 inheritance_operator = parser->current;
19233 lex_state_set(parser, PM_LEX_STATE_BEG);
19234
19235 parser->command_start = true;
19236 parser_lex(parser);
19237
19238 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19239 } else {
19240 inheritance_operator = not_provided(parser);
19241 superclass = NULL;
19242 }
19243
19244 pm_parser_scope_push(parser, true);
19245
      // When a superclass was given, a newline or semicolon is required
      // after it; without one the delimiter is optional.
19246 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19247 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19248 } else {
19249 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19250 }
19251 pm_node_t *statements = NULL;
19252
19253 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19254 pm_accepts_block_stack_push(parser, true);
19255 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19256 pm_accepts_block_stack_pop(parser);
19257 }
19258
19259 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19260 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19261 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19262 } else {
19263 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19264 }
19265
19266 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19267
      // A class definition for which context_def_p reports true is an
      // error, recorded against the class keyword.
19268 if (context_def_p(parser)) {
19269 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19270 }
19271
19272 pm_constant_id_list_t locals;
19273 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19274
19275 pm_parser_scope_pop(parser);
19276 pm_do_loop_stack_pop(parser);
19277
      // The parsed name must be either a constant path node or a constant
      // read node; anything else is reported against the node itself. This
      // is in addition to the token-level check made right after parsing.
19278 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19279 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19280 }
19281
19282 pop_block_exits(parser, previous_block_exits);
19283 pm_node_list_free(&current_block_exits);
19284
19285 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19286 }
19287 case PM_TOKEN_KEYWORD_DEF: {
      // Parses a method definition and returns a def node. The arm works in
      // four stages:
      //   1. the optional receiver and the method name (first switch),
      //   2. the parameter list, parenthesized or bare (second switch),
      //   3. the body, either `= expression` or statements up to `end`,
      //   4. collecting locals, popping the scope and building the node.
      // Every arm of the first switch pushes exactly one parser scope, and
      // every arm of the second switch pops the PM_CONTEXT_DEF_PARAMS
      // context exactly once.
19288 pm_node_list_t current_block_exits = { 0 };
19289 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19290
      // The keyword has not been lexed past yet, so it is still the current
      // token here.
19291 pm_token_t def_keyword = parser->current;
19292 size_t opening_newline_index = token_newline_index(parser);
19293
19294 pm_node_t *receiver = NULL;
19295 pm_token_t operator = not_provided(parser);
19296 pm_token_t name;
19297
19298 // This context is required so that `...` is lexed correctly in a bare
19299 // parameter list. It must be pushed before the first parameter is
19300 // lexed, so it is pushed here.
19301 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19302 parser_lex(parser);
19303
19304 // This will be false if the method name is not a valid identifier
19305 // but could be followed by an operator.
19306 bool valid_name = true;
19307
19308 switch (parser->current.type) {
19309 case PM_CASE_OPERATOR:
      // An operator token is taken directly as the method name.
19310 pm_parser_scope_push(parser, true);
19311 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19312 parser_lex(parser);
19313
19314 name = parser->previous;
19315 break;
19316 case PM_TOKEN_IDENTIFIER: {
19317 parser_lex(parser);
19318
      // An identifier followed by `.` or `::` is a receiver; the method name
      // comes after the operator. Otherwise the identifier itself is the
      // method name.
19319 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19320 receiver = parse_variable_call(parser);
19321
19322 pm_parser_scope_push(parser, true);
19323 lex_state_set(parser, PM_LEX_STATE_FNAME);
19324 parser_lex(parser);
19325
19326 operator = parser->previous;
19327 name = parse_method_definition_name(parser);
19328 } else {
19329 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19330 pm_parser_scope_push(parser, true);
19331
19332 name = parser->previous;
19333 }
19334
19335 break;
19336 }
19337 case PM_TOKEN_INSTANCE_VARIABLE:
19338 case PM_TOKEN_CLASS_VARIABLE:
19339 case PM_TOKEN_GLOBAL_VARIABLE:
      // Variables are acceptable only as receivers, never as the method
      // name itself, so remember that before sharing the code below.
19340 valid_name = false;
      // fallthrough (intentional): the shared body below builds receivers
      // for these variable tokens and reports an error via `valid_name`
      // when no `.`/`::` follows.
19342 case PM_TOKEN_CONSTANT:
19343 case PM_TOKEN_KEYWORD_NIL:
19344 case PM_TOKEN_KEYWORD_SELF:
19345 case PM_TOKEN_KEYWORD_TRUE:
19346 case PM_TOKEN_KEYWORD_FALSE:
19347 case PM_TOKEN_KEYWORD___FILE__:
19348 case PM_TOKEN_KEYWORD___LINE__:
19349 case PM_TOKEN_KEYWORD___ENCODING__: {
19350 pm_parser_scope_push(parser, true);
19351 parser_lex(parser);
19352
19353 pm_token_t identifier = parser->previous;
19354
19355 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19356 lex_state_set(parser, PM_LEX_STATE_FNAME);
19357 parser_lex(parser);
19358 operator = parser->previous;
19359
      // Build the receiver node that corresponds to the token that preceded
      // the operator. Every token type admitted by the outer case labels
      // has an entry here.
19360 switch (identifier.type) {
19361 case PM_TOKEN_CONSTANT:
19362 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19363 break;
19364 case PM_TOKEN_INSTANCE_VARIABLE:
19365 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19366 break;
19367 case PM_TOKEN_CLASS_VARIABLE:
19368 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19369 break;
19370 case PM_TOKEN_GLOBAL_VARIABLE:
19371 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19372 break;
19373 case PM_TOKEN_KEYWORD_NIL:
19374 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19375 break;
19376 case PM_TOKEN_KEYWORD_SELF:
19377 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19378 break;
19379 case PM_TOKEN_KEYWORD_TRUE:
19380 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19381 break;
19382 case PM_TOKEN_KEYWORD_FALSE:
19383 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19384 break;
19385 case PM_TOKEN_KEYWORD___FILE__:
19386 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19387 break;
19388 case PM_TOKEN_KEYWORD___LINE__:
19389 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19390 break;
19391 case PM_TOKEN_KEYWORD___ENCODING__:
19392 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19393 break;
19394 default:
19395 break;
19396 }
19397
19398 name = parse_method_definition_name(parser);
19399 } else {
      // No receiver operator followed, so the token itself is the method
      // name. That is an error for the variable tokens flagged above; the
      // token is still used as the name so parsing can continue.
19400 if (!valid_name) {
19401 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19402 }
19403
19404 name = identifier;
19405 }
19406 break;
19407 }
19408 case PM_TOKEN_PARENTHESIS_LEFT: {
19409 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19410 // the inner expression of this parenthesis should not be
19411 // processed under this context. Thus, the context is popped
19412 // here.
19413 context_pop(parser);
19414 parser_lex(parser);
19415
19416 pm_token_t lparen = parser->previous;
19417 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19418
19419 accept1(parser, PM_TOKEN_NEWLINE);
19420 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19421 pm_token_t rparen = parser->previous;
19422
      // A parenthesized receiver must be followed by `.` or `::`.
19423 lex_state_set(parser, PM_LEX_STATE_FNAME);
19424 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19425
19426 operator = parser->previous;
19427 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
19428
19429 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19430 // reason as described above.
19431 pm_parser_scope_push(parser, true);
19432 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19433 name = parse_method_definition_name(parser);
19434 break;
19435 }
19436 default:
      // Any other token is handed to parse_method_definition_name as-is.
19437 pm_parser_scope_push(parser, true);
19438 name = parse_method_definition_name(parser);
19439 break;
19440 }
19441
      // Stage 2: the parameter list. `lparen`/`rparen` here are distinct
      // from the block-scoped tokens of the same names in the parenthesized
      // receiver arm above.
19442 pm_token_t lparen;
19443 pm_token_t rparen;
19444 pm_parameters_node_t *params;
19445
19446 switch (parser->current.type) {
19447 case PM_TOKEN_PARENTHESIS_LEFT: {
19448 parser_lex(parser);
19449 lparen = parser->previous;
19450
      // An immediately closing parenthesis means there are no parameters.
19451 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19452 params = NULL;
19453 } else {
19454 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19455 }
19456
19457 lex_state_set(parser, PM_LEX_STATE_BEG);
19458 parser->command_start = true;
19459
19460 context_pop(parser);
      // If the closing parenthesis is missing, report it and rewrite
      // parser->previous into an empty MISSING token so that `rparen` below
      // is zero-width and marked as missing.
19461 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19462 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19463 parser->previous.start = parser->previous.end;
19464 parser->previous.type = PM_TOKEN_MISSING;
19465 }
19466
19467 rparen = parser->previous;
19468 break;
19469 }
19470 case PM_CASE_PARAMETER: {
19471 // If we're about to lex a label, we need to add the label
19472 // state to make sure the next newline is ignored.
19473 if (parser->current.type == PM_TOKEN_LABEL) {
19474 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19475 }
19476
      // Bare parameter list: there are no parentheses to record.
19477 lparen = not_provided(parser);
19478 rparen = not_provided(parser);
19479 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19480
19481 context_pop(parser);
19482 break;
19483 }
19484 default: {
      // No parameter list at all.
19485 lparen = not_provided(parser);
19486 rparen = not_provided(parser);
19487 params = NULL;
19488
19489 context_pop(parser);
19490 break;
19491 }
19492 }
19493
      // Stage 3: the body.
19494 pm_node_t *statements = NULL;
19495 pm_token_t equal;
19496 pm_token_t end_keyword;
19497
      // `= expression` form: the body is a single expression and there is no
      // `end` keyword.
19498 if (accept1(parser, PM_TOKEN_EQUAL)) {
      // A setter name is rejected for this form; the error is recorded on
      // the name token and parsing continues.
19499 if (token_is_setter_name(&name)) {
19500 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19501 }
19502 equal = parser->previous;
19503
19504 context_push(parser, PM_CONTEXT_DEF);
19505 pm_do_loop_stack_push(parser, false);
19506 statements = (pm_node_t *) pm_statements_node_create(parser);
19507
19508 // In endless method bodies, we need to handle command calls carefully.
19509 // We want to allow command calls in assignment context but maintain
19510 // the same binding power to avoid changing how operators are parsed.
19511 // Note that we're intentionally NOT allowing code like `private def foo = puts "Hello"`
19512 // because the original parser, parse.y, can't handle it and we want to maintain the same behavior
19513 bool allow_command_call = (binding_power == PM_BINDING_POWER_ASSIGNMENT) ||
19514 (binding_power < PM_BINDING_POWER_COMPOSITION);
19515
19516 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19517
      // A trailing rescue modifier wraps the body expression in a rescue
      // modifier node; the rescue value is parsed inside its own context.
19518 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19519 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19520
19521 pm_token_t rescue_keyword = parser->previous;
19522 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19523 context_pop(parser);
19524
19525 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19526 }
19527
19528 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19529 pm_do_loop_stack_pop(parser);
19530 context_pop(parser);
19531 end_keyword = not_provided(parser);
19532 } else {
19533 equal = not_provided(parser);
19534
      // Without parentheses around the parameters, a newline or semicolon
      // is required before the body; with parentheses it is optional.
19535 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19536 lex_state_set(parser, PM_LEX_STATE_BEG);
19537 parser->command_start = true;
19538 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19539 } else {
19540 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19541 }
19542
      // This outer accepts-block push stays in effect across the rescue
      // handling below and is popped just before `end` is expected. The
      // statements are additionally parsed under a nested push/pop pair.
19543 pm_accepts_block_stack_push(parser, true);
19544 pm_do_loop_stack_push(parser, false);
19545
19546 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19547 pm_accepts_block_stack_push(parser, true);
19548 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19549 pm_accepts_block_stack_pop(parser);
19550 }
19551
      // If a rescue, ensure or else keyword follows, the statements are
      // replaced by the result of parse_rescues_implicit_begin. Otherwise
      // only the indentation of the upcoming token is checked against the
      // def keyword.
19552 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19553 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19554 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19555 } else {
19556 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19557 }
19558
19559 pm_accepts_block_stack_pop(parser);
19560 pm_do_loop_stack_pop(parser);
19561
19562 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19563 end_keyword = parser->previous;
19564 }
19565
      // Stage 4: the locals must be read from the current scope before the
      // scope pushed during name parsing is popped.
19566 pm_constant_id_list_t locals;
19567 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19568 pm_parser_scope_pop(parser);
19569
      // The constant id for the method name is taken from a source span
      // that starts at name.start; the other bound is whatever
      // parse_operator_symbol_name returns for the name token.
      // NOTE(review): presumably that helper adjusts the end of operator
      // method names; confirm against its definition.
19575 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19576
19577 flush_block_exits(parser, previous_block_exits);
19578 pm_node_list_free(&current_block_exits);
19579
19580 return (pm_node_t *) pm_def_node_create(
19581 parser,
19582 name_id,
19583 &name,
19584 receiver,
19585 params,
19586 statements,
19587 &locals,
19588 &def_keyword,
19589 &operator,
19590 &lparen,
19591 &rparen,
19592 &equal,
19593 &end_keyword
19594 );
19595 }
19596 case PM_TOKEN_KEYWORD_DEFINED: {
      // Parses the defined keyword and its operand, with or without
      // parentheses, and returns a defined node. The operand is parsed
      // inside a PM_CONTEXT_DEFINED context that is popped before returning.
19597 parser_lex(parser);
19598 pm_token_t keyword = parser->previous;
19599
19600 pm_token_t lparen;
19601 pm_token_t rparen;
19602 pm_node_t *expression;
19603
19604 context_push(parser, PM_CONTEXT_DEFINED);
      // Remember whether a newline separated the keyword from what follows;
      // it changes how an empty pair of parentheses is treated below.
19605 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19606
19607 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19608 lparen = parser->previous;
19609
      // After a newline, an immediately closed `()` is treated as the
      // operand itself (an empty parentheses node) rather than as the
      // keyword's own parentheses, so both paren tokens are then marked as
      // not provided.
19610 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19611 expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19612 lparen = not_provided(parser);
19613 rparen = not_provided(parser);
19614 } else {
19615 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19616
      // While the parser is recovering from an error, the closing
      // parenthesis is not demanded, so no further error is recorded for it.
19617 if (parser->recovering) {
19618 rparen = not_provided(parser);
19619 } else {
19620 accept1(parser, PM_TOKEN_NEWLINE);
19621 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19622 rparen = parser->previous;
19623 }
19624 }
19625 } else {
      // No parentheses: the operand is parsed at the DEFINED binding power
      // with command calls disallowed.
19626 lparen = not_provided(parser);
19627 rparen = not_provided(parser);
19628 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19629 }
19630
19631 context_pop(parser);
19632 return (pm_node_t *) pm_defined_node_create(
19633 parser,
19634 &lparen,
19635 expression,
19636 &rparen,
19637 &PM_LOCATION_TOKEN_VALUE(&keyword)
19638 );
19639 }
19640 case PM_TOKEN_KEYWORD_END_UPCASE: {
      // Parses a brace-delimited post-execution block (the upcase `END`
      // keyword followed by `{ ... }`) and returns a post-execution node.
      // The keyword is only accepted at statement binding power; anywhere
      // else an error is recorded on the current token, but parsing goes on.
19641 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19642 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19643 }
19644
19645 parser_lex(parser);
19646 pm_token_t keyword = parser->previous;
19647
      // When context_def_p reports true, this only produces a warning, not
      // an error.
19648 if (context_def_p(parser)) {
19649 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19650 }
19651
19652 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19653 pm_token_t opening = parser->previous;
19654 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19655
19656 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
      // parser->previous is the closing brace token consumed (or
      // synthesized) by the expect1 call above.
19657 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19658 }
19659 case PM_TOKEN_KEYWORD_FALSE:
      // Consume the keyword and wrap the just-consumed token in a false
      // node.
19660 parser_lex(parser);
19661 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19662 case PM_TOKEN_KEYWORD_FOR: {
19663 size_t opening_newline_index = token_newline_index(parser);
19664 parser_lex(parser);
19665
19666 pm_token_t for_keyword = parser->previous;
19667 pm_node_t *index;
19668
19669 context_push(parser, PM_CONTEXT_FOR_INDEX);
19670
19671 // First, parse out the first index expression.
19672 if (accept1(parser, PM_TOKEN_USTAR)) {
19673 pm_token_t star_operator = parser->previous;
19674 pm_node_t *name = NULL;
19675
19676 if (token_begins_expression_p(parser->current.type)) {
19677 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19678 }
19679
19680 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19681 } else if (token_begins_expression_p(parser->current.type)) {
19682 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19683 } else {
19684 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19685 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19686 }
19687
19688 // Now, if there are multiple index expressions, parse them out.
19689 if (match1(parser, PM_TOKEN_COMMA)) {
19690 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19691 } else {
19692 index = parse_target(parser, index, false, false);
19693 }
19694
19695 context_pop(parser);
19696 pm_do_loop_stack_push(parser, true);
19697
19698 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19699 pm_token_t in_keyword = parser->previous;
19700
19701 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19702 pm_do_loop_stack_pop(parser);
19703
19704 pm_token_t do_keyword;
19705 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19706 do_keyword = parser->previous;
19707 } else {
19708 do_keyword = not_provided(parser);
19709 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19710 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19711 }
19712 }
19713
19714 pm_statements_node_t *statements = NULL;
19715 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19716 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19717 }
19718
19719 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19720 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19721
19722 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19723 }
19724 case PM_TOKEN_KEYWORD_IF:
19725 if (parser_end_of_line_p(parser)) {
19726 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19727 }
19728
19729 size_t opening_newline_index = token_newline_index(parser);
19730 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19731 parser_lex(parser);
19732
19733 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19734 case PM_TOKEN_KEYWORD_UNDEF: {
19735 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19736 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19737 }
19738
19739 parser_lex(parser);
19740 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19741 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19742
19743 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19744 pm_node_destroy(parser, name);
19745 } else {
19746 pm_undef_node_append(undef, name);
19747
19748 while (match1(parser, PM_TOKEN_COMMA)) {
19749 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19750 parser_lex(parser);
19751 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19752
19753 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19754 pm_node_destroy(parser, name);
19755 break;
19756 }
19757
19758 pm_undef_node_append(undef, name);
19759 }
19760 }
19761
19762 return (pm_node_t *) undef;
19763 }
19764 case PM_TOKEN_KEYWORD_NOT: {
19765 parser_lex(parser);
19766
19767 pm_token_t message = parser->previous;
19768 pm_arguments_t arguments = { 0 };
19769 pm_node_t *receiver = NULL;
19770
19771 // If we do not accept a command call, then we also do not accept a
19772 // not without parentheses. In this case we need to reject this
19773 // syntax.
19774 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19775 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19776 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19777 } else {
19778 accept1(parser, PM_TOKEN_NEWLINE);
19779 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19780 }
19781
19782 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
19783 }
19784
19785 accept1(parser, PM_TOKEN_NEWLINE);
19786
19787 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19788 pm_token_t lparen = parser->previous;
19789
19790 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19791 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19792 } else {
19793 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19794 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19795
19796 if (!parser->recovering) {
19797 accept1(parser, PM_TOKEN_NEWLINE);
19798 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19799 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19800 }
19801 }
19802 } else {
19803 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19804 }
19805
19806 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19807 }
19808 case PM_TOKEN_KEYWORD_UNLESS: {
19809 size_t opening_newline_index = token_newline_index(parser);
19810 parser_lex(parser);
19811
19812 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19813 }
19814 case PM_TOKEN_KEYWORD_MODULE: {
19815 pm_node_list_t current_block_exits = { 0 };
19816 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19817
19818 size_t opening_newline_index = token_newline_index(parser);
19819 parser_lex(parser);
19820 pm_token_t module_keyword = parser->previous;
19821
19822 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19823 pm_token_t name;
19824
19825 // If we can recover from a syntax error that occurred while parsing
19826 // the name of the module, then we'll handle that here.
19827 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19828 pop_block_exits(parser, previous_block_exits);
19829 pm_node_list_free(&current_block_exits);
19830
19831 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19832 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19833 }
19834
19835 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19836 pm_token_t double_colon = parser->previous;
19837
19838 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19839 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19840 }
19841
19842 // Here we retrieve the name of the module. If it wasn't a constant,
19843 // then it's possible that `module foo` was passed, which is a
19844 // syntax error. We handle that here as well.
19845 name = parser->previous;
19846 if (name.type != PM_TOKEN_CONSTANT) {
19847 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19848 }
19849
19850 pm_parser_scope_push(parser, true);
19851 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19852 pm_node_t *statements = NULL;
19853
19854 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19855 pm_accepts_block_stack_push(parser, true);
19856 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19857 pm_accepts_block_stack_pop(parser);
19858 }
19859
19860 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19861 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19862 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19863 } else {
19864 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19865 }
19866
19867 pm_constant_id_list_t locals;
19868 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19869
19870 pm_parser_scope_pop(parser);
19871 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19872
19873 if (context_def_p(parser)) {
19874 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19875 }
19876
19877 pop_block_exits(parser, previous_block_exits);
19878 pm_node_list_free(&current_block_exits);
19879
19880 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19881 }
19882 case PM_TOKEN_KEYWORD_NIL:
19883 parser_lex(parser);
19884 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19885 case PM_TOKEN_KEYWORD_REDO: {
19886 parser_lex(parser);
19887
19888 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19889 if (!parser->partial_script) parse_block_exit(parser, node);
19890
19891 return node;
19892 }
19893 case PM_TOKEN_KEYWORD_RETRY: {
19894 parser_lex(parser);
19895
19896 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19897 parse_retry(parser, node);
19898
19899 return node;
19900 }
19901 case PM_TOKEN_KEYWORD_SELF:
19902 parser_lex(parser);
19903 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19904 case PM_TOKEN_KEYWORD_TRUE:
19905 parser_lex(parser);
19906 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19907 case PM_TOKEN_KEYWORD_UNTIL: {
19908 size_t opening_newline_index = token_newline_index(parser);
19909
19910 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19911 pm_do_loop_stack_push(parser, true);
19912
19913 parser_lex(parser);
19914 pm_token_t keyword = parser->previous;
19915 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19916
19917 pm_do_loop_stack_pop(parser);
19918 context_pop(parser);
19919
19920 pm_token_t do_keyword;
19921 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19922 do_keyword = parser->previous;
19923 } else {
19924 do_keyword = not_provided(parser);
19925 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19926 }
19927
19928 pm_statements_node_t *statements = NULL;
19929 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19930 pm_accepts_block_stack_push(parser, true);
19931 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19932 pm_accepts_block_stack_pop(parser);
19933 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19934 }
19935
19936 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19937 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19938
19939 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19940 }
19941 case PM_TOKEN_KEYWORD_WHILE: {
19942 size_t opening_newline_index = token_newline_index(parser);
19943
19944 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19945 pm_do_loop_stack_push(parser, true);
19946
19947 parser_lex(parser);
19948 pm_token_t keyword = parser->previous;
19949 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19950
19951 pm_do_loop_stack_pop(parser);
19952 context_pop(parser);
19953
19954 pm_token_t do_keyword;
19955 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19956 do_keyword = parser->previous;
19957 } else {
19958 do_keyword = not_provided(parser);
19959 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19960 }
19961
19962 pm_statements_node_t *statements = NULL;
19963 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19964 pm_accepts_block_stack_push(parser, true);
19965 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19966 pm_accepts_block_stack_pop(parser);
19967 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19968 }
19969
19970 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19971 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19972
19973 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19974 }
19975 case PM_TOKEN_PERCENT_LOWER_I: {
19976 parser_lex(parser);
19977 pm_token_t opening = parser->previous;
19978 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19979
19980 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19981 accept1(parser, PM_TOKEN_WORDS_SEP);
19982 if (match1(parser, PM_TOKEN_STRING_END)) break;
19983
19984 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19985 pm_token_t opening = not_provided(parser);
19986 pm_token_t closing = not_provided(parser);
19987 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19988 }
19989
19990 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19991 }
19992
19993 pm_token_t closing = parser->current;
19994 if (match1(parser, PM_TOKEN_EOF)) {
19995 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19996 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19997 } else {
19998 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19999 }
20000 pm_array_node_close_set(array, &closing);
20001
20002 return (pm_node_t *) array;
20003 }
20004 case PM_TOKEN_PERCENT_UPPER_I: {
20005 parser_lex(parser);
20006 pm_token_t opening = parser->previous;
20007 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20008
20009 // This is the current node that we are parsing that will be added to the
20010 // list of elements.
20011 pm_node_t *current = NULL;
20012
20013 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20014 switch (parser->current.type) {
20015 case PM_TOKEN_WORDS_SEP: {
20016 if (current == NULL) {
20017 // If we hit a separator before we have any content, then we don't
20018 // need to do anything.
20019 } else {
20020 // If we hit a separator after we've hit content, then we need to
20021 // append that content to the list and reset the current node.
20022 pm_array_node_elements_append(array, current);
20023 current = NULL;
20024 }
20025
20026 parser_lex(parser);
20027 break;
20028 }
20029 case PM_TOKEN_STRING_CONTENT: {
20030 pm_token_t opening = not_provided(parser);
20031 pm_token_t closing = not_provided(parser);
20032
20033 if (current == NULL) {
20034 // If we hit content and the current node is NULL, then this is
20035 // the first string content we've seen. In that case we're going
20036 // to create a new string node and set that to the current.
20037 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
20038 parser_lex(parser);
20039 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20040 // If we hit string content and the current node is an
20041 // interpolated string, then we need to append the string content
20042 // to the list of child nodes.
20043 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20044 parser_lex(parser);
20045
20046 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
20047 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20048 // If we hit string content and the current node is a symbol node,
20049 // then we need to convert the current node into an interpolated
20050 // string and add the string content to the list of child nodes.
20051 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20052 pm_token_t bounds = not_provided(parser);
20053
20054 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
20055 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
20056 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
20057 parser_lex(parser);
20058
20059 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20060 pm_interpolated_symbol_node_append(interpolated, first_string);
20061 pm_interpolated_symbol_node_append(interpolated, second_string);
20062
20063 xfree(current);
20064 current = (pm_node_t *) interpolated;
20065 } else {
20066 assert(false && "unreachable");
20067 }
20068
20069 break;
20070 }
20071 case PM_TOKEN_EMBVAR: {
20072 bool start_location_set = false;
20073 if (current == NULL) {
20074 // If we hit an embedded variable and the current node is NULL,
20075 // then this is the start of a new string. We'll set the current
20076 // node to a new interpolated string.
20077 pm_token_t opening = not_provided(parser);
20078 pm_token_t closing = not_provided(parser);
20079 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20080 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20081 // If we hit an embedded variable and the current node is a string
20082 // node, then we'll convert the current into an interpolated
20083 // string and add the string node to the list of parts.
20084 pm_token_t opening = not_provided(parser);
20085 pm_token_t closing = not_provided(parser);
20086 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20087
20088 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20089 pm_interpolated_symbol_node_append(interpolated, current);
20090 interpolated->base.location.start = current->location.start;
20091 start_location_set = true;
20092 current = (pm_node_t *) interpolated;
20093 } else {
20094 // If we hit an embedded variable and the current node is an
20095 // interpolated string, then we'll just add the embedded variable.
20096 }
20097
20098 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20099 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20100 if (!start_location_set) {
20101 current->location.start = part->location.start;
20102 }
20103 break;
20104 }
20105 case PM_TOKEN_EMBEXPR_BEGIN: {
20106 bool start_location_set = false;
20107 if (current == NULL) {
20108 // If we hit an embedded expression and the current node is NULL,
20109 // then this is the start of a new string. We'll set the current
20110 // node to a new interpolated string.
20111 pm_token_t opening = not_provided(parser);
20112 pm_token_t closing = not_provided(parser);
20113 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20114 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20115 // If we hit an embedded expression and the current node is a
20116 // string node, then we'll convert the current into an
20117 // interpolated string and add the string node to the list of
20118 // parts.
20119 pm_token_t opening = not_provided(parser);
20120 pm_token_t closing = not_provided(parser);
20121 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20122
20123 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20124 pm_interpolated_symbol_node_append(interpolated, current);
20125 interpolated->base.location.start = current->location.start;
20126 start_location_set = true;
20127 current = (pm_node_t *) interpolated;
20128 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20129 // If we hit an embedded expression and the current node is an
20130 // interpolated string, then we'll just continue on.
20131 } else {
20132 assert(false && "unreachable");
20133 }
20134
20135 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20136 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20137 if (!start_location_set) {
20138 current->location.start = part->location.start;
20139 }
20140 break;
20141 }
20142 default:
20143 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20144 parser_lex(parser);
20145 break;
20146 }
20147 }
20148
20149 // If we have a current node, then we need to append it to the list.
20150 if (current) {
20151 pm_array_node_elements_append(array, current);
20152 }
20153
20154 pm_token_t closing = parser->current;
20155 if (match1(parser, PM_TOKEN_EOF)) {
20156 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20157 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20158 } else {
20159 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20160 }
20161 pm_array_node_close_set(array, &closing);
20162
20163 return (pm_node_t *) array;
20164 }
20165 case PM_TOKEN_PERCENT_LOWER_W: {
20166 parser_lex(parser);
20167 pm_token_t opening = parser->previous;
20168 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20169
20170 // skip all leading whitespaces
20171 accept1(parser, PM_TOKEN_WORDS_SEP);
20172
20173 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20174 accept1(parser, PM_TOKEN_WORDS_SEP);
20175 if (match1(parser, PM_TOKEN_STRING_END)) break;
20176
20177 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20178 pm_token_t opening = not_provided(parser);
20179 pm_token_t closing = not_provided(parser);
20180
20181 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20182 pm_array_node_elements_append(array, string);
20183 }
20184
20185 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20186 }
20187
20188 pm_token_t closing = parser->current;
20189 if (match1(parser, PM_TOKEN_EOF)) {
20190 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20191 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20192 } else {
20193 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20194 }
20195
20196 pm_array_node_close_set(array, &closing);
20197 return (pm_node_t *) array;
20198 }
20199 case PM_TOKEN_PERCENT_UPPER_W: {
20200 parser_lex(parser);
20201 pm_token_t opening = parser->previous;
20202 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20203
20204 // This is the current node that we are parsing that will be added
20205 // to the list of elements.
20206 pm_node_t *current = NULL;
20207
20208 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20209 switch (parser->current.type) {
20210 case PM_TOKEN_WORDS_SEP: {
20211 // Reset the explicit encoding if we hit a separator
20212 // since each element can have its own encoding.
20213 parser->explicit_encoding = NULL;
20214
20215 if (current == NULL) {
20216 // If we hit a separator before we have any content,
20217 // then we don't need to do anything.
20218 } else {
20219 // If we hit a separator after we've hit content,
20220 // then we need to append that content to the list
20221 // and reset the current node.
20222 pm_array_node_elements_append(array, current);
20223 current = NULL;
20224 }
20225
20226 parser_lex(parser);
20227 break;
20228 }
20229 case PM_TOKEN_STRING_CONTENT: {
20230 pm_token_t opening = not_provided(parser);
20231 pm_token_t closing = not_provided(parser);
20232
20233 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20234 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20235 parser_lex(parser);
20236
20237 if (current == NULL) {
20238 // If we hit content and the current node is NULL,
20239 // then this is the first string content we've seen.
20240 // In that case we're going to create a new string
20241 // node and set that to the current.
20242 current = string;
20243 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20244 // If we hit string content and the current node is
20245 // an interpolated string, then we need to append
20246 // the string content to the list of child nodes.
20247 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20248 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20249 // If we hit string content and the current node is
20250 // a string node, then we need to convert the
20251 // current node into an interpolated string and add
20252 // the string content to the list of child nodes.
20253 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20254 pm_interpolated_string_node_append(interpolated, current);
20255 pm_interpolated_string_node_append(interpolated, string);
20256 current = (pm_node_t *) interpolated;
20257 } else {
20258 assert(false && "unreachable");
20259 }
20260
20261 break;
20262 }
20263 case PM_TOKEN_EMBVAR: {
20264 if (current == NULL) {
20265 // If we hit an embedded variable and the current
20266 // node is NULL, then this is the start of a new
20267 // string. We'll set the current node to a new
20268 // interpolated string.
20269 pm_token_t opening = not_provided(parser);
20270 pm_token_t closing = not_provided(parser);
20271 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20272 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20273 // If we hit an embedded variable and the current
20274 // node is a string node, then we'll convert the
20275 // current into an interpolated string and add the
20276 // string node to the list of parts.
20277 pm_token_t opening = not_provided(parser);
20278 pm_token_t closing = not_provided(parser);
20279 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20280 pm_interpolated_string_node_append(interpolated, current);
20281 current = (pm_node_t *) interpolated;
20282 } else {
20283 // If we hit an embedded variable and the current
20284 // node is an interpolated string, then we'll just
20285 // add the embedded variable.
20286 }
20287
20288 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20289 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20290 break;
20291 }
20292 case PM_TOKEN_EMBEXPR_BEGIN: {
20293 if (current == NULL) {
20294 // If we hit an embedded expression and the current
20295 // node is NULL, then this is the start of a new
20296 // string. We'll set the current node to a new
20297 // interpolated string.
20298 pm_token_t opening = not_provided(parser);
20299 pm_token_t closing = not_provided(parser);
20300 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20301 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20302 // If we hit an embedded expression and the current
20303 // node is a string node, then we'll convert the
20304 // current into an interpolated string and add the
20305 // string node to the list of parts.
20306 pm_token_t opening = not_provided(parser);
20307 pm_token_t closing = not_provided(parser);
20308 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20309 pm_interpolated_string_node_append(interpolated, current);
20310 current = (pm_node_t *) interpolated;
20311 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20312 // If we hit an embedded expression and the current
20313 // node is an interpolated string, then we'll just
20314 // continue on.
20315 } else {
20316 assert(false && "unreachable");
20317 }
20318
20319 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20320 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20321 break;
20322 }
20323 default:
20324 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20325 parser_lex(parser);
20326 break;
20327 }
20328 }
20329
20330 // If we have a current node, then we need to append it to the list.
20331 if (current) {
20332 pm_array_node_elements_append(array, current);
20333 }
20334
20335 pm_token_t closing = parser->current;
20336 if (match1(parser, PM_TOKEN_EOF)) {
20337 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20338 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20339 } else {
20340 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20341 }
20342
20343 pm_array_node_close_set(array, &closing);
20344 return (pm_node_t *) array;
20345 }
20346 case PM_TOKEN_REGEXP_BEGIN: {
20347 pm_token_t opening = parser->current;
20348 parser_lex(parser);
20349
20350 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20351 // If we get here, then we have an end immediately after a start. In
20352 // that case we'll create an empty content token and return an
20353 // uninterpolated regular expression.
20354 pm_token_t content = (pm_token_t) {
20355 .type = PM_TOKEN_STRING_CONTENT,
20356 .start = parser->previous.end,
20357 .end = parser->previous.end
20358 };
20359
20360 parser_lex(parser);
20361
20362 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20363 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
20364
20365 return node;
20366 }
20367
20369
20370 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20371 // In this case we've hit string content so we know the regular
20372 // expression at least has something in it. We'll need to check if the
20373 // following token is the end (in which case we can return a plain
20374 // regular expression) or if it's not then it has interpolation.
20375 pm_string_t unescaped = parser->current_string;
20376 pm_token_t content = parser->current;
20377 bool ascii_only = parser->current_regular_expression_ascii_only;
20378 parser_lex(parser);
20379
20380 // If we hit an end, then we can create a regular expression
20381 // node without interpolation, which can be represented more
20382 // succinctly and more easily compiled.
20383 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20384 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20385
20386 // If we're not immediately followed by a =~, then we want
20387 // to parse all of the errors at this point. If it is
20388 // followed by a =~, then it will get parsed higher up while
20389 // parsing the named captures as well.
20390 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20391 parse_regular_expression_errors(parser, node);
20392 }
20393
20394 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20395 return (pm_node_t *) node;
20396 }
20397
20398 // If we get here, then we have interpolation so we'll need to create
20399 // a regular expression node with interpolation.
20400 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20401
20402 pm_token_t opening = not_provided(parser);
20403 pm_token_t closing = not_provided(parser);
20404 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20405
20406 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20407 // This is extremely strange, but the first string part of a
20408 // regular expression will always be tagged as binary if we
20409 // are in a US-ASCII file, no matter its contents.
20410 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20411 }
20412
20413 pm_interpolated_regular_expression_node_append(interpolated, part);
20414 } else {
20415 // If the first part of the body of the regular expression is not a
20416 // string content, then we have interpolation and we need to create an
20417 // interpolated regular expression node.
20418 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20419 }
20420
20421 // Now that we're here and we have interpolation, we'll parse all of the
20422 // parts into the list.
20423 pm_node_t *part;
20424 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20425 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20426 pm_interpolated_regular_expression_node_append(interpolated, part);
20427 }
20428 }
20429
20430 pm_token_t closing = parser->current;
20431 if (match1(parser, PM_TOKEN_EOF)) {
20432 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20433 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20434 } else {
20435 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20436 }
20437
20438 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20439 return (pm_node_t *) interpolated;
20440 }
20441 case PM_TOKEN_BACKTICK:
20442 case PM_TOKEN_PERCENT_LOWER_X: {
20443 parser_lex(parser);
20444 pm_token_t opening = parser->previous;
20445
20446 // When we get here, we don't know if this string is going to have
20447 // interpolation or not, even though it is allowed. Still, we want to be
20448 // able to return a string node without interpolation if we can since
20449 // it'll be faster.
20450 if (match1(parser, PM_TOKEN_STRING_END)) {
20451 // If we get here, then we have an end immediately after a start. In
20452 // that case we'll create an empty content token and return an
20453 // uninterpolated string.
20454 pm_token_t content = (pm_token_t) {
20455 .type = PM_TOKEN_STRING_CONTENT,
20456 .start = parser->previous.end,
20457 .end = parser->previous.end
20458 };
20459
20460 parser_lex(parser);
20461 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20462 }
20463
20465
20466 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20467 // In this case we've hit string content so we know the string
20468 // at least has something in it. We'll need to check if the
20469 // following token is the end (in which case we can return a
20470 // plain string) or if it's not then it has interpolation.
20471 pm_string_t unescaped = parser->current_string;
20472 pm_token_t content = parser->current;
20473 parser_lex(parser);
20474
20475 if (match1(parser, PM_TOKEN_STRING_END)) {
20476 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20477 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20478 parser_lex(parser);
20479 return node;
20480 }
20481
20482 // If we get here, then we have interpolation so we'll need to
20483 // create a string node with interpolation.
20484 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20485
20486 pm_token_t opening = not_provided(parser);
20487 pm_token_t closing = not_provided(parser);
20488
20489 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20490 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20491
20492 pm_interpolated_xstring_node_append(node, part);
20493 } else {
20494 // If the first part of the body of the string is not a string
20495 // content, then we have interpolation and we need to create an
20496 // interpolated string node.
20497 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20498 }
20499
20500 pm_node_t *part;
20501 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20502 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20503 pm_interpolated_xstring_node_append(node, part);
20504 }
20505 }
20506
20507 pm_token_t closing = parser->current;
20508 if (match1(parser, PM_TOKEN_EOF)) {
20509 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20510 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20511 } else {
20512 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20513 }
20514 pm_interpolated_xstring_node_closing_set(node, &closing);
20515
20516 return (pm_node_t *) node;
20517 }
20518 case PM_TOKEN_USTAR: {
20519 parser_lex(parser);
20520
20521 // * operators at the beginning of expressions are only valid in the
20522 // context of a multiple assignment. We enforce that here. We'll
20523 // still lex past it though and create a missing node place.
20524 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20525 pm_parser_err_prefix(parser, diag_id);
20526 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20527 }
20528
20529 pm_token_t operator = parser->previous;
20530 pm_node_t *name = NULL;
20531
20532 if (token_begins_expression_p(parser->current.type)) {
20533 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20534 }
20535
20536 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20537
20538 if (match1(parser, PM_TOKEN_COMMA)) {
20539 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20540 } else {
20541 return parse_target_validate(parser, splat, true);
20542 }
20543 }
20544 case PM_TOKEN_BANG: {
20545 if (binding_power > PM_BINDING_POWER_UNARY) {
20546 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20547 }
20548
20549 parser_lex(parser);
20550
20551 pm_token_t operator = parser->previous;
20552 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20553 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20554
20555 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20556 return (pm_node_t *) node;
20557 }
20558 case PM_TOKEN_TILDE: {
20559 if (binding_power > PM_BINDING_POWER_UNARY) {
20560 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20561 }
20562 parser_lex(parser);
20563
20564 pm_token_t operator = parser->previous;
20565 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20566 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20567
20568 return (pm_node_t *) node;
20569 }
20570 case PM_TOKEN_UMINUS: {
20571 if (binding_power > PM_BINDING_POWER_UNARY) {
20572 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20573 }
20574 parser_lex(parser);
20575
20576 pm_token_t operator = parser->previous;
20577 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20578 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20579
20580 return (pm_node_t *) node;
20581 }
20582 case PM_TOKEN_UMINUS_NUM: {
20583 parser_lex(parser);
20584
20585 pm_token_t operator = parser->previous;
20586 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20587
20588 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20589 pm_token_t exponent_operator = parser->previous;
20590 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20591 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20592 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20593 } else {
20594 switch (PM_NODE_TYPE(node)) {
20595 case PM_INTEGER_NODE:
20596 case PM_FLOAT_NODE:
20597 case PM_RATIONAL_NODE:
20598 case PM_IMAGINARY_NODE:
20599 parse_negative_numeric(node);
20600 break;
20601 default:
20602 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20603 break;
20604 }
20605 }
20606
20607 return node;
20608 }
20609 case PM_TOKEN_MINUS_GREATER: {
20610 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20612
20613 size_t opening_newline_index = token_newline_index(parser);
20614 pm_accepts_block_stack_push(parser, true);
20615 parser_lex(parser);
20616
20617 pm_token_t operator = parser->previous;
20618 pm_parser_scope_push(parser, false);
20619
20620 pm_block_parameters_node_t *block_parameters;
20621
20622 switch (parser->current.type) {
20623 case PM_TOKEN_PARENTHESIS_LEFT: {
20624 pm_token_t opening = parser->current;
20625 parser_lex(parser);
20626
20627 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20628 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20629 } else {
20630 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20631 }
20632
20633 accept1(parser, PM_TOKEN_NEWLINE);
20634 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20635
20636 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20637 break;
20638 }
20639 case PM_CASE_PARAMETER: {
20640 pm_accepts_block_stack_push(parser, false);
20641 pm_token_t opening = not_provided(parser);
20642 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20643 pm_accepts_block_stack_pop(parser);
20644 break;
20645 }
20646 default: {
20647 block_parameters = NULL;
20648 break;
20649 }
20650 }
20651
20652 pm_token_t opening;
20653 pm_node_t *body = NULL;
20654 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20655
20656 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20657 opening = parser->previous;
20658
20659 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20660 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20661 }
20662
20663 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20664 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20665 } else {
20666 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20667 opening = parser->previous;
20668
20669 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20670 pm_accepts_block_stack_push(parser, true);
20671 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20672 pm_accepts_block_stack_pop(parser);
20673 }
20674
20675 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20676 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20677 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20678 } else {
20679 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20680 }
20681
20682 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20683 }
20684
20685 pm_constant_id_list_t locals;
20686 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20687 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20688
20689 pm_parser_scope_pop(parser);
20690 pm_accepts_block_stack_pop(parser);
20691
20692 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20693 }
20694 case PM_TOKEN_UPLUS: {
20695 if (binding_power > PM_BINDING_POWER_UNARY) {
20696 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20697 }
20698 parser_lex(parser);
20699
20700 pm_token_t operator = parser->previous;
20701 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20702 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20703
20704 return (pm_node_t *) node;
20705 }
20706 case PM_TOKEN_STRING_BEGIN:
20707 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20708 case PM_TOKEN_SYMBOL_BEGIN: {
20709 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20710 parser_lex(parser);
20711
20712 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20713 }
20714 default: {
20715 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20716
20717 if (recoverable != PM_CONTEXT_NONE) {
20718 parser->recovering = true;
20719
20720 // If the given error is not the generic one, then we'll add it
20721 // here because it will provide more context in addition to the
20722 // recoverable error that we will also add.
20723 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20724 pm_parser_err_prefix(parser, diag_id);
20725 }
20726
20727 // If we get here, then we are assuming this token is closing a
20728 // parent context, so we'll indicate that to the user so that
20729 // they know how we behaved.
20730 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20731 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20732 // We're going to make a special case here, because "cannot
20733 // parse expression" is pretty generic, and we know here that we
20734 // have an unexpected token.
20735 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20736 } else {
20737 pm_parser_err_prefix(parser, diag_id);
20738 }
20739
20740 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20741 }
20742 }
20743}
20744
20754static pm_node_t *
20755parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20756 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20757
20758 // Contradicting binding powers, the right-hand-side value of the assignment
20759 // allows the `rescue` modifier.
20760 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20761 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20762
20763 pm_token_t rescue = parser->current;
20764 parser_lex(parser);
20765
20766 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20767 context_pop(parser);
20768
20769 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20770 }
20771
20772 return value;
20773}
20774
20779static void
20780parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20781 switch (PM_NODE_TYPE(node)) {
20782 case PM_BEGIN_NODE: {
20783 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20784 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20785 break;
20786 }
20787 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20789 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20790 break;
20791 }
20792 case PM_PARENTHESES_NODE: {
20793 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20794 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20795 break;
20796 }
20797 case PM_STATEMENTS_NODE: {
20798 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20799 const pm_node_t *statement;
20800
20801 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20802 parse_assignment_value_local(parser, statement);
20803 }
20804 break;
20805 }
20806 default:
20807 break;
20808 }
20809}
20810
20823static pm_node_t *
20824parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20825 bool permitted = true;
20826 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20827
20828 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20829 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20830
20831 parse_assignment_value_local(parser, value);
20832 bool single_value = true;
20833
20834 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20835 single_value = false;
20836
20837 pm_token_t opening = not_provided(parser);
20838 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20839
20840 pm_array_node_elements_append(array, value);
20841 value = (pm_node_t *) array;
20842
20843 while (accept1(parser, PM_TOKEN_COMMA)) {
20844 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20845
20846 pm_array_node_elements_append(array, element);
20847 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20848
20849 parse_assignment_value_local(parser, element);
20850 }
20851 }
20852
20853 // Contradicting binding powers, the right-hand-side value of the assignment
20854 // allows the `rescue` modifier.
20855 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20856 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20857
20858 pm_token_t rescue = parser->current;
20859 parser_lex(parser);
20860
20861 bool accepts_command_call_inner = false;
20862
20863 // RHS can accept command call iff the value is a call with arguments
20864 // but without parenthesis.
20865 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20866 pm_call_node_t *call_node = (pm_call_node_t *) value;
20867 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20868 accepts_command_call_inner = true;
20869 }
20870 }
20871
20872 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20873 context_pop(parser);
20874
20875 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20876 }
20877
20878 return value;
20879}
20880
20888static void
20889parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20890 if (call_node->arguments != NULL) {
20891 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20892 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20893 call_node->arguments = NULL;
20894 }
20895
20896 if (call_node->block != NULL) {
20897 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20898 pm_node_destroy(parser, (pm_node_t *) call_node->block);
20899 call_node->block = NULL;
20900 }
20901}
20902
20927
20928static inline const uint8_t *
20929pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20930 cursor++;
20931
20932 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20933 uint8_t value = escape_hexadecimal_digit(*cursor);
20934 cursor++;
20935
20936 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20937 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20938 cursor++;
20939 }
20940
20941 pm_buffer_append_byte(unescaped, value);
20942 } else {
20943 pm_buffer_append_string(unescaped, "\\x", 2);
20944 }
20945
20946 return cursor;
20947}
20948
20949static inline const uint8_t *
20950pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20951 uint8_t value = (uint8_t) (*cursor - '0');
20952 cursor++;
20953
20954 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20955 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20956 cursor++;
20957
20958 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20959 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20960 cursor++;
20961 }
20962 }
20963
20964 pm_buffer_append_byte(unescaped, value);
20965 return cursor;
20966}
20967
20968static inline const uint8_t *
20969pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20970 const uint8_t *start = cursor - 1;
20971 cursor++;
20972
20973 if (cursor >= end) {
20974 pm_buffer_append_string(unescaped, "\\u", 2);
20975 return cursor;
20976 }
20977
20978 if (*cursor != '{') {
20979 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20980 uint32_t value = escape_unicode(parser, cursor, length);
20981
20982 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20983 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20984 }
20985
20986 return cursor + length;
20987 }
20988
20989 cursor++;
20990 for (;;) {
20991 while (cursor < end && *cursor == ' ') cursor++;
20992
20993 if (cursor >= end) break;
20994 if (*cursor == '}') {
20995 cursor++;
20996 break;
20997 }
20998
20999 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
21000 uint32_t value = escape_unicode(parser, cursor, length);
21001
21002 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
21003 cursor += length;
21004 }
21005
21006 return cursor;
21007}
21008
21009static void
21010pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
21011 const uint8_t *end = source + length;
21012 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
21013
21014 for (;;) {
21015 if (++cursor >= end) {
21016 pm_buffer_append_byte(unescaped, '\\');
21017 return;
21018 }
21019
21020 switch (*cursor) {
21021 case 'x':
21022 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
21023 break;
21024 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
21025 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
21026 break;
21027 case 'u':
21028 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
21029 break;
21030 default:
21031 pm_buffer_append_byte(unescaped, '\\');
21032 break;
21033 }
21034
21035 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
21036 if (next_cursor == NULL) break;
21037
21038 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
21039 cursor = next_cursor;
21040 }
21041
21042 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
21043}
21044
21049static void
21050parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
21052
21053 pm_parser_t *parser = callback_data->parser;
21054 pm_call_node_t *call = callback_data->call;
21055 pm_constant_id_list_t *names = &callback_data->names;
21056
21057 const uint8_t *source = pm_string_source(capture);
21058 size_t length = pm_string_length(capture);
21059 pm_buffer_t unescaped = { 0 };
21060
21061 // First, we need to handle escapes within the name of the capture group.
21062 // This is because regular expressions have three different representations
21063 // in prism. The first is the plain source code. The second is the
21064 // representation that will be sent to the regular expression engine, which
21065 // is the value of the "unescaped" field. This is poorly named, because it
21066 // actually still contains escapes, just a subset of them that the regular
21067 // expression engine knows how to handle. The third representation is fully
21068 // unescaped, which is what we need.
21069 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
21070 if (PRISM_UNLIKELY(cursor != NULL)) {
21071 pm_named_capture_escape(parser, &unescaped, source, length, cursor);
21072 source = (const uint8_t *) pm_buffer_value(&unescaped);
21073 length = pm_buffer_length(&unescaped);
21074 }
21075
21076 pm_location_t location;
21077 pm_constant_id_t name;
21078
21079 // If the name of the capture group isn't a valid identifier, we do
21080 // not add it to the local table.
21081 if (!pm_slice_is_valid_local(parser, source, source + length)) {
21082 pm_buffer_free(&unescaped);
21083 return;
21084 }
21085
21086 if (callback_data->shared) {
21087 // If the unescaped string is a slice of the source, then we can
21088 // copy the names directly. The pointers will line up.
21089 location = (pm_location_t) { .start = source, .end = source + length };
21090 name = pm_parser_constant_id_location(parser, location.start, location.end);
21091 } else {
21092 // Otherwise, the name is a slice of the malloc-ed owned string,
21093 // in which case we need to copy it out into a new string.
21094 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
21095
21096 void *memory = xmalloc(length);
21097 if (memory == NULL) abort();
21098
21099 memcpy(memory, source, length);
21100 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
21101 }
21102
21103 // Add this name to the list of constants if it is valid, not duplicated,
21104 // and not a keyword.
21105 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
21106 pm_constant_id_list_append(names, name);
21107
21108 int depth;
21109 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
21110 // If the local is not already a local but it is a keyword, then we
21111 // do not want to add a capture for this.
21112 if (pm_local_is_keyword((const char *) source, length)) {
21113 pm_buffer_free(&unescaped);
21114 return;
21115 }
21116
21117 // If the identifier is not already a local, then we will add it to
21118 // the local table.
21119 pm_parser_local_add(parser, name, location.start, location.end, 0);
21120 }
21121
21122 // Here we lazily create the MatchWriteNode since we know we're
21123 // about to add a target.
21124 if (callback_data->match == NULL) {
21125 callback_data->match = pm_match_write_node_create(parser, call);
21126 }
21127
21128 // Next, create the local variable target and add it to the list of
21129 // targets for the match.
21130 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
21131 pm_node_list_append(&callback_data->match->targets, target);
21132 }
21133
21134 pm_buffer_free(&unescaped);
21135}
21136
21141static pm_node_t *
21142parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
21144 .parser = parser,
21145 .call = call,
21146 .names = { 0 },
21147 .shared = content->type == PM_STRING_SHARED
21148 };
21149
21151 .parser = parser,
21152 .start = call->receiver->location.start,
21153 .end = call->receiver->location.end,
21154 .shared = content->type == PM_STRING_SHARED
21155 };
21156
21157 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
21158 pm_constant_id_list_free(&callback_data.names);
21159
21160 if (callback_data.match != NULL) {
21161 return (pm_node_t *) callback_data.match;
21162 } else {
21163 return (pm_node_t *) call;
21164 }
21165}
21166
21167static inline pm_node_t *
21168parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
21169 pm_token_t token = parser->current;
21170
21171 switch (token.type) {
21172 case PM_TOKEN_EQUAL: {
21173 switch (PM_NODE_TYPE(node)) {
21174 case PM_CALL_NODE: {
21175 // If we have no arguments to the call node and we need this
21176 // to be a target then this is either a method call or a
21177 // local variable write. This _must_ happen before the value
21178 // is parsed because it could be referenced in the value.
21179 pm_call_node_t *call_node = (pm_call_node_t *) node;
21180 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21181 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
21182 }
21183 }
21185 case PM_CASE_WRITABLE: {
21186 // When we have `it = value`, we need to add `it` as a local
21187 // variable before parsing the value, in case the value
21188 // references the variable.
21189 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
21190 pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
21191 }
21192
21193 parser_lex(parser);
21194 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21195
21196 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21197 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21198 }
21199
21200 return parse_write(parser, node, &token, value);
21201 }
21202 case PM_SPLAT_NODE: {
21203 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21204 pm_multi_target_node_targets_append(parser, multi_target, node);
21205
21206 parser_lex(parser);
21207 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21208 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
21209 }
21210 case PM_SOURCE_ENCODING_NODE:
21211 case PM_FALSE_NODE:
21212 case PM_SOURCE_FILE_NODE:
21213 case PM_SOURCE_LINE_NODE:
21214 case PM_NIL_NODE:
21215 case PM_SELF_NODE:
21216 case PM_TRUE_NODE: {
21217 // In these special cases, we have specific error messages
21218 // and we will replace them with local variable writes.
21219 parser_lex(parser);
21220 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21221 return parse_unwriteable_write(parser, node, &token, value);
21222 }
21223 default:
21224 // In this case we have an = sign, but we don't know what
21225 // it's for. We need to treat it as an error. We'll mark it
21226 // as an error and skip past it.
21227 parser_lex(parser);
21228 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21229 return node;
21230 }
21231 }
21232 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
21233 switch (PM_NODE_TYPE(node)) {
21234 case PM_BACK_REFERENCE_READ_NODE:
21235 case PM_NUMBERED_REFERENCE_READ_NODE:
21236 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21238 case PM_GLOBAL_VARIABLE_READ_NODE: {
21239 parser_lex(parser);
21240
21241 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21242 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21243
21244 pm_node_destroy(parser, node);
21245 return result;
21246 }
21247 case PM_CLASS_VARIABLE_READ_NODE: {
21248 parser_lex(parser);
21249
21250 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21251 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21252
21253 pm_node_destroy(parser, node);
21254 return result;
21255 }
21256 case PM_CONSTANT_PATH_NODE: {
21257 parser_lex(parser);
21258
21259 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21260 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21261
21262 return parse_shareable_constant_write(parser, write);
21263 }
21264 case PM_CONSTANT_READ_NODE: {
21265 parser_lex(parser);
21266
21267 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21268 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21269
21270 pm_node_destroy(parser, node);
21271 return parse_shareable_constant_write(parser, write);
21272 }
21273 case PM_INSTANCE_VARIABLE_READ_NODE: {
21274 parser_lex(parser);
21275
21276 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21277 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21278
21279 pm_node_destroy(parser, node);
21280 return result;
21281 }
21282 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21283 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21284 parser_lex(parser);
21285
21286 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21287 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21288
21289 parse_target_implicit_parameter(parser, node);
21290 pm_node_destroy(parser, node);
21291 return result;
21292 }
21293 case PM_LOCAL_VARIABLE_READ_NODE: {
21294 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21295 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21296 parse_target_implicit_parameter(parser, node);
21297 }
21298
21300 parser_lex(parser);
21301
21302 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21303 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21304
21305 pm_node_destroy(parser, node);
21306 return result;
21307 }
21308 case PM_CALL_NODE: {
21309 pm_call_node_t *cast = (pm_call_node_t *) node;
21310
21311 // If we have a vcall (a method with no arguments and no
21312 // receiver that could have been a local variable) then we
21313 // will transform it into a local variable write.
21314 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21315 pm_location_t *message_loc = &cast->message_loc;
21316 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21317
21318 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21319 parser_lex(parser);
21320
21321 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21322 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21323
21324 pm_node_destroy(parser, (pm_node_t *) cast);
21325 return result;
21326 }
21327
21328 // Move past the token here so that we have already added
21329 // the local variable by this point.
21330 parser_lex(parser);
21331
21332 // If there is no call operator and the message is "[]" then
21333 // this is an aref expression, and we can transform it into
21334 // an aset expression.
21335 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21336 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21337 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21338 }
21339
21340 // If this node cannot be writable, then we have an error.
21341 if (pm_call_node_writable_p(parser, cast)) {
21342 parse_write_name(parser, &cast->name);
21343 } else {
21344 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21345 }
21346
21347 parse_call_operator_write(parser, cast, &token);
21348 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21349 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21350 }
21351 case PM_MULTI_WRITE_NODE: {
21352 parser_lex(parser);
21353 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21354 return node;
21355 }
21356 default:
21357 parser_lex(parser);
21358
21359 // In this case we have an &&= sign, but we don't know what it's for.
21360 // We need to treat it as an error. For now, we'll mark it as an error
21361 // and just skip right past it.
21362 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21363 return node;
21364 }
21365 }
21366 case PM_TOKEN_PIPE_PIPE_EQUAL: {
21367 switch (PM_NODE_TYPE(node)) {
21368 case PM_BACK_REFERENCE_READ_NODE:
21369 case PM_NUMBERED_REFERENCE_READ_NODE:
21370 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21372 case PM_GLOBAL_VARIABLE_READ_NODE: {
21373 parser_lex(parser);
21374
21375 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21376 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21377
21378 pm_node_destroy(parser, node);
21379 return result;
21380 }
21381 case PM_CLASS_VARIABLE_READ_NODE: {
21382 parser_lex(parser);
21383
21384 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21385 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21386
21387 pm_node_destroy(parser, node);
21388 return result;
21389 }
21390 case PM_CONSTANT_PATH_NODE: {
21391 parser_lex(parser);
21392
21393 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21394 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21395
21396 return parse_shareable_constant_write(parser, write);
21397 }
21398 case PM_CONSTANT_READ_NODE: {
21399 parser_lex(parser);
21400
21401 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21402 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21403
21404 pm_node_destroy(parser, node);
21405 return parse_shareable_constant_write(parser, write);
21406 }
21407 case PM_INSTANCE_VARIABLE_READ_NODE: {
21408 parser_lex(parser);
21409
21410 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21411 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21412
21413 pm_node_destroy(parser, node);
21414 return result;
21415 }
21416 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21417 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21418 parser_lex(parser);
21419
21420 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21421 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21422
21423 parse_target_implicit_parameter(parser, node);
21424 pm_node_destroy(parser, node);
21425 return result;
21426 }
21427 case PM_LOCAL_VARIABLE_READ_NODE: {
21428 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21429 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21430 parse_target_implicit_parameter(parser, node);
21431 }
21432
21434 parser_lex(parser);
21435
21436 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21437 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21438
21439 pm_node_destroy(parser, node);
21440 return result;
21441 }
21442 case PM_CALL_NODE: {
21443 pm_call_node_t *cast = (pm_call_node_t *) node;
21444
21445 // If we have a vcall (a method with no arguments and no
21446 // receiver that could have been a local variable) then we
21447 // will transform it into a local variable write.
21448 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21449 pm_location_t *message_loc = &cast->message_loc;
21450 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21451
21452 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21453 parser_lex(parser);
21454
21455 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21456 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21457
21458 pm_node_destroy(parser, (pm_node_t *) cast);
21459 return result;
21460 }
21461
21462 // Move past the token here so that we have already added
21463 // the local variable by this point.
21464 parser_lex(parser);
21465
21466 // If there is no call operator and the message is "[]" then
21467 // this is an aref expression, and we can transform it into
21468 // an aset expression.
21469 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21470 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21471 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21472 }
21473
21474 // If this node cannot be writable, then we have an error.
21475 if (pm_call_node_writable_p(parser, cast)) {
21476 parse_write_name(parser, &cast->name);
21477 } else {
21478 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21479 }
21480
21481 parse_call_operator_write(parser, cast, &token);
21482 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21483 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21484 }
21485 case PM_MULTI_WRITE_NODE: {
21486 parser_lex(parser);
21487 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21488 return node;
21489 }
21490 default:
21491 parser_lex(parser);
21492
21493 // In this case we have an ||= sign, but we don't know what it's for.
21494 // We need to treat it as an error. For now, we'll mark it as an error
21495 // and just skip right past it.
21496 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21497 return node;
21498 }
21499 }
21500 case PM_TOKEN_AMPERSAND_EQUAL:
21501 case PM_TOKEN_CARET_EQUAL:
21502 case PM_TOKEN_GREATER_GREATER_EQUAL:
21503 case PM_TOKEN_LESS_LESS_EQUAL:
21504 case PM_TOKEN_MINUS_EQUAL:
21505 case PM_TOKEN_PERCENT_EQUAL:
21506 case PM_TOKEN_PIPE_EQUAL:
21507 case PM_TOKEN_PLUS_EQUAL:
21508 case PM_TOKEN_SLASH_EQUAL:
21509 case PM_TOKEN_STAR_EQUAL:
21510 case PM_TOKEN_STAR_STAR_EQUAL: {
21511 switch (PM_NODE_TYPE(node)) {
21512 case PM_BACK_REFERENCE_READ_NODE:
21513 case PM_NUMBERED_REFERENCE_READ_NODE:
21514 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21516 case PM_GLOBAL_VARIABLE_READ_NODE: {
21517 parser_lex(parser);
21518
21519 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21520 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21521
21522 pm_node_destroy(parser, node);
21523 return result;
21524 }
21525 case PM_CLASS_VARIABLE_READ_NODE: {
21526 parser_lex(parser);
21527
21528 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21529 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21530
21531 pm_node_destroy(parser, node);
21532 return result;
21533 }
21534 case PM_CONSTANT_PATH_NODE: {
21535 parser_lex(parser);
21536
21537 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21538 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21539
21540 return parse_shareable_constant_write(parser, write);
21541 }
21542 case PM_CONSTANT_READ_NODE: {
21543 parser_lex(parser);
21544
21545 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21546 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21547
21548 pm_node_destroy(parser, node);
21549 return parse_shareable_constant_write(parser, write);
21550 }
21551 case PM_INSTANCE_VARIABLE_READ_NODE: {
21552 parser_lex(parser);
21553
21554 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21555 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21556
21557 pm_node_destroy(parser, node);
21558 return result;
21559 }
21560 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21561 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21562 parser_lex(parser);
21563
21564 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21565 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21566
21567 parse_target_implicit_parameter(parser, node);
21568 pm_node_destroy(parser, node);
21569 return result;
21570 }
21571 case PM_LOCAL_VARIABLE_READ_NODE: {
21572 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21573 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21574 parse_target_implicit_parameter(parser, node);
21575 }
21576
21578 parser_lex(parser);
21579
21580 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21581 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21582
21583 pm_node_destroy(parser, node);
21584 return result;
21585 }
21586 case PM_CALL_NODE: {
21587 parser_lex(parser);
21588 pm_call_node_t *cast = (pm_call_node_t *) node;
21589
21590 // If we have a vcall (a method with no arguments and no
21591 // receiver that could have been a local variable) then we
21592 // will transform it into a local variable write.
21593 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21594 pm_location_t *message_loc = &cast->message_loc;
21595 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21596
21597 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21598 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21599 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21600
21601 pm_node_destroy(parser, (pm_node_t *) cast);
21602 return result;
21603 }
21604
21605 // If there is no call operator and the message is "[]" then
21606 // this is an aref expression, and we can transform it into
21607 // an aset expression.
21608 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21609 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21610 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21611 }
21612
21613 // If this node cannot be writable, then we have an error.
21614 if (pm_call_node_writable_p(parser, cast)) {
21615 parse_write_name(parser, &cast->name);
21616 } else {
21617 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21618 }
21619
21620 parse_call_operator_write(parser, cast, &token);
21621 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21622 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21623 }
21624 case PM_MULTI_WRITE_NODE: {
21625 parser_lex(parser);
21626 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21627 return node;
21628 }
21629 default:
21630 parser_lex(parser);
21631
21632 // In this case we have an operator but we don't know what it's for.
21633 // We need to treat it as an error. For now, we'll mark it as an error
21634 // and just skip right past it.
21635 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21636 return node;
21637 }
21638 }
21639 case PM_TOKEN_AMPERSAND_AMPERSAND:
21640 case PM_TOKEN_KEYWORD_AND: {
21641 parser_lex(parser);
21642
21643 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21644 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21645 }
21646 case PM_TOKEN_KEYWORD_OR:
21647 case PM_TOKEN_PIPE_PIPE: {
21648 parser_lex(parser);
21649
21650 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21651 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21652 }
21653 case PM_TOKEN_EQUAL_TILDE: {
21654 // Note that we _must_ parse the value before adding the local
21655 // variables in order to properly mirror the behavior of Ruby. For
21656 // example,
21657 //
21658 // /(?<foo>bar)/ =~ foo
21659 //
21660 // In this case, `foo` should be a method call and not a local yet.
21661 parser_lex(parser);
21662 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21663
21664 // By default, we're going to create a call node and then return it.
21665 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21666 pm_node_t *result = (pm_node_t *) call;
21667
21668 // If the receiver of this =~ is a regular expression node, then we
21669 // need to introduce local variables for it based on its named
21670 // capture groups.
21671 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21672 // It's possible to have an interpolated regular expression node
21673 // that only contains strings. This is because it can be split
21674 // up by a heredoc. In this case we need to concat the unescaped
21675 // strings together and then parse them as a regular expression.
21677
21678 bool interpolated = false;
21679 size_t total_length = 0;
21680
21681 pm_node_t *part;
21682 PM_NODE_LIST_FOREACH(parts, index, part) {
21683 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21684 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21685 } else {
21686 interpolated = true;
21687 break;
21688 }
21689 }
21690
21691 if (!interpolated && total_length > 0) {
21692 void *memory = xmalloc(total_length);
21693 if (!memory) abort();
21694
21695 uint8_t *cursor = memory;
21696 PM_NODE_LIST_FOREACH(parts, index, part) {
21697 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21698 size_t length = pm_string_length(unescaped);
21699
21700 memcpy(cursor, pm_string_source(unescaped), length);
21701 cursor += length;
21702 }
21703
21704 pm_string_t owned;
21705 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21706
21707 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21708 pm_string_free(&owned);
21709 }
21710 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21711 // If we have a regular expression node, then we can just parse
21712 // the named captures directly off the unescaped string.
21713 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21714 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21715 }
21716
21717 return result;
21718 }
21719 case PM_TOKEN_UAMPERSAND:
21720 case PM_TOKEN_USTAR:
21721 case PM_TOKEN_USTAR_STAR:
21722 // The only times this will occur are when we are in an error state,
21723 // but we'll put them in here so that errors can propagate.
21724 case PM_TOKEN_BANG_EQUAL:
21725 case PM_TOKEN_BANG_TILDE:
21726 case PM_TOKEN_EQUAL_EQUAL:
21727 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21728 case PM_TOKEN_LESS_EQUAL_GREATER:
21729 case PM_TOKEN_CARET:
21730 case PM_TOKEN_PIPE:
21731 case PM_TOKEN_AMPERSAND:
21732 case PM_TOKEN_GREATER_GREATER:
21733 case PM_TOKEN_LESS_LESS:
21734 case PM_TOKEN_MINUS:
21735 case PM_TOKEN_PLUS:
21736 case PM_TOKEN_PERCENT:
21737 case PM_TOKEN_SLASH:
21738 case PM_TOKEN_STAR:
21739 case PM_TOKEN_STAR_STAR: {
21740 parser_lex(parser);
21741 pm_token_t operator = parser->previous;
21742 switch (PM_NODE_TYPE(node)) {
21743 case PM_RESCUE_MODIFIER_NODE: {
21745 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21746 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21747 }
21748 break;
21749 }
21750 case PM_AND_NODE: {
21751 pm_and_node_t *cast = (pm_and_node_t *) node;
21752 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21753 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21754 }
21755 break;
21756 }
21757 case PM_OR_NODE: {
21758 pm_or_node_t *cast = (pm_or_node_t *) node;
21759 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21760 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21761 }
21762 break;
21763 }
21764 default:
21765 break;
21766 }
21767
21768 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21769 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21770 }
21771 case PM_TOKEN_GREATER:
21772 case PM_TOKEN_GREATER_EQUAL:
21773 case PM_TOKEN_LESS:
21774 case PM_TOKEN_LESS_EQUAL: {
21775 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21776 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21777 }
21778
21779 parser_lex(parser);
21780 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21781 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21782 }
21783 case PM_TOKEN_AMPERSAND_DOT:
21784 case PM_TOKEN_DOT: {
21785 parser_lex(parser);
21786 pm_token_t operator = parser->previous;
21787 pm_arguments_t arguments = { 0 };
21788
21789 // This if statement handles the foo.() syntax.
21790 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21791 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21792 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21793 }
21794
21795 switch (PM_NODE_TYPE(node)) {
21796 case PM_RESCUE_MODIFIER_NODE: {
21798 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21799 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21800 }
21801 break;
21802 }
21803 case PM_AND_NODE: {
21804 pm_and_node_t *cast = (pm_and_node_t *) node;
21805 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21806 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21807 }
21808 break;
21809 }
21810 case PM_OR_NODE: {
21811 pm_or_node_t *cast = (pm_or_node_t *) node;
21812 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21813 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21814 }
21815 break;
21816 }
21817 default:
21818 break;
21819 }
21820
21821 pm_token_t message;
21822
21823 switch (parser->current.type) {
21824 case PM_CASE_OPERATOR:
21825 case PM_CASE_KEYWORD:
21826 case PM_TOKEN_CONSTANT:
21827 case PM_TOKEN_IDENTIFIER:
21828 case PM_TOKEN_METHOD_NAME: {
21829 parser_lex(parser);
21830 message = parser->previous;
21831 break;
21832 }
21833 default: {
21834 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21835 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21836 }
21837 }
21838
21839 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21840 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21841
21842 if (
21843 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21844 arguments.arguments == NULL &&
21845 arguments.opening_loc.start == NULL &&
21846 match1(parser, PM_TOKEN_COMMA)
21847 ) {
21848 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21849 } else {
21850 return (pm_node_t *) call;
21851 }
21852 }
21853 case PM_TOKEN_DOT_DOT:
21854 case PM_TOKEN_DOT_DOT_DOT: {
21855 parser_lex(parser);
21856
21857 pm_node_t *right = NULL;
21858 if (token_begins_expression_p(parser->current.type)) {
21859 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21860 }
21861
21862 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21863 }
21864 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21865 pm_token_t keyword = parser->current;
21866 parser_lex(parser);
21867
21868 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21869 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21870 }
21871 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21872 pm_token_t keyword = parser->current;
21873 parser_lex(parser);
21874
21875 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21876 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21877 }
21878 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21879 parser_lex(parser);
21880 pm_statements_node_t *statements = pm_statements_node_create(parser);
21881 pm_statements_node_body_append(parser, statements, node, true);
21882
21883 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21884 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21885 }
21886 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21887 parser_lex(parser);
21888 pm_statements_node_t *statements = pm_statements_node_create(parser);
21889 pm_statements_node_body_append(parser, statements, node, true);
21890
21891 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21892 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21893 }
21894 case PM_TOKEN_QUESTION_MARK: {
21895 context_push(parser, PM_CONTEXT_TERNARY);
21896 pm_node_list_t current_block_exits = { 0 };
21897 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21898
21899 pm_token_t qmark = parser->current;
21900 parser_lex(parser);
21901
21902 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21903
21904 if (parser->recovering) {
21905 // If parsing the true expression of this ternary resulted in a syntax
21906 // error that we can recover from, then we're going to put missing nodes
21907 // and tokens into the remaining places. We want to be sure to do this
21908 // before the `expect` function call to make sure it doesn't
21909 // accidentally move past a ':' token that occurs after the syntax
21910 // error.
21911 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21912 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21913
21914 context_pop(parser);
21915 pop_block_exits(parser, previous_block_exits);
21916 pm_node_list_free(&current_block_exits);
21917
21918 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21919 }
21920
21921 accept1(parser, PM_TOKEN_NEWLINE);
21922 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21923
21924 pm_token_t colon = parser->previous;
21925 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21926
21927 context_pop(parser);
21928 pop_block_exits(parser, previous_block_exits);
21929 pm_node_list_free(&current_block_exits);
21930
21931 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21932 }
21933 case PM_TOKEN_COLON_COLON: {
21934 parser_lex(parser);
21935 pm_token_t delimiter = parser->previous;
21936
21937 switch (parser->current.type) {
21938 case PM_TOKEN_CONSTANT: {
21939 parser_lex(parser);
21940 pm_node_t *path;
21941
21942 if (
21943 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21944 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21945 ) {
21946 // If we have a constant immediately following a '::' operator, then
21947 // this can either be a constant path or a method call, depending on
21948 // what follows the constant.
21949 //
21950 // If we have parentheses, then this is a method call. That would
21951 // look like Foo::Bar().
21952 pm_token_t message = parser->previous;
21953 pm_arguments_t arguments = { 0 };
21954
21955 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21956 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21957 } else {
21958 // Otherwise, this is a constant path. That would look like Foo::Bar.
21959 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21960 }
21961
21962 // If this is followed by a comma then it is a multiple assignment.
21963 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21964 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21965 }
21966
21967 return path;
21968 }
21969 case PM_CASE_OPERATOR:
21970 case PM_CASE_KEYWORD:
21971 case PM_TOKEN_IDENTIFIER:
21972 case PM_TOKEN_METHOD_NAME: {
21973 parser_lex(parser);
21974 pm_token_t message = parser->previous;
21975
21976 // If we have an identifier following a '::' operator, then it is for
21977 // sure a method call.
21978 pm_arguments_t arguments = { 0 };
21979 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21980 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21981
21982 // If this is followed by a comma then it is a multiple assignment.
21983 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21984 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21985 }
21986
21987 return (pm_node_t *) call;
21988 }
21989 case PM_TOKEN_PARENTHESIS_LEFT: {
21990 // If we have a parenthesis following a '::' operator, then it is the
21991 // method call shorthand. That would look like Foo::(bar).
21992 pm_arguments_t arguments = { 0 };
21993 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21994
21995 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21996 }
21997 default: {
21998 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21999 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
22000 }
22001 }
22002 }
22003 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
22004 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
22005 parser_lex(parser);
22006 accept1(parser, PM_TOKEN_NEWLINE);
22007
22008 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
22009 context_pop(parser);
22010
22011 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
22012 }
22013 case PM_TOKEN_BRACKET_LEFT: {
22014 parser_lex(parser);
22015
22016 pm_arguments_t arguments = { 0 };
22017 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22018
22019 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
22020 pm_accepts_block_stack_push(parser, true);
22021 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
22022 pm_accepts_block_stack_pop(parser);
22023 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
22024 }
22025
22026 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22027
22028 // If we have a comma after the closing bracket then this is a multiple
22029 // assignment and we should parse the targets.
22030 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22031 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
22032 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22033 }
22034
22035 // If we're at the end of the arguments, we can now check if there is a
22036 // block node that starts with a {. If there is, then we can parse it and
22037 // add it to the arguments.
22038 pm_block_node_t *block = NULL;
22039 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
22040 block = parse_block(parser, (uint16_t) (depth + 1));
22041 pm_arguments_validate_block(parser, &arguments, block);
22042 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
22043 block = parse_block(parser, (uint16_t) (depth + 1));
22044 }
22045
22046 if (block != NULL) {
22047 if (arguments.block != NULL) {
22048 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
22049 if (arguments.arguments == NULL) {
22050 arguments.arguments = pm_arguments_node_create(parser);
22051 }
22052 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
22053 }
22054
22055 arguments.block = (pm_node_t *) block;
22056 }
22057
22058 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
22059 }
22060 case PM_TOKEN_KEYWORD_IN: {
22061 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22062 parser->pattern_matching_newlines = true;
22063
22064 pm_token_t operator = parser->current;
22065 parser->command_start = false;
22066 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22067 parser_lex(parser);
22068
22069 pm_constant_id_list_t captures = { 0 };
22070 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
22071
22072 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22073 pm_constant_id_list_free(&captures);
22074
22075 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
22076 }
22077 case PM_TOKEN_EQUAL_GREATER: {
22078 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22079 parser->pattern_matching_newlines = true;
22080
22081 pm_token_t operator = parser->current;
22082 parser->command_start = false;
22083 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22084 parser_lex(parser);
22085
22086 pm_constant_id_list_t captures = { 0 };
22087 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
22088
22089 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22090 pm_constant_id_list_free(&captures);
22091
22092 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
22093 }
22094 default:
22095 assert(false && "unreachable");
22096 return NULL;
22097 }
22098}
22099
22100#undef PM_PARSE_PATTERN_SINGLE
22101#undef PM_PARSE_PATTERN_TOP
22102#undef PM_PARSE_PATTERN_MULTI
22103
22108static inline bool
22109pm_call_node_command_p(const pm_call_node_t *node) {
22110 return (
22111 (node->opening_loc.start == NULL) &&
22112 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
22113 (node->arguments != NULL || node->block != NULL)
22114 );
22115}
22116
22125static pm_node_t *
22126parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
22127 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
22128 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
22129 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
22130 }
22131
22132 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
22133
22134 switch (PM_NODE_TYPE(node)) {
22135 case PM_MISSING_NODE:
22136 // If we found a syntax error, then the type of node returned by
22137 // parse_expression_prefix is going to be a missing node.
22138 return node;
22139 case PM_PRE_EXECUTION_NODE:
22140 case PM_POST_EXECUTION_NODE:
22141 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
22142 case PM_ALIAS_METHOD_NODE:
22143 case PM_MULTI_WRITE_NODE:
22144 case PM_UNDEF_NODE:
22145 // These expressions are statements, and cannot be followed by
22146 // operators (except modifiers).
22147 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22148 return node;
22149 }
22150 break;
22151 case PM_CALL_NODE:
22152 // If we have a call node, then we need to check if it looks like a
22153 // method call without parentheses that contains arguments. If it
22154 // does, then it has different rules for parsing infix operators,
22155 // namely that it only accepts composition (and/or) and modifiers
22156 // (if/unless/etc.).
22157 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
22158 return node;
22159 }
22160 break;
22161 case PM_SYMBOL_NODE:
22162 // If we have a symbol node that is being parsed as a label, then we
22163 // need to immediately return, because there should never be an
22164 // infix operator following this node.
22165 if (pm_symbol_node_label_p(node)) {
22166 return node;
22167 }
22168 break;
22169 default:
22170 break;
22171 }
22172
22173 // Otherwise we'll look and see if the next token can be parsed as an infix
22174 // operator. If it can, then we'll parse it using parse_expression_infix.
22175 pm_binding_powers_t current_binding_powers;
22176 pm_token_type_t current_token_type;
22177
22178 while (
22179 current_token_type = parser->current.type,
22180 current_binding_powers = pm_binding_powers[current_token_type],
22181 binding_power <= current_binding_powers.left &&
22182 current_binding_powers.binary
22183 ) {
22184 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
22185
22186 if (context_terminator(parser->current_context->context, &parser->current)) {
22187 // If this token terminates the current context, then we need to
22188 // stop parsing the expression, as it has become a statement.
22189 return node;
22190 }
22191
22192 switch (PM_NODE_TYPE(node)) {
22193 case PM_MULTI_WRITE_NODE:
22194 // Multi-write nodes are statements, and cannot be followed by
22195 // operators except modifiers.
22196 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22197 return node;
22198 }
22199 break;
22200 case PM_CLASS_VARIABLE_WRITE_NODE:
22201 case PM_CONSTANT_PATH_WRITE_NODE:
22202 case PM_CONSTANT_WRITE_NODE:
22203 case PM_GLOBAL_VARIABLE_WRITE_NODE:
22204 case PM_INSTANCE_VARIABLE_WRITE_NODE:
22205 case PM_LOCAL_VARIABLE_WRITE_NODE:
22206 // These expressions are statements, by virtue of the right-hand
22207 // side of their write being an implicit array.
22208 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22209 return node;
22210 }
22211 break;
22212 case PM_CALL_NODE:
22213 // These expressions are also statements, by virtue of the
22214 // right-hand side of the expression (i.e., the last argument to
22215 // the call node) being an implicit array.
22216 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22217 return node;
22218 }
22219 break;
22220 default:
22221 break;
22222 }
22223
22224 // If the operator is nonassoc and we should not be able to parse the
22225 // upcoming infix operator, break.
22226 if (current_binding_powers.nonassoc) {
22227 // If this is a non-assoc operator and we are about to parse the
22228 // exact same operator, then we need to add an error.
22229 if (match1(parser, current_token_type)) {
22230 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22231 break;
22232 }
22233
22234 // If this is an endless range, then we need to reject a couple of
22235 // additional operators because it violates the normal operator
22236 // precedence rules. Those patterns are:
22237 //
22238 // 1.. & 2
22239 // 1.. * 2
22240 //
22241 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
22242 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
22243 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22244 break;
22245 }
22246
22247 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22248 break;
22249 }
22250 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22251 break;
22252 }
22253 }
22254
22255 if (accepts_command_call) {
22256 // A command-style method call is only accepted on method chains.
22257 // Thus, we check whether the parsed node can continue method chains.
22258 // The method chain can continue if the parsed node is one of the following five kinds:
22259 // (1) index access: foo[1]
22260 // (2) attribute access: foo.bar
22261 // (3) method call with parenthesis: foo.bar(1)
22262 // (4) method call with a block: foo.bar do end
22263 // (5) constant path: foo::Bar
22264 switch (node->type) {
22265 case PM_CALL_NODE: {
22266 pm_call_node_t *cast = (pm_call_node_t *)node;
22267 if (
22268 // (1) foo[1]
22269 !(
22270 cast->call_operator_loc.start == NULL &&
22271 cast->message_loc.start != NULL &&
22272 cast->message_loc.start[0] == '[' &&
22273 cast->message_loc.end[-1] == ']'
22274 ) &&
22275 // (2) foo.bar
22276 !(
22277 cast->call_operator_loc.start != NULL &&
22278 cast->arguments == NULL &&
22279 cast->block == NULL &&
22280 cast->opening_loc.start == NULL
22281 ) &&
22282 // (3) foo.bar(1)
22283 !(
22284 cast->call_operator_loc.start != NULL &&
22285 cast->opening_loc.start != NULL
22286 ) &&
22287 // (4) foo.bar do end
22288 !(
22289 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22290 )
22291 ) {
22292 accepts_command_call = false;
22293 }
22294 break;
22295 }
22296 // (5) foo::Bar
22297 case PM_CONSTANT_PATH_NODE:
22298 break;
22299 default:
22300 accepts_command_call = false;
22301 break;
22302 }
22303 }
22304 }
22305
22306 return node;
22307}
22308
22313static pm_statements_node_t *
22314wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22315 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22316 if (statements == NULL) {
22317 statements = pm_statements_node_create(parser);
22318 }
22319
22320 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22321 pm_arguments_node_arguments_append(
22322 arguments,
22323 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22324 );
22325
22326 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22327 parser,
22328 arguments,
22329 pm_parser_constant_id_constant(parser, "print", 5)
22330 ), true);
22331 }
22332
22333 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22334 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22335 if (statements == NULL) {
22336 statements = pm_statements_node_create(parser);
22337 }
22338
22339 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22340 pm_arguments_node_arguments_append(
22341 arguments,
22342 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22343 );
22344
22345 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22346 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22347
22348 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22349 parser,
22350 pm_parser_constant_id_constant(parser, "$F", 2),
22351 (pm_node_t *) call
22352 );
22353
22354 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22355 }
22356
22357 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22358 pm_arguments_node_arguments_append(
22359 arguments,
22360 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22361 );
22362
22363 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22364 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22365 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22366 parser,
22367 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22368 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22369 (pm_node_t *) pm_true_node_synthesized_create(parser)
22370 ));
22371
22372 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22373 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22374 }
22375
22376 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22377 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22378 parser,
22379 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22380 statements
22381 ), true);
22382
22383 statements = wrapped_statements;
22384 }
22385
22386 return statements;
22387}
22388
22392static pm_node_t *
22393parse_program(pm_parser_t *parser) {
22394 // If the current scope is NULL, then we want to push a new top level scope.
22395 // The current scope could exist in the event that we are parsing an eval
22396 // and the user has passed into scopes that already exist.
22397 if (parser->current_scope == NULL) {
22398 pm_parser_scope_push(parser, true);
22399 }
22400
22401 pm_node_list_t current_block_exits = { 0 };
22402 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22403
22404 parser_lex(parser);
22405 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22406
22407 if (statements != NULL && !parser->parsing_eval) {
22408 // If we have statements, then the top-level statement should be
22409 // explicitly checked as well. We have to do this here because
22410 // everywhere else we check all but the last statement.
22411 assert(statements->body.size > 0);
22412 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22413 }
22414
22415 pm_constant_id_list_t locals;
22416 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22417 pm_parser_scope_pop(parser);
22418
22419 // At the top level, see if we need to wrap the statements in a program
22420 // node with a while loop based on the options.
22422 statements = wrap_statements(parser, statements);
22423 } else {
22424 flush_block_exits(parser, previous_block_exits);
22425 pm_node_list_free(&current_block_exits);
22426 }
22427
22428 // If this is an empty file, then we're still going to parse all of the
22429 // statements in order to gather up all of the comments and such. Here we'll
22430 // correct the location information.
22431 if (statements == NULL) {
22432 statements = pm_statements_node_create(parser);
22433 pm_statements_node_location_set(statements, parser->start, parser->start);
22434 }
22435
22436 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22437}
22438
22439/******************************************************************************/
22440/* External functions */
22441/******************************************************************************/
22442
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`.
 *
 * Unlike strstr, `big` does not need to be NUL-terminated: at most
 * `big_length` bytes of it are ever read, and a match is only reported when
 * it lies entirely within those bytes.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the first match, `big` itself if
 *     `little` is empty, or NULL if there is no match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the very beginning, as with strstr.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Checking this first
    // also keeps the subtraction below from wrapping around.
    if (little_length > big_length) return NULL;

    // Only consider starting positions where the whole needle still fits
    // inside the buffer, so that memcmp never reads past big + big_length.
    for (const char *last = big + (big_length - little_length); big <= last; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22462
22463#ifdef _WIN32
22464#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22465#else
22471static void
22472pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22473 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22474 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22475 }
22476}
22477#endif
22478
22483static void
22484pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22485 const char *switches = pm_strnstr(engine, " -", length);
22486 if (switches == NULL) return;
22487
22488 pm_options_t next_options = *options;
22489 options->shebang_callback(
22490 &next_options,
22491 (const uint8_t *) (switches + 1),
22492 length - ((size_t) (switches - engine)) - 1,
22493 options->shebang_callback_data
22494 );
22495
22496 size_t encoding_length;
22497 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22498 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22499 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22500 }
22501
22502 parser->command_line = next_options.command_line;
22503 parser->frozen_string_literal = next_options.frozen_string_literal;
22504}
22505
22510pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22511 assert(source != NULL);
22512
22513 *parser = (pm_parser_t) {
22514 .node_id = 0,
22515 .lex_state = PM_LEX_STATE_BEG,
22516 .enclosure_nesting = 0,
22517 .lambda_enclosure_nesting = -1,
22518 .brace_nesting = 0,
22519 .do_loop_stack = 0,
22520 .accepts_block_stack = 0,
22521 .lex_modes = {
22522 .index = 0,
22523 .stack = {{ .mode = PM_LEX_DEFAULT }},
22524 .current = &parser->lex_modes.stack[0],
22525 },
22526 .start = source,
22527 .end = source + size,
22528 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22529 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22530 .next_start = NULL,
22531 .heredoc_end = NULL,
22532 .data_loc = { .start = NULL, .end = NULL },
22533 .comment_list = { 0 },
22534 .magic_comment_list = { 0 },
22535 .warning_list = { 0 },
22536 .error_list = { 0 },
22537 .current_scope = NULL,
22538 .current_context = NULL,
22539 .encoding = PM_ENCODING_UTF_8_ENTRY,
22540 .encoding_changed_callback = NULL,
22541 .encoding_comment_start = source,
22542 .lex_callback = NULL,
22543 .filepath = { 0 },
22544 .constant_pool = { 0 },
22545 .newline_list = { 0 },
22546 .integer_base = 0,
22547 .current_string = PM_STRING_EMPTY,
22548 .start_line = 1,
22549 .explicit_encoding = NULL,
22550 .command_line = 0,
22551 .parsing_eval = false,
22552 .partial_script = false,
22553 .command_start = true,
22554 .recovering = false,
22555 .encoding_locked = false,
22556 .encoding_changed = false,
22557 .pattern_matching_newlines = false,
22558 .in_keyword_arg = false,
22559 .current_block_exits = NULL,
22560 .semantic_token_seen = false,
22561 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22562 .current_regular_expression_ascii_only = false,
22563 .warn_mismatched_indentation = true
22564 };
22565
22566 // Initialize the constant pool. We're going to completely guess as to the
22567 // number of constants that we'll need based on the size of the input. The
22568 // ratio we chose here is actually less arbitrary than you might think.
22569 //
22570 // We took ~50K Ruby files and measured the size of the file versus the
22571 // number of constants that were found in those files. Then we found the
22572 // average and standard deviation of the ratios of constants/bytesize. Then
22573 // we added 1.34 standard deviations to the average to get a ratio that
22574 // would fit 75% of the files (for a two-tailed distribution). This works
22575 // because there was about a 0.77 correlation and the distribution was
22576 // roughly normal.
22577 //
22578 // This ratio will need to change if we add more constants to the constant
22579 // pool for another node type.
22580 uint32_t constant_size = ((uint32_t) size) / 95;
22581 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22582
22583 // Initialize the newline list. Similar to the constant pool, we're going to
22584 // guess at the number of newlines that we'll need based on the size of the
22585 // input.
22586 size_t newline_size = size / 22;
22587 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22588
22589 // If options were provided to this parse, establish them here.
22590 if (options != NULL) {
22591 // filepath option
22592 parser->filepath = options->filepath;
22593
22594 // line option
22595 parser->start_line = options->line;
22596
22597 // encoding option
22598 size_t encoding_length = pm_string_length(&options->encoding);
22599 if (encoding_length > 0) {
22600 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22601 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22602 }
22603
22604 // encoding_locked option
22605 parser->encoding_locked = options->encoding_locked;
22606
22607 // frozen_string_literal option
22609
22610 // command_line option
22611 parser->command_line = options->command_line;
22612
22613 // version option
22614 parser->version = options->version;
22615
22616 // partial_script
22617 parser->partial_script = options->partial_script;
22618
22619 // scopes option
22620 parser->parsing_eval = options->scopes_count > 0;
22621 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22622
22623 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22624 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22625 pm_parser_scope_push(parser, scope_index == 0);
22626
22627 // Scopes given from the outside are not allowed to have numbered
22628 // parameters.
22629 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22630
22631 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22632 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22633
22634 const uint8_t *source = pm_string_source(local);
22635 size_t length = pm_string_length(local);
22636
22637 void *allocated = xmalloc(length);
22638 if (allocated == NULL) continue;
22639
22640 memcpy(allocated, source, length);
22641 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22642 }
22643 }
22644 }
22645
22646 // Now that we have established the user-provided options, check if
22647 // a version was given and parse as the latest version otherwise.
22648 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22650 }
22651
22652 pm_accepts_block_stack_push(parser, true);
22653
22654 // Skip past the UTF-8 BOM if it exists.
22655 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22656 parser->current.end += 3;
22657 parser->encoding_comment_start += 3;
22658
22659 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22661 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22662 }
22663 }
22664
22665 // If the -x command line flag is set, or the first shebang of the file does
22666 // not include "ruby", then we'll search for a shebang that does include
22667 // "ruby" and start parsing from there.
22668 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22669
22670 // If the first two bytes of the source are a shebang, then we will do a bit
22671 // of extra processing.
22672 //
22673 // First, we'll indicate that the encoding comment is at the end of the
22674 // shebang. This means that when a shebang is present the encoding comment
22675 // can begin on the second line.
22676 //
22677 // Second, we will check if the shebang includes "ruby". If it does, then we
22678 // we will start parsing from there. We will also potentially warning the
22679 // user if there is a carriage return at the end of the shebang. We will
22680 // also potentially call the shebang callback if this is the main script to
22681 // allow the caller to parse the shebang and find any command-line options.
22682 // If the shebang does not include "ruby" and this is the main script being
22683 // parsed, then we will start searching the file for a shebang that does
22684 // contain "ruby" as if -x were passed on the command line.
22685 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22686 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22687
22688 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22689 const char *engine;
22690
22691 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22692 if (newline != NULL) {
22693 parser->encoding_comment_start = newline + 1;
22694
22695 if (options == NULL || options->main_script) {
22696 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22697 }
22698 }
22699
22700 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22701 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22702 }
22703
22704 search_shebang = false;
22705 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22706 search_shebang = true;
22707 }
22708 }
22709
22710 // Here we're going to find the first shebang that includes "ruby" and start
22711 // parsing from there.
22712 if (search_shebang) {
22713 // If a shebang that includes "ruby" is not found, then we're going to a
22714 // a load error to the list of errors on the parser.
22715 bool found_shebang = false;
22716
22717 // This is going to point to the start of each line as we check it.
22718 // We'll maintain a moving window looking at each line at they come.
22719 const uint8_t *cursor = parser->start;
22720
22721 // The newline pointer points to the end of the current line that we're
22722 // considering. If it is NULL, then we're at the end of the file.
22723 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22724
22725 while (newline != NULL) {
22726 pm_newline_list_append(&parser->newline_list, newline);
22727
22728 cursor = newline + 1;
22729 newline = next_newline(cursor, parser->end - cursor);
22730
22731 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22732 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22733 const char *engine;
22734 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22735 found_shebang = true;
22736
22737 if (newline != NULL) {
22738 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22739 parser->encoding_comment_start = newline + 1;
22740 }
22741
22742 if (options != NULL && options->shebang_callback != NULL) {
22743 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22744 }
22745
22746 break;
22747 }
22748 }
22749 }
22750
22751 if (found_shebang) {
22752 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22753 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22754 } else {
22755 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22756 pm_newline_list_clear(&parser->newline_list);
22757 }
22758 }
22759
22760 // The encoding comment can start after any amount of inline whitespace, so
22761 // here we'll advance it to the first non-inline-whitespace character so
22762 // that it is ready for future comparisons.
22763 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22764}
22765
22771pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
22772 parser->encoding_changed_callback = callback;
22773}
22774
22778static inline void
22779pm_comment_list_free(pm_list_t *list) {
22780 pm_list_node_t *node, *next;
22781
22782 for (node = list->head; node != NULL; node = next) {
22783 next = node->next;
22784
22785 pm_comment_t *comment = (pm_comment_t *) node;
22786 xfree(comment);
22787 }
22788}
22789
22793static inline void
22794pm_magic_comment_list_free(pm_list_t *list) {
22795 pm_list_node_t *node, *next;
22796
22797 for (node = list->head; node != NULL; node = next) {
22798 next = node->next;
22799
22802 }
22803}
22804
22809pm_parser_free(pm_parser_t *parser) {
22810 pm_string_free(&parser->filepath);
22811 pm_diagnostic_list_free(&parser->error_list);
22812 pm_diagnostic_list_free(&parser->warning_list);
22813 pm_comment_list_free(&parser->comment_list);
22814 pm_magic_comment_list_free(&parser->magic_comment_list);
22815 pm_constant_pool_free(&parser->constant_pool);
22816 pm_newline_list_free(&parser->newline_list);
22817
22818 while (parser->current_scope != NULL) {
22819 // Normally, popping the scope doesn't free the locals since it is
22820 // assumed that ownership has transferred to the AST. However if we have
22821 // scopes while we're freeing the parser, it's likely they came from
22822 // eval scopes and we need to free them explicitly here.
22823 pm_parser_scope_pop(parser);
22824 }
22825
22826 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22827 lex_mode_pop(parser);
22828 }
22829}
22830
22835pm_parse(pm_parser_t *parser) {
22836 return parse_program(parser);
22837}
22838
22844static bool
22845pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22846#define LINE_SIZE 4096
22847 char line[LINE_SIZE];
22848
22849 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22850 size_t length = LINE_SIZE;
22851 while (length > 0 && line[length - 1] == '\n') length--;
22852
22853 if (length == LINE_SIZE) {
22854 // If we read a line that is the maximum size and it doesn't end
22855 // with a newline, then we'll just append it to the buffer and
22856 // continue reading.
22857 length--;
22858 pm_buffer_append_string(buffer, line, length);
22859 continue;
22860 }
22861
22862 // Append the line to the buffer.
22863 length--;
22864 pm_buffer_append_string(buffer, line, length);
22865
22866 // Check if the line matches the __END__ marker. If it does, then stop
22867 // reading and return false. In most circumstances, this means we should
22868 // stop reading from the stream so that the DATA constant can pick it
22869 // up.
22870 switch (length) {
22871 case 7:
22872 if (strncmp(line, "__END__", 7) == 0) return false;
22873 break;
22874 case 8:
22875 if (strncmp(line, "__END__\n", 8) == 0) return false;
22876 break;
22877 case 9:
22878 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22879 break;
22880 }
22881
22882 // All data should be read via gets. If the string returned by gets
22883 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22884 if (stream_feof(stream)) {
22885 break;
22886 }
22887 }
22888
22889 return true;
22890#undef LINE_SIZE
22891}
22892
22902static bool
22903pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22904 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22905
22906 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22907 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22908 return true;
22909 }
22910 }
22911
22912 return false;
22913}
22914
22922pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22923 pm_buffer_init(buffer);
22924
22925 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22926
22927 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22928 pm_node_t *node = pm_parse(parser);
22929
22930 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22931 pm_node_destroy(parser, node);
22932 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22933
22934 pm_parser_free(parser);
22935 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22936 node = pm_parse(parser);
22937 }
22938
22939 return node;
22940}
22941
22946pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22947 pm_options_t options = { 0 };
22948 pm_options_read(&options, data);
22949
22950 pm_parser_t parser;
22951 pm_parser_init(&parser, source, size, &options);
22952
22953 pm_node_t *node = pm_parse(&parser);
22954 pm_node_destroy(&parser, node);
22955
22956 bool result = parser.error_list.size == 0;
22957 pm_parser_free(&parser);
22958 pm_options_free(&options);
22959
22960 return result;
22961}
22962
22963#undef PM_CASE_KEYWORD
22964#undef PM_CASE_OPERATOR
22965#undef PM_CASE_WRITABLE
22966#undef PM_STRING_EMPTY
22967#undef PM_LOCATION_NODE_BASE_VALUE
22968#undef PM_LOCATION_NODE_VALUE
22969#undef PM_LOCATION_NULL_VALUE
22970#undef PM_LOCATION_TOKEN_VALUE
22971
22972// We optionally support serializing to a binary string. For systems that don't
22973// want or need this functionality, it can be turned off with the
22974// PRISM_EXCLUDE_SERIALIZATION define.
22975#ifndef PRISM_EXCLUDE_SERIALIZATION
22976
22977static inline void
22978pm_serialize_header(pm_buffer_t *buffer) {
22979 pm_buffer_append_string(buffer, "PRISM", 5);
22980 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22981 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22982 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22983 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22984}
22985
22990pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22991 pm_serialize_header(buffer);
22992 pm_serialize_content(parser, node, buffer);
22993 pm_buffer_append_byte(buffer, '\0');
22994}
22995
23001pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23002 pm_options_t options = { 0 };
23003 pm_options_read(&options, data);
23004
23005 pm_parser_t parser;
23006 pm_parser_init(&parser, source, size, &options);
23007
23008 pm_node_t *node = pm_parse(&parser);
23009
23010 pm_serialize_header(buffer);
23011 pm_serialize_content(&parser, node, buffer);
23012 pm_buffer_append_byte(buffer, '\0');
23013
23014 pm_node_destroy(&parser, node);
23015 pm_parser_free(&parser);
23016 pm_options_free(&options);
23017}
23018
23024pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
23025 pm_parser_t parser;
23026 pm_options_t options = { 0 };
23027 pm_options_read(&options, data);
23028
23029 pm_buffer_t parser_buffer;
23030 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
23031 pm_serialize_header(buffer);
23032 pm_serialize_content(&parser, node, buffer);
23033 pm_buffer_append_byte(buffer, '\0');
23034
23035 pm_node_destroy(&parser, node);
23036 pm_buffer_free(&parser_buffer);
23037 pm_parser_free(&parser);
23038 pm_options_free(&options);
23039}
23040
23045pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23046 pm_options_t options = { 0 };
23047 pm_options_read(&options, data);
23048
23049 pm_parser_t parser;
23050 pm_parser_init(&parser, source, size, &options);
23051
23052 pm_node_t *node = pm_parse(&parser);
23053 pm_serialize_header(buffer);
23054 pm_serialize_encoding(parser.encoding, buffer);
23055 pm_buffer_append_varsint(buffer, parser.start_line);
23056 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
23057
23058 pm_node_destroy(&parser, node);
23059 pm_parser_free(&parser);
23060 pm_options_free(&options);
23061}
23062
23063#endif
23064
23065/******************************************************************************/
23066/* Slice queries for the Ruby API */
23067/******************************************************************************/
23068
/** The classification that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** The requested encoding could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice is empty or is not a valid identifier. */
    PM_SLICE_TYPE_NONE = 0,

    /** A valid identifier whose first character is not uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** A valid identifier whose first character is uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** A valid identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
23086
23090pm_slice_type_t
23091pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
23092 // first, get the right encoding object
23093 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
23094 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
23095
23096 // check that there is at least one character
23097 if (length == 0) return PM_SLICE_TYPE_NONE;
23098
23099 size_t width;
23100 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
23101 // valid because alphabetical
23102 } else if (*source == '_') {
23103 // valid because underscore
23104 width = 1;
23105 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
23106 // valid because multibyte
23107 } else {
23108 // invalid because no match
23109 return PM_SLICE_TYPE_NONE;
23110 }
23111
23112 // determine the type of the slice based on the first character
23113 const uint8_t *end = source + length;
23114 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
23115
23116 // next, iterate through all of the bytes of the string to ensure that they
23117 // are all valid identifier characters
23118 source += width;
23119
23120 while (source < end) {
23121 if ((width = encoding->alnum_char(source, end - source)) != 0) {
23122 // valid because alphanumeric
23123 source += width;
23124 } else if (*source == '_') {
23125 // valid because underscore
23126 source++;
23127 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
23128 // valid because multibyte
23129 source += width;
23130 } else {
23131 // invalid because no match
23132 break;
23133 }
23134 }
23135
23136 // accept a ! or ? at the end of the slice as a method name
23137 if (*source == '!' || *source == '?' || *source == '=') {
23138 source++;
23139 result = PM_SLICE_TYPE_METHOD_NAME;
23140 }
23141
23142 // valid if we are at the end of the slice
23143 return source == end ? result : PM_SLICE_TYPE_NONE;
23144}
23145
23150pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
23151 switch (pm_slice_type(source, length, encoding_name)) {
23152 case PM_SLICE_TYPE_ERROR:
23153 return PM_STRING_QUERY_ERROR;
23154 case PM_SLICE_TYPE_NONE:
23155 case PM_SLICE_TYPE_CONSTANT:
23156 case PM_SLICE_TYPE_METHOD_NAME:
23157 return PM_STRING_QUERY_FALSE;
23158 case PM_SLICE_TYPE_LOCAL:
23159 return PM_STRING_QUERY_TRUE;
23160 }
23161
23162 assert(false && "unreachable");
23163 return PM_STRING_QUERY_FALSE;
23164}
23165
23170pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
23171 switch (pm_slice_type(source, length, encoding_name)) {
23172 case PM_SLICE_TYPE_ERROR:
23173 return PM_STRING_QUERY_ERROR;
23174 case PM_SLICE_TYPE_NONE:
23175 case PM_SLICE_TYPE_LOCAL:
23176 case PM_SLICE_TYPE_METHOD_NAME:
23177 return PM_STRING_QUERY_FALSE;
23178 case PM_SLICE_TYPE_CONSTANT:
23179 return PM_STRING_QUERY_TRUE;
23180 }
23181
23182 assert(false && "unreachable");
23183 return PM_STRING_QUERY_FALSE;
23184}
23185
23190pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
23191#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
23192#define C1(c) (*source == c)
23193#define C2(s) (memcmp(source, s, 2) == 0)
23194#define C3(s) (memcmp(source, s, 3) == 0)
23195
23196 switch (pm_slice_type(source, length, encoding_name)) {
23197 case PM_SLICE_TYPE_ERROR:
23198 return PM_STRING_QUERY_ERROR;
23199 case PM_SLICE_TYPE_NONE:
23200 break;
23201 case PM_SLICE_TYPE_LOCAL:
23202 // numbered parameters are not valid method names
23203 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
23204 case PM_SLICE_TYPE_CONSTANT:
23205 // all constants are valid method names
23206 case PM_SLICE_TYPE_METHOD_NAME:
23207 // all method names are valid method names
23208 return PM_STRING_QUERY_TRUE;
23209 }
23210
23211 switch (length) {
23212 case 1:
23213 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
23214 case 2:
23215 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
23216 case 3:
23217 return B(C3("===") || C3("<=>") || C3("[]="));
23218 default:
23219 return PM_STRING_QUERY_FALSE;
23220 }
23221
23222#undef B
23223#undef C1
23224#undef C2
23225#undef C3
23226}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:219
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:225
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:98
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:86
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:92
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statements
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statements
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:566
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:249
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:257
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:251
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:254
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2141
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition prism.h:88
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2118
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:95
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2048
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17994
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17996
const uint8_t * start
The start of the regular expression.
Definition prism.c:17999
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:18010
const uint8_t * end
The end of the regular expression.
Definition prism.c:18002
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20907
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20918
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20909
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20915
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20912
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20925
AndNode.
Definition ast.h:1262
struct pm_node * left
AndNode::left.
Definition ast.h:1278
struct pm_node * right
AndNode::right.
Definition ast.h:1291
ArgumentsNode.
Definition ast.h:1323
pm_node_t base
The embedded base node.
Definition ast.h:1325
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1336
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
ArrayNode.
Definition ast.h:1354
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1364
ArrayPatternNode.
Definition ast.h:1415
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1423
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1463
pm_node_t base
The embedded base node.
Definition ast.h:1417
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1473
AssocNode.
Definition ast.h:1488
struct pm_node * value
AssocNode::value.
Definition ast.h:1520
struct pm_node * key
AssocNode::key.
Definition ast.h:1507
BeginNode.
Definition ast.h:1614
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1667
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1647
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1637
pm_node_t base
The embedded base node.
Definition ast.h:1616
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1657
This struct represents a set of binding powers used for a given token.
Definition prism.c:12948
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12956
pm_binding_power_t left
The left binding power.
Definition prism.c:12950
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12962
pm_binding_power_t right
The right binding power.
Definition prism.c:12953
BlockLocalVariableNode.
Definition ast.h:1733
BlockNode.
Definition ast.h:1761
BlockParameterNode.
Definition ast.h:1837
BlockParametersNode.
Definition ast.h:1891
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2118
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2179
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2199
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2137
pm_constant_id_t name
CallNode::name.
Definition ast.h:2160
pm_node_t base
The embedded base node.
Definition ast.h:2120
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2150
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2170
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2189
struct pm_node * block
CallNode::block.
Definition ast.h:2209
CaseMatchNode.
Definition ast.h:2544
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2567
CaseNode.
Definition ast.h:2614
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2637
ClassVariableReadNode.
Definition ast.h:2909
ClassVariableTargetNode.
Definition ast.h:2938
ClassVariableWriteNode.
Definition ast.h:2961
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:466
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition ast.h:3175
ConstantPathTargetNode.
Definition ast.h:3313
ConstantReadNode.
Definition ast.h:3408
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3437
ConstantWriteNode.
Definition ast.h:3460
This is a node in a linked list of contexts.
Definition parser.h:439
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:363
ElseNode.
Definition ast.h:3639
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3652
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3737
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3750
FindPatternNode.
Definition ast.h:3794
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3802
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3822
pm_node_t base
The embedded base node.
Definition ast.h:3796
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3827
FlipFlopNode.
Definition ast.h:3845
FloatNode.
Definition ast.h:3878
double value
FloatNode::value.
Definition ast.h:3888
pm_node_t base
The embedded base node.
Definition ast.h:3880
ForwardingParameterNode.
Definition ast.h:4014
GlobalVariableReadNode.
Definition ast.h:4174
GlobalVariableTargetNode.
Definition ast.h:4203
GlobalVariableWriteNode.
Definition ast.h:4226
HashNode.
Definition ast.h:4288
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4314
HashPatternNode.
Definition ast.h:4342
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4365
pm_node_t base
The embedded base node.
Definition ast.h:4344
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4370
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4350
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4391
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4451
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4470
ImaginaryNode.
Definition ast.h:4497
InstanceVariableReadNode.
Definition ast.h:4987
InstanceVariableTargetNode.
Definition ast.h:5016
InstanceVariableWriteNode.
Definition ast.h:5039
IntegerNode.
Definition ast.h:5107
pm_integer_t value
IntegerNode::value.
Definition ast.h:5117
pm_node_t base
The embedded base node.
Definition ast.h:5109
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5145
InterpolatedRegularExpressionNode.
Definition ast.h:5191
InterpolatedStringNode.
Definition ast.h:5228
pm_node_t base
The embedded base node.
Definition ast.h:5230
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5236
InterpolatedSymbolNode.
Definition ast.h:5261
pm_node_t base
The embedded base node.
Definition ast.h:5263
InterpolatedXStringNode.
Definition ast.h:5294
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5302
pm_node_t base
The embedded base node.
Definition ast.h:5296
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5307
KeywordHashNode.
Definition ast.h:5366
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
enum pm_lex_mode::@95 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
union pm_lex_mode::@96 as
The data associated with this type of lex mode.
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
LocalVariableReadNode.
Definition ast.h:5608
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5639
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5626
LocalVariableTargetNode.
Definition ast.h:5654
LocalVariableWriteNode.
Definition ast.h:5682
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5709
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5696
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
MatchLastLineNode.
Definition ast.h:5774
MatchWriteNode.
Definition ast.h:5878
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:5891
MissingNode.
Definition ast.h:5903
MultiTargetNode.
Definition ast.h:5974
pm_node_t base
The embedded base node.
Definition ast.h:5976
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6032
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:5992
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6042
MultiWriteNode.
Definition ast.h:6057
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:557
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1068
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1073
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1079
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1091
OptionalParameterNode.
Definition ast.h:6330
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:104
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:153
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:115
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:169
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:176
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:130
int32_t line
The line within the file that the parse starts on.
Definition options.h:124
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:109
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:162
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:186
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:135
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:118
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:150
OrNode.
Definition ast.h:6368
struct pm_node * left
OrNode::left.
Definition ast.h:6384
struct pm_node * right
OrNode::right.
Definition ast.h:6397
ParametersNode.
Definition ast.h:6423
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6441
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6461
pm_node_t base
The embedded base node.
Definition ast.h:6425
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6456
ParenthesesNode.
Definition ast.h:6479
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6487
This struct represents the overall parser.
Definition parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:909
struct pm_parser::@101 lex_modes
A stack of lex modes.
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:646
RangeNode.
Definition ast.h:6685
struct pm_node * right
RangeNode::right.
Definition ast.h:6715
struct pm_node * left
RangeNode::left.
Definition ast.h:6701
RationalNode.
Definition ast.h:6743
pm_node_t base
The embedded base node.
Definition ast.h:6745
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6755
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:10374
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10379
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10376
RegularExpressionNode.
Definition ast.h:6810
pm_node_t base
The embedded base node.
Definition ast.h:6812
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:6833
RequiredParameterNode.
Definition ast.h:6884
RescueModifierNode.
Definition ast.h:6907
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:6925
RescueNode.
Definition ast.h:6945
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:6983
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:6973
pm_node_t base
The embedded base node.
Definition ast.h:6947
This struct represents a node in a linked list of scopes.
Definition parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
SplatNode.
Definition ast.h:7245
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7258
StatementsNode.
Definition ast.h:7273
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7281
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7308
pm_node_t base
The embedded base node.
Definition ast.h:7310
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7331
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7326
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7316
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@102 type
The type of the string.
SymbolNode.
Definition ast.h:7400
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7413
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7423
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:10348
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10353
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10359
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
UndefNode.
Definition ast.h:7456
UnlessNode.
Definition ast.h:7487
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7537
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7547
WhenNode.
Definition ast.h:7623
XStringNode.
Definition ast.h:7714