Ruby 3.5.0dev (2025-10-09 revision fa409d5f3af507a1e4f31642924dd694f37b1c33)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * Width constant used for tab characters.
 * NOTE(review): presumably the number of columns a tab advances to; confirm
 * against the call sites, which are outside this section.
 */
#define PM_TAB_WHITESPACE_SIZE 8

/**
 * Smaller and larger of two values. Both macros evaluate one of their
 * arguments twice, so do not pass expressions with side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
20
21/******************************************************************************/
22/* Lex mode manipulations */
23/******************************************************************************/
24
/**
 * Return the byte that should be treated as the "incrementor" for a literal
 * opened with `start`. Only the four bracket-style openers ( [ { < map to
 * themselves; every other byte yields '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    const bool nests = (start == '(') || (start == '[') || (start == '{') || (start == '<');
    return nests ? start : (uint8_t) '\0';
}
41
/**
 * Return the byte that closes a literal opened with `start`. Bracket-style
 * openers map to their matching closer; any other byte terminates itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * The classification returned by lex_state_ignored_p.
 */
typedef enum {
    /** Returned when neither of the other two cases applies. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** Returned in the BEG/CLASS/FNAME/DOT states when LABELED is not set. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** Returned when the state, ignoring LABEL, is exactly ARG|LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
/**
 * Lex state debug logging is off unless the build defines PM_DEBUG_LOGGING to
 * a non-zero value.
 */
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323
324#if PM_DEBUG_LOGGING
325PRISM_ATTRIBUTE_UNUSED static void
326debug_state(pm_parser_t *parser) {
327 fprintf(stderr, "STATE: ");
328 bool first = true;
329
330 if (parser->lex_state == PM_LEX_STATE_NONE) {
331 fprintf(stderr, "NONE\n");
332 return;
333 }
334
335#define CHECK_STATE(state) \
336 if (parser->lex_state & state) { \
337 if (!first) fprintf(stderr, "|"); \
338 fprintf(stderr, "%s", #state); \
339 first = false; \
340 }
341
342 CHECK_STATE(PM_LEX_STATE_BEG)
343 CHECK_STATE(PM_LEX_STATE_END)
344 CHECK_STATE(PM_LEX_STATE_ENDARG)
345 CHECK_STATE(PM_LEX_STATE_ENDFN)
346 CHECK_STATE(PM_LEX_STATE_ARG)
347 CHECK_STATE(PM_LEX_STATE_CMDARG)
348 CHECK_STATE(PM_LEX_STATE_MID)
349 CHECK_STATE(PM_LEX_STATE_FNAME)
350 CHECK_STATE(PM_LEX_STATE_DOT)
351 CHECK_STATE(PM_LEX_STATE_CLASS)
352 CHECK_STATE(PM_LEX_STATE_LABEL)
353 CHECK_STATE(PM_LEX_STATE_LABELED)
354 CHECK_STATE(PM_LEX_STATE_FITEM)
355
356#undef CHECK_STATE
357
358 fprintf(stderr, "\n");
359}
360
361static void
362debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364 debug_state(parser);
365 lex_state_set(parser, state);
366 fprintf(stderr, "Now: ");
367 debug_state(parser);
368 fprintf(stderr, "\n");
369}
370
/**
 * From here on, route every lex_state_set call through the logging wrapper,
 * tagging it with the calling function and source line.
 */
#define lex_state_set(parser, state) \
    debug_lex_state_set(parser, state, __func__, __LINE__)
372#endif
373
374/******************************************************************************/
375/* Command-line macro helpers */
376/******************************************************************************/
377
/**
 * Non-zero when the given option bit(s) are set in the parser's command_line
 * field.
 */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) \
    ((parser)->command_line & (option))

/** Non-zero when PM_OPTIONS_COMMAND_LINE_A is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_E is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_L is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_N is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_P is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_X is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
399/******************************************************************************/
400/* Diagnostic-related functions */
401/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append a formatted error covering `start` to `end` to the parser's error
 * list. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format.
 *
 * The parser argument is parenthesized so that any pointer expression, not
 * just a plain identifier, can be passed.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error covering the range described by `location`, which
 * is a pointer to something with `start` and `end` members.
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (location)->start, \
        (location)->end, \
        diag_id, \
        __VA_ARGS__ \
    )
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/**
 * Append a formatted error located at the given node's location.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )

/**
 * Append a formatted error located at the given node's location, passing the
 * byte length of the node's source and a pointer to it as the two format
 * arguments.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT( \
        parser, \
        node, \
        diag_id, \
        (int) ((node)->location.end - (node)->location.start), \
        (const char *) (node)->location.start \
    )
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error located at the given token. The token is passed by
 * value (or as an lvalue), not as a pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )

/**
 * Append a formatted error located at the given token, passing the byte
 * length of the token and a pointer to its first byte as the two format
 * arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a formatted warning covering `start` to `end` to the parser's
 * warning list. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format.
 *
 * The parser argument is parenthesized so that any pointer expression, not
 * just a plain identifier, can be passed.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
519
/**
 * Append a formatted warning located at the given token. The token is passed
 * by value (or as an lvalue), not as a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )

/**
 * Append a formatted warning located at the given token, passing the byte
 * length of the token and a pointer to its first byte as the two format
 * arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )

/**
 * Append a formatted warning located at the given node's location.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
558/******************************************************************************/
559/* Scope-related functions */
560/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/**
 * The possible outcomes of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** A closed scope declares the parameter and no open scope inside it does. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /** A closed scope declares the parameter, but so does an open scope inside it. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** No closed scope declaring the parameter was found. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
729/******************************************************************************/
730/* Local variable-related functions */
731/******************************************************************************/
732
/**
 * Locals sets whose capacity is below this value are stored as a plain list
 * that is searched linearly; at or above it they are stored as a hash table.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
982/******************************************************************************/
983/* Node-related functions */
984/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
1043 case PM_MATCH_REQUIRED_NODE:
1044 return void_node != NULL ? void_node : node;
1045 case PM_MATCH_PREDICATE_NODE:
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
1148 node = cast->left;
1149 break;
1150 }
1151 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1153
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
1185 case PM_BACK_REFERENCE_READ_NODE:
1186 case PM_CLASS_VARIABLE_READ_NODE:
1187 case PM_GLOBAL_VARIABLE_READ_NODE:
1188 case PM_INSTANCE_VARIABLE_READ_NODE:
1189 case PM_LOCAL_VARIABLE_READ_NODE:
1190 case PM_NUMBERED_REFERENCE_READ_NODE:
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
1249 case PM_CONSTANT_PATH_NODE:
1250 type = "::";
1251 length = 2;
1252 break;
1253 case PM_CONSTANT_READ_NODE:
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
1268 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1269 case PM_INTERPOLATED_STRING_NODE:
1270 case PM_RATIONAL_NODE:
1271 case PM_REGULAR_EXPRESSION_NODE:
1272 case PM_SOURCE_ENCODING_NODE:
1273 case PM_SOURCE_FILE_NODE:
1274 case PM_SOURCE_LINE_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
1287 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The context in which pm_conditional_predicate is examining a node. It
 * selects the wording of (or suppresses) the literal-in-condition warnings.
 */
typedef enum {
    /** An ordinary predicate; literal warnings describe it as a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** An endpoint of a range used as a predicate; warnings say "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** A context in which the literal-in-condition warning is not emitted. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
1377 const pm_node_t *element = cast->elements.nodes[index];
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1392 case PM_REGULAR_EXPRESSION_NODE:
1393 case PM_SOURCE_ENCODING_NODE:
1394 case PM_SOURCE_FILE_NODE:
1395 case PM_SOURCE_LINE_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
1445
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
1477 case PM_REGULAR_EXPRESSION_NODE:
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1482 node->type = PM_MATCH_LAST_LINE_NODE;
1483
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
1489 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1494 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
1510 case PM_STRING_NODE:
1511 case PM_SOURCE_FILE_NODE:
1512 case PM_INTERPOLATED_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
1515 case PM_SYMBOL_NODE:
1516 case PM_INTERPOLATED_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
1519 case PM_SOURCE_LINE_NODE:
1520 case PM_SOURCE_ENCODING_NODE:
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
1526 case PM_CLASS_VARIABLE_WRITE_NODE:
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1529 case PM_CONSTANT_WRITE_NODE:
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1532 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1535 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1538 case PM_LOCAL_VARIABLE_WRITE_NODE:
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1541 case PM_MULTI_WRITE_NODE:
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
/** A zero-width location whose start and end are both parser->start. */
#define PM_LOCATION_NULL_VALUE(parser) \
    ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })

/** The location spanned by the given token (passed as a pointer). */
#define PM_LOCATION_TOKEN_VALUE(token) \
    ((pm_location_t) { .start = (token)->start, .end = (token)->end })

/** A copy of the location of a node accessed through a pm_node_t pointer. */
#define PM_LOCATION_NODE_VALUE(node) \
    ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })

/** A copy of the location of a node accessed through a specific node type. */
#define PM_LOCATION_NODE_BASE_VALUE(node) \
    ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })

/** The value used for an optional location that is absent: both ends NULL. */
#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE \
    ((pm_location_t) { .start = NULL, .end = NULL })

/**
 * The location of the given token, or the absent-location value when the
 * token's type is PM_TOKEN_NOT_PROVIDED.
 */
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) \
    ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1653 if (n <= 0) return 0;
1654
1655 if (parser->encoding_changed) {
1656 size_t width;
1657
1658 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1659 return width;
1660 } else if (*b == '_') {
1661 return 1;
1662 } else if (*b >= 0x80) {
1663 return parser->encoding->char_width(b, n);
1664 } else {
1665 return 0;
1666 }
1667 } else if (*b < 0x80) {
1668 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1669 } else {
1670 return pm_encoding_utf_8_char_width(b, n);
1671 }
1672}
1673
1678static inline size_t
1679char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1680 if (n <= 0) {
1681 return 0;
1682 } else if (*b < 0x80) {
1683 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1684 } else {
1685 return pm_encoding_utf_8_char_width(b, n);
1686 }
1687}
1688
1694static inline size_t
1695char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1696 if (n <= 0) {
1697 return 0;
1698 } else if (parser->encoding_changed) {
1699 size_t width;
1700
1701 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1702 return width;
1703 } else if (*b == '_') {
1704 return 1;
1705 } else if (*b >= 0x80) {
1706 return parser->encoding->char_width(b, n);
1707 } else {
1708 return 0;
1709 }
1710 } else {
1711 return char_is_identifier_utf8(b, n);
1712 }
1713}
1714
// Here we're defining a perfect hash for the characters that are allowed in
// global names. This is used to quickly check the next character after a $ to
// see if it's a valid character for a global name.
//
// The table holds one 32-bit word per block of 32 characters, starting at
// 0x20. A character c lives in word (c / 32 - 1), at bit (c % 32).
#define PM_GLOBAL_PUNCT_BIT(c, word) ((((c) / 32) - 1 == (word)) ? (1U << ((c) % 32)) : 0)
#define PM_GLOBAL_PUNCT_WORD(word) ( \
    PM_GLOBAL_PUNCT_BIT('~', word) | PM_GLOBAL_PUNCT_BIT('*', word) | \
    PM_GLOBAL_PUNCT_BIT('$', word) | PM_GLOBAL_PUNCT_BIT('?', word) | \
    PM_GLOBAL_PUNCT_BIT('!', word) | PM_GLOBAL_PUNCT_BIT('@', word) | \
    PM_GLOBAL_PUNCT_BIT('/', word) | PM_GLOBAL_PUNCT_BIT('\\', word) | \
    PM_GLOBAL_PUNCT_BIT(';', word) | PM_GLOBAL_PUNCT_BIT(',', word) | \
    PM_GLOBAL_PUNCT_BIT('.', word) | PM_GLOBAL_PUNCT_BIT('=', word) | \
    PM_GLOBAL_PUNCT_BIT(':', word) | PM_GLOBAL_PUNCT_BIT('<', word) | \
    PM_GLOBAL_PUNCT_BIT('>', word) | PM_GLOBAL_PUNCT_BIT('\"', word) | \
    PM_GLOBAL_PUNCT_BIT('&', word) | PM_GLOBAL_PUNCT_BIT('`', word) | \
    PM_GLOBAL_PUNCT_BIT('\'', word) | PM_GLOBAL_PUNCT_BIT('+', word) | \
    PM_GLOBAL_PUNCT_BIT('0', word))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PM_GLOBAL_PUNCT_WORD(0),
    PM_GLOBAL_PUNCT_WORD(1),
    PM_GLOBAL_PUNCT_WORD(2)
};

#undef PM_GLOBAL_PUNCT_BIT
#undef PM_GLOBAL_PUNCT_WORD

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes at or below 0x20 and above 0x7e
 * are never in the table.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    if (b <= 0x20 || b > 0x7e) return false;

    const unsigned int offset = (unsigned int) b - 0x20;
    const unsigned int word = pm_global_name_punctuation_hash[offset / 32];

    return ((word >> (b % 32)) & 1U) != 0;
}
1739
1740static inline bool
1741token_is_setter_name(pm_token_t *token) {
1742 return (
1743 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1744 ((token->type == PM_TOKEN_IDENTIFIER) &&
1745 (token->end - token->start >= 2) &&
1746 (token->end[-1] == '='))
1747 );
1748}
1749
/**
 * Return true if the first length bytes of source spell one of the keywords
 * in the table below. Only length bytes of source are read, so source does
 * not need to be NUL-terminated.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(text) { text, sizeof(text) - 1 }

    static const struct {
        const char *text;
        size_t length;
    } keywords[] = {
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"), KEYWORD("return"),
        KEYWORD("unless"),
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    for (size_t index = 0; index < sizeof(keywords) / sizeof(keywords[0]); index++) {
        // The length comparison comes first so that source is only read when
        // exactly length bytes are going to be compared.
        if (keywords[index].length == length && memcmp(source, keywords[index].text, length) == 0) {
            return true;
        }
    }

    return false;
}
1820
1821/******************************************************************************/
1822/* Node flag handling functions */
1823/******************************************************************************/
1824
1828static inline void
1829pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1830 node->flags |= flag;
1831}
1832
1836static inline void
1837pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1838 node->flags &= (pm_node_flags_t) ~flag;
1839}
1840
1844static inline void
1845pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1846 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1847 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1848 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1849 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1850 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1851 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1852 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1853 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1854
1855 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1856}
1857
1858/******************************************************************************/
1859/* Node creation functions */
1860/******************************************************************************/
1861
1867#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1868
1872static inline pm_node_flags_t
1873pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1874 pm_node_flags_t flags = 0;
1875
1876 if (closing->type == PM_TOKEN_REGEXP_END) {
1877 pm_buffer_t unknown_flags = { 0 };
1878
1879 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1880 switch (*flag) {
1881 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1882 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1883 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1884 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1885
1886 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1887 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1888 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1889 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1890
1891 default: pm_buffer_append_byte(&unknown_flags, *flag);
1892 }
1893 }
1894
1895 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1896 if (unknown_flags_length != 0) {
1897 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1898 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1899 }
1900 pm_buffer_free(&unknown_flags);
1901 }
1902
1903 return flags;
1904}
1905
1906#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1907
1908static pm_statements_node_t *
1909pm_statements_node_create(pm_parser_t *parser);
1910
1911static void
1912pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1913
1914static size_t
1915pm_statements_node_body_length(pm_statements_node_t *node);
1916
1921static inline void *
1922pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1923 void *memory = xcalloc(1, size);
1924 if (memory == NULL) {
1925 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1926 abort();
1927 }
1928 return memory;
1929}
1930
/**
 * Allocate zeroed memory for a node of the given type through pm_node_alloc
 * and cast the result to a pointer to that type. The whole expansion is
 * parenthesized so that it can be used safely inside larger expressions.
 */
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

/**
 * Increment the parser's node_id counter and yield the new value. The parser
 * argument is parenthesized so that any pointer expression can be passed.
 */
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1933
1937static pm_missing_node_t *
1938pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1939 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1940
1941 *node = (pm_missing_node_t) {{
1942 .type = PM_MISSING_NODE,
1943 .node_id = PM_NODE_IDENTIFY(parser),
1944 .location = { .start = start, .end = end }
1945 }};
1946
1947 return node;
1948}
1949
1953static pm_alias_global_variable_node_t *
1954pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1955 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1956 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1957
1958 *node = (pm_alias_global_variable_node_t) {
1959 {
1960 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1961 .node_id = PM_NODE_IDENTIFY(parser),
1962 .location = {
1963 .start = keyword->start,
1964 .end = old_name->location.end
1965 },
1966 },
1967 .new_name = new_name,
1968 .old_name = old_name,
1969 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1970 };
1971
1972 return node;
1973}
1974
1978static pm_alias_method_node_t *
1979pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1980 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1981 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1982
1983 *node = (pm_alias_method_node_t) {
1984 {
1985 .type = PM_ALIAS_METHOD_NODE,
1986 .node_id = PM_NODE_IDENTIFY(parser),
1987 .location = {
1988 .start = keyword->start,
1989 .end = old_name->location.end
1990 },
1991 },
1992 .new_name = new_name,
1993 .old_name = old_name,
1994 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1995 };
1996
1997 return node;
1998}
1999
2003static pm_alternation_pattern_node_t *
2004pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2005 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2006
2007 *node = (pm_alternation_pattern_node_t) {
2008 {
2009 .type = PM_ALTERNATION_PATTERN_NODE,
2010 .node_id = PM_NODE_IDENTIFY(parser),
2011 .location = {
2012 .start = left->location.start,
2013 .end = right->location.end
2014 },
2015 },
2016 .left = left,
2017 .right = right,
2018 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2019 };
2020
2021 return node;
2022}
2023
2027static pm_and_node_t *
2028pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2029 pm_assert_value_expression(parser, left);
2030
2031 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2032
2033 *node = (pm_and_node_t) {
2034 {
2035 .type = PM_AND_NODE,
2036 .node_id = PM_NODE_IDENTIFY(parser),
2037 .location = {
2038 .start = left->location.start,
2039 .end = right->location.end
2040 },
2041 },
2042 .left = left,
2043 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2044 .right = right
2045 };
2046
2047 return node;
2048}
2049
2053static pm_arguments_node_t *
2054pm_arguments_node_create(pm_parser_t *parser) {
2055 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2056
2057 *node = (pm_arguments_node_t) {
2058 {
2059 .type = PM_ARGUMENTS_NODE,
2060 .node_id = PM_NODE_IDENTIFY(parser),
2061 .location = PM_LOCATION_NULL_VALUE(parser)
2062 },
2063 .arguments = { 0 }
2064 };
2065
2066 return node;
2067}
2068
2072static size_t
2073pm_arguments_node_size(pm_arguments_node_t *node) {
2074 return node->arguments.size;
2075}
2076
2080static void
2081pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2082 if (pm_arguments_node_size(node) == 0) {
2083 node->base.location.start = argument->location.start;
2084 }
2085
2086 node->base.location.end = argument->location.end;
2087 pm_node_list_append(&node->arguments, argument);
2088
2089 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2090 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2091 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2092 } else {
2093 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2094 }
2095 }
2096}
2097
2101static pm_array_node_t *
2102pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2103 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2104
2105 *node = (pm_array_node_t) {
2106 {
2107 .type = PM_ARRAY_NODE,
2108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2109 .node_id = PM_NODE_IDENTIFY(parser),
2110 .location = PM_LOCATION_TOKEN_VALUE(opening)
2111 },
2112 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2113 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2114 .elements = { 0 }
2115 };
2116
2117 return node;
2118}
2119
2123static inline void
2124pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2125 if (!node->elements.size && !node->opening_loc.start) {
2126 node->base.location.start = element->location.start;
2127 }
2128
2129 pm_node_list_append(&node->elements, element);
2130 node->base.location.end = element->location.end;
2131
2132 // If the element is not a static literal, then the array is not a static
2133 // literal. Turn that flag off.
2134 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2135 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2136 }
2137
2138 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2139 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2140 }
2141}
2142
2146static void
2147pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2148 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2149 node->base.location.end = closing->end;
2150 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2151}
2152
2157static pm_array_pattern_node_t *
2158pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2159 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2160
2161 *node = (pm_array_pattern_node_t) {
2162 {
2163 .type = PM_ARRAY_PATTERN_NODE,
2164 .node_id = PM_NODE_IDENTIFY(parser),
2165 .location = {
2166 .start = nodes->nodes[0]->location.start,
2167 .end = nodes->nodes[nodes->size - 1]->location.end
2168 },
2169 },
2170 .constant = NULL,
2171 .rest = NULL,
2172 .requireds = { 0 },
2173 .posts = { 0 },
2174 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2175 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2176 };
2177
2178 // For now we're going to just copy over each pointer manually. This could be
2179 // much more efficient, as we could instead resize the node list.
2180 bool found_rest = false;
2181 pm_node_t *child;
2182
2183 PM_NODE_LIST_FOREACH(nodes, index, child) {
2184 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2185 node->rest = child;
2186 found_rest = true;
2187 } else if (found_rest) {
2188 pm_node_list_append(&node->posts, child);
2189 } else {
2190 pm_node_list_append(&node->requireds, child);
2191 }
2192 }
2193
2194 return node;
2195}
2196
2200static pm_array_pattern_node_t *
2201pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2202 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2203
2204 *node = (pm_array_pattern_node_t) {
2205 {
2206 .type = PM_ARRAY_PATTERN_NODE,
2207 .node_id = PM_NODE_IDENTIFY(parser),
2208 .location = rest->location,
2209 },
2210 .constant = NULL,
2211 .rest = rest,
2212 .requireds = { 0 },
2213 .posts = { 0 },
2214 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2215 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 {
2231 .type = PM_ARRAY_PATTERN_NODE,
2232 .node_id = PM_NODE_IDENTIFY(parser),
2233 .location = {
2234 .start = constant->location.start,
2235 .end = closing->end
2236 },
2237 },
2238 .constant = constant,
2239 .rest = NULL,
2240 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2241 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2242 .requireds = { 0 },
2243 .posts = { 0 }
2244 };
2245
2246 return node;
2247}
2248
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2255 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2256
2257 *node = (pm_array_pattern_node_t) {
2258 {
2259 .type = PM_ARRAY_PATTERN_NODE,
2260 .node_id = PM_NODE_IDENTIFY(parser),
2261 .location = {
2262 .start = opening->start,
2263 .end = closing->end
2264 },
2265 },
2266 .constant = NULL,
2267 .rest = NULL,
2268 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2269 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2270 .requireds = { 0 },
2271 .posts = { 0 }
2272 };
2273
2274 return node;
2275}
2276
2277static inline void
2278pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2279 pm_node_list_append(&node->requireds, inner);
2280}
2281
2285static pm_assoc_node_t *
2286pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2287 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2288 const uint8_t *end;
2289
2290 if (value != NULL && value->location.end > key->location.end) {
2291 end = value->location.end;
2292 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2293 end = operator->end;
2294 } else {
2295 end = key->location.end;
2296 }
2297
2298 // Hash string keys will be frozen, so we can mark them as frozen here so
2299 // that the compiler picks them up and also when we check for static literal
2300 // on the keys it gets factored in.
2301 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2302 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2303 }
2304
2305 // If the key and value of this assoc node are both static literals, then
2306 // we can mark this node as a static literal.
2307 pm_node_flags_t flags = 0;
2308 if (
2309 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2310 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2311 ) {
2312 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2313 }
2314
2315 *node = (pm_assoc_node_t) {
2316 {
2317 .type = PM_ASSOC_NODE,
2318 .flags = flags,
2319 .node_id = PM_NODE_IDENTIFY(parser),
2320 .location = {
2321 .start = key->location.start,
2322 .end = end
2323 },
2324 },
2325 .key = key,
2326 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2327 .value = value
2328 };
2329
2330 return node;
2331}
2332
2336static pm_assoc_splat_node_t *
2337pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2338 assert(operator->type == PM_TOKEN_USTAR_STAR);
2339 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2340
2341 *node = (pm_assoc_splat_node_t) {
2342 {
2343 .type = PM_ASSOC_SPLAT_NODE,
2344 .node_id = PM_NODE_IDENTIFY(parser),
2345 .location = {
2346 .start = operator->start,
2347 .end = value == NULL ? operator->end : value->location.end
2348 },
2349 },
2350 .value = value,
2351 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2352 };
2353
2354 return node;
2355}
2356
2360static pm_back_reference_read_node_t *
2361pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2362 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2363 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2364
2365 *node = (pm_back_reference_read_node_t) {
2366 {
2367 .type = PM_BACK_REFERENCE_READ_NODE,
2368 .node_id = PM_NODE_IDENTIFY(parser),
2369 .location = PM_LOCATION_TOKEN_VALUE(name),
2370 },
2371 .name = pm_parser_constant_id_token(parser, name)
2372 };
2373
2374 return node;
2375}
2376
2380static pm_begin_node_t *
2381pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2382 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2383
2384 *node = (pm_begin_node_t) {
2385 {
2386 .type = PM_BEGIN_NODE,
2387 .node_id = PM_NODE_IDENTIFY(parser),
2388 .location = {
2389 .start = begin_keyword->start,
2390 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2391 },
2392 },
2393 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2394 .statements = statements,
2395 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2396 };
2397
2398 return node;
2399}
2400
2404static void
2405pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2406 // If the begin keyword doesn't exist, we set the start on the begin_node
2407 if (!node->begin_keyword_loc.start) {
2408 node->base.location.start = rescue_clause->base.location.start;
2409 }
2410 node->base.location.end = rescue_clause->base.location.end;
2411 node->rescue_clause = rescue_clause;
2412}
2413
2417static void
2418pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2419 node->base.location.end = else_clause->base.location.end;
2420 node->else_clause = else_clause;
2421}
2422
2426static void
2427pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2428 node->base.location.end = ensure_clause->base.location.end;
2429 node->ensure_clause = ensure_clause;
2430}
2431
2435static void
2436pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2437 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2438
2439 node->base.location.end = end_keyword->end;
2440 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2441}
2442
2446static pm_block_argument_node_t *
2447pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2448 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2449
2450 *node = (pm_block_argument_node_t) {
2451 {
2452 .type = PM_BLOCK_ARGUMENT_NODE,
2453 .node_id = PM_NODE_IDENTIFY(parser),
2454 .location = {
2455 .start = operator->start,
2456 .end = expression == NULL ? operator->end : expression->location.end
2457 },
2458 },
2459 .expression = expression,
2460 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2461 };
2462
2463 return node;
2464}
2465
2469static pm_block_node_t *
2470pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2471 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2472
2473 *node = (pm_block_node_t) {
2474 {
2475 .type = PM_BLOCK_NODE,
2476 .node_id = PM_NODE_IDENTIFY(parser),
2477 .location = { .start = opening->start, .end = closing->end },
2478 },
2479 .locals = *locals,
2480 .parameters = parameters,
2481 .body = body,
2482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2484 };
2485
2486 return node;
2487}
2488
2492static pm_block_parameter_node_t *
2493pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2494 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2495 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2496
2497 *node = (pm_block_parameter_node_t) {
2498 {
2499 .type = PM_BLOCK_PARAMETER_NODE,
2500 .node_id = PM_NODE_IDENTIFY(parser),
2501 .location = {
2502 .start = operator->start,
2503 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2504 },
2505 },
2506 .name = pm_parser_optional_constant_id_token(parser, name),
2507 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2508 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2509 };
2510
2511 return node;
2512}
2513
2517static pm_block_parameters_node_t *
2518pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2519 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2520
2521 const uint8_t *start;
2522 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2523 start = opening->start;
2524 } else if (parameters != NULL) {
2525 start = parameters->base.location.start;
2526 } else {
2527 start = NULL;
2528 }
2529
2530 const uint8_t *end;
2531 if (parameters != NULL) {
2532 end = parameters->base.location.end;
2533 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2534 end = opening->end;
2535 } else {
2536 end = NULL;
2537 }
2538
2539 *node = (pm_block_parameters_node_t) {
2540 {
2541 .type = PM_BLOCK_PARAMETERS_NODE,
2542 .node_id = PM_NODE_IDENTIFY(parser),
2543 .location = {
2544 .start = start,
2545 .end = end
2546 }
2547 },
2548 .parameters = parameters,
2549 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2550 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2551 .locals = { 0 }
2552 };
2553
2554 return node;
2555}
2556
2560static void
2561pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2562 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2563
2564 node->base.location.end = closing->end;
2565 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2566}
2567
2571static pm_block_local_variable_node_t *
2572pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2573 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2574
2575 *node = (pm_block_local_variable_node_t) {
2576 {
2577 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2578 .node_id = PM_NODE_IDENTIFY(parser),
2579 .location = PM_LOCATION_TOKEN_VALUE(name),
2580 },
2581 .name = pm_parser_constant_id_token(parser, name)
2582 };
2583
2584 return node;
2585}
2586
2590static void
2591pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2592 pm_node_list_append(&node->locals, (pm_node_t *) local);
2593
2594 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2595 node->base.location.end = local->base.location.end;
2596}
2597
2601static pm_break_node_t *
2602pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2603 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2604 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2605
2606 *node = (pm_break_node_t) {
2607 {
2608 .type = PM_BREAK_NODE,
2609 .node_id = PM_NODE_IDENTIFY(parser),
2610 .location = {
2611 .start = keyword->start,
2612 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2613 },
2614 },
2615 .arguments = arguments,
2616 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2617 };
2618
2619 return node;
2620}
2621
2622// There are certain flags that we want to use internally but don't want to
2623// expose because they are not relevant beyond parsing. Therefore we'll define
2624// them here and not define them in config.yml/a header file.
2625static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2626
2627static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2628static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2629static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2630
2636static pm_call_node_t *
2637pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2638 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2639
2640 *node = (pm_call_node_t) {
2641 {
2642 .type = PM_CALL_NODE,
2643 .flags = flags,
2644 .node_id = PM_NODE_IDENTIFY(parser),
2645 .location = PM_LOCATION_NULL_VALUE(parser),
2646 },
2647 .receiver = NULL,
2648 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2649 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2650 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2651 .arguments = NULL,
2652 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2653 .block = NULL,
2654 .name = 0
2655 };
2656
2657 return node;
2658}
2659
2664static inline pm_node_flags_t
2665pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2666 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2667}
2668
2673static pm_call_node_t *
2674pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2675 pm_assert_value_expression(parser, receiver);
2676
2677 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2678 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2679 flags |= PM_CALL_NODE_FLAGS_INDEX;
2680 }
2681
2682 pm_call_node_t *node = pm_call_node_create(parser, flags);
2683
2684 node->base.location.start = receiver->location.start;
2685 node->base.location.end = pm_arguments_end(arguments);
2686
2687 node->receiver = receiver;
2688 node->message_loc.start = arguments->opening_loc.start;
2689 node->message_loc.end = arguments->closing_loc.end;
2690
2691 node->opening_loc = arguments->opening_loc;
2692 node->arguments = arguments->arguments;
2693 node->closing_loc = arguments->closing_loc;
2694 node->block = arguments->block;
2695
2696 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2697 return node;
2698}
2699
2703static pm_call_node_t *
2704pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2705 pm_assert_value_expression(parser, receiver);
2706 pm_assert_value_expression(parser, argument);
2707
2708 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2709
2710 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2711 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2712
2713 node->receiver = receiver;
2714 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2715
2716 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2717 pm_arguments_node_arguments_append(arguments, argument);
2718 node->arguments = arguments;
2719
2720 node->name = pm_parser_constant_id_token(parser, operator);
2721 return node;
2722}
2723
2727static pm_call_node_t *
2728pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2729 pm_assert_value_expression(parser, receiver);
2730
2731 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2732
2733 node->base.location.start = receiver->location.start;
2734 const uint8_t *end = pm_arguments_end(arguments);
2735 if (end == NULL) {
2736 end = message->end;
2737 }
2738 node->base.location.end = end;
2739
2740 node->receiver = receiver;
2741 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2742 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2743 node->opening_loc = arguments->opening_loc;
2744 node->arguments = arguments->arguments;
2745 node->closing_loc = arguments->closing_loc;
2746 node->block = arguments->block;
2747
2748 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2749 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2750 }
2751
2752 node->name = pm_parser_constant_id_token(parser, message);
2753 return node;
2754}
2755
2759static pm_call_node_t *
2760pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2761 pm_call_node_t *node = pm_call_node_create(parser, 0);
2762 node->base.location.start = parser->start;
2763 node->base.location.end = parser->end;
2764
2765 node->receiver = receiver;
2766 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2767 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2768 node->arguments = arguments;
2769
2770 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2771 return node;
2772}
2773
2778static pm_call_node_t *
2779pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2780 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2781
2782 node->base.location.start = message->start;
2783 node->base.location.end = pm_arguments_end(arguments);
2784
2785 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2786 node->opening_loc = arguments->opening_loc;
2787 node->arguments = arguments->arguments;
2788 node->closing_loc = arguments->closing_loc;
2789 node->block = arguments->block;
2790
2791 node->name = pm_parser_constant_id_token(parser, message);
2792 return node;
2793}
2794
2799static pm_call_node_t *
2800pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2801 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2802
2803 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2804 node->arguments = arguments;
2805
2806 node->name = name;
2807 return node;
2808}
2809
2813static pm_call_node_t *
2814pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2815 pm_assert_value_expression(parser, receiver);
2816 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2817
2818 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2819
2820 node->base.location.start = message->start;
2821 if (arguments->closing_loc.start != NULL) {
2822 node->base.location.end = arguments->closing_loc.end;
2823 } else {
2824 assert(receiver != NULL);
2825 node->base.location.end = receiver->location.end;
2826 }
2827
2828 node->receiver = receiver;
2829 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2830 node->opening_loc = arguments->opening_loc;
2831 node->arguments = arguments->arguments;
2832 node->closing_loc = arguments->closing_loc;
2833
2834 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2835 return node;
2836}
2837
2841static pm_call_node_t *
2842pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2843 pm_assert_value_expression(parser, receiver);
2844
2845 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2846
2847 node->base.location.start = receiver->location.start;
2848 node->base.location.end = pm_arguments_end(arguments);
2849
2850 node->receiver = receiver;
2851 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2852 node->opening_loc = arguments->opening_loc;
2853 node->arguments = arguments->arguments;
2854 node->closing_loc = arguments->closing_loc;
2855 node->block = arguments->block;
2856
2857 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2858 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2859 }
2860
2861 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2862 return node;
2863}
2864
2868static pm_call_node_t *
2869pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2870 pm_assert_value_expression(parser, receiver);
2871
2872 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2873
2874 node->base.location.start = operator->start;
2875 node->base.location.end = receiver->location.end;
2876
2877 node->receiver = receiver;
2878 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2879
2880 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2881 return node;
2882}
2883
2888static pm_call_node_t *
2889pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2890 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2891
2892 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2893 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2894
2895 node->name = pm_parser_constant_id_token(parser, message);
2896 return node;
2897}
2898
2903static inline bool
2904pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2905 return (
2906 (node->message_loc.start != NULL) &&
2907 (node->message_loc.end[-1] != '!') &&
2908 (node->message_loc.end[-1] != '?') &&
2909 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2910 (node->opening_loc.start == NULL) &&
2911 (node->arguments == NULL) &&
2912 (node->block == NULL)
2913 );
2914}
2915
2919static void
2920pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2921 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2922
2923 if (write_constant->length > 0) {
2924 size_t length = write_constant->length - 1;
2925
2926 void *memory = xmalloc(length);
2927 memcpy(memory, write_constant->start, length);
2928
2929 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2930 } else {
2931 // We can get here if the message was missing because of a syntax error.
2932 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2933 }
2934}
2935
2939static pm_call_and_write_node_t *
2940pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2941 assert(target->block == NULL);
2942 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2943 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2944
2945 *node = (pm_call_and_write_node_t) {
2946 {
2947 .type = PM_CALL_AND_WRITE_NODE,
2948 .flags = target->base.flags,
2949 .node_id = PM_NODE_IDENTIFY(parser),
2950 .location = {
2951 .start = target->base.location.start,
2952 .end = value->location.end
2953 }
2954 },
2955 .receiver = target->receiver,
2956 .call_operator_loc = target->call_operator_loc,
2957 .message_loc = target->message_loc,
2958 .read_name = 0,
2959 .write_name = target->name,
2960 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2961 .value = value
2962 };
2963
2964 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2965
2966 // Here we're going to free the target, since it is no longer necessary.
2967 // However, we don't want to call `pm_node_destroy` because we want to keep
2968 // around all of its children since we just reused them.
2969 xfree(target);
2970
2971 return node;
2972}
2973
2978static void
2979pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2980 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2981 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2982 pm_node_t *node;
2983 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2984 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2985 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2986 break;
2987 }
2988 }
2989 }
2990
2991 if (block != NULL) {
2992 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2993 }
2994 }
2995}
2996
3000static pm_index_and_write_node_t *
3001pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3002 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3003 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
3004
3005 pm_index_arguments_check(parser, target->arguments, target->block);
3006
3007 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3008 *node = (pm_index_and_write_node_t) {
3009 {
3010 .type = PM_INDEX_AND_WRITE_NODE,
3011 .flags = target->base.flags,
3012 .node_id = PM_NODE_IDENTIFY(parser),
3013 .location = {
3014 .start = target->base.location.start,
3015 .end = value->location.end
3016 }
3017 },
3018 .receiver = target->receiver,
3019 .call_operator_loc = target->call_operator_loc,
3020 .opening_loc = target->opening_loc,
3021 .arguments = target->arguments,
3022 .closing_loc = target->closing_loc,
3023 .block = (pm_block_argument_node_t *) target->block,
3024 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3025 .value = value
3026 };
3027
3028 // Here we're going to free the target, since it is no longer necessary.
3029 // However, we don't want to call `pm_node_destroy` because we want to keep
3030 // around all of its children since we just reused them.
3031 xfree(target);
3032
3033 return node;
3034}
3035
3039static pm_call_operator_write_node_t *
3040pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3041 assert(target->block == NULL);
3042 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3043
3044 *node = (pm_call_operator_write_node_t) {
3045 {
3046 .type = PM_CALL_OPERATOR_WRITE_NODE,
3047 .flags = target->base.flags,
3048 .node_id = PM_NODE_IDENTIFY(parser),
3049 .location = {
3050 .start = target->base.location.start,
3051 .end = value->location.end
3052 }
3053 },
3054 .receiver = target->receiver,
3055 .call_operator_loc = target->call_operator_loc,
3056 .message_loc = target->message_loc,
3057 .read_name = 0,
3058 .write_name = target->name,
3059 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3060 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3061 .value = value
3062 };
3063
3064 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3065
3066 // Here we're going to free the target, since it is no longer necessary.
3067 // However, we don't want to call `pm_node_destroy` because we want to keep
3068 // around all of its children since we just reused them.
3069 xfree(target);
3070
3071 return node;
3072}
3073
3077static pm_index_operator_write_node_t *
3078pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3079 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3080
3081 pm_index_arguments_check(parser, target->arguments, target->block);
3082
3083 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3084 *node = (pm_index_operator_write_node_t) {
3085 {
3086 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3087 .flags = target->base.flags,
3088 .node_id = PM_NODE_IDENTIFY(parser),
3089 .location = {
3090 .start = target->base.location.start,
3091 .end = value->location.end
3092 }
3093 },
3094 .receiver = target->receiver,
3095 .call_operator_loc = target->call_operator_loc,
3096 .opening_loc = target->opening_loc,
3097 .arguments = target->arguments,
3098 .closing_loc = target->closing_loc,
3099 .block = (pm_block_argument_node_t *) target->block,
3100 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3101 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3102 .value = value
3103 };
3104
3105 // Here we're going to free the target, since it is no longer necessary.
3106 // However, we don't want to call `pm_node_destroy` because we want to keep
3107 // around all of its children since we just reused them.
3108 xfree(target);
3109
3110 return node;
3111}
3112
3116static pm_call_or_write_node_t *
3117pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3118 assert(target->block == NULL);
3119 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3120 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3121
3122 *node = (pm_call_or_write_node_t) {
3123 {
3124 .type = PM_CALL_OR_WRITE_NODE,
3125 .flags = target->base.flags,
3126 .node_id = PM_NODE_IDENTIFY(parser),
3127 .location = {
3128 .start = target->base.location.start,
3129 .end = value->location.end
3130 }
3131 },
3132 .receiver = target->receiver,
3133 .call_operator_loc = target->call_operator_loc,
3134 .message_loc = target->message_loc,
3135 .read_name = 0,
3136 .write_name = target->name,
3137 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3138 .value = value
3139 };
3140
3141 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3142
3143 // Here we're going to free the target, since it is no longer necessary.
3144 // However, we don't want to call `pm_node_destroy` because we want to keep
3145 // around all of its children since we just reused them.
3146 xfree(target);
3147
3148 return node;
3149}
3150
3154static pm_index_or_write_node_t *
3155pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3156 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3157 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3158
3159 pm_index_arguments_check(parser, target->arguments, target->block);
3160
3161 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3162 *node = (pm_index_or_write_node_t) {
3163 {
3164 .type = PM_INDEX_OR_WRITE_NODE,
3165 .flags = target->base.flags,
3166 .node_id = PM_NODE_IDENTIFY(parser),
3167 .location = {
3168 .start = target->base.location.start,
3169 .end = value->location.end
3170 }
3171 },
3172 .receiver = target->receiver,
3173 .call_operator_loc = target->call_operator_loc,
3174 .opening_loc = target->opening_loc,
3175 .arguments = target->arguments,
3176 .closing_loc = target->closing_loc,
3177 .block = (pm_block_argument_node_t *) target->block,
3178 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3179 .value = value
3180 };
3181
3182 // Here we're going to free the target, since it is no longer necessary.
3183 // However, we don't want to call `pm_node_destroy` because we want to keep
3184 // around all of its children since we just reused them.
3185 xfree(target);
3186
3187 return node;
3188}
3189
3194static pm_call_target_node_t *
3195pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3196 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3197
3198 *node = (pm_call_target_node_t) {
3199 {
3200 .type = PM_CALL_TARGET_NODE,
3201 .flags = target->base.flags,
3202 .node_id = PM_NODE_IDENTIFY(parser),
3203 .location = target->base.location
3204 },
3205 .receiver = target->receiver,
3206 .call_operator_loc = target->call_operator_loc,
3207 .name = target->name,
3208 .message_loc = target->message_loc
3209 };
3210
3211 // Here we're going to free the target, since it is no longer necessary.
3212 // However, we don't want to call `pm_node_destroy` because we want to keep
3213 // around all of its children since we just reused them.
3214 xfree(target);
3215
3216 return node;
3217}
3218
3223static pm_index_target_node_t *
3224pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3225 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3226 pm_node_flags_t flags = target->base.flags;
3227
3228 pm_index_arguments_check(parser, target->arguments, target->block);
3229
3230 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3231 *node = (pm_index_target_node_t) {
3232 {
3233 .type = PM_INDEX_TARGET_NODE,
3234 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3235 .node_id = PM_NODE_IDENTIFY(parser),
3236 .location = target->base.location
3237 },
3238 .receiver = target->receiver,
3239 .opening_loc = target->opening_loc,
3240 .arguments = target->arguments,
3241 .closing_loc = target->closing_loc,
3242 .block = (pm_block_argument_node_t *) target->block,
3243 };
3244
3245 // Here we're going to free the target, since it is no longer necessary.
3246 // However, we don't want to call `pm_node_destroy` because we want to keep
3247 // around all of its children since we just reused them.
3248 xfree(target);
3249
3250 return node;
3251}
3252
3256static pm_capture_pattern_node_t *
3257pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3258 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3259
3260 *node = (pm_capture_pattern_node_t) {
3261 {
3262 .type = PM_CAPTURE_PATTERN_NODE,
3263 .node_id = PM_NODE_IDENTIFY(parser),
3264 .location = {
3265 .start = value->location.start,
3266 .end = target->base.location.end
3267 },
3268 },
3269 .value = value,
3270 .target = target,
3271 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3272 };
3273
3274 return node;
3275}
3276
3280static pm_case_node_t *
3281pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3282 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3283
3284 *node = (pm_case_node_t) {
3285 {
3286 .type = PM_CASE_NODE,
3287 .node_id = PM_NODE_IDENTIFY(parser),
3288 .location = {
3289 .start = case_keyword->start,
3290 .end = end_keyword->end
3291 },
3292 },
3293 .predicate = predicate,
3294 .else_clause = NULL,
3295 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3296 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3297 .conditions = { 0 }
3298 };
3299
3300 return node;
3301}
3302
3306static void
3307pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3308 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3309
3310 pm_node_list_append(&node->conditions, condition);
3311 node->base.location.end = condition->location.end;
3312}
3313
3317static void
3318pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3319 node->else_clause = else_clause;
3320 node->base.location.end = else_clause->base.location.end;
3321}
3322
3326static void
3327pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3328 node->base.location.end = end_keyword->end;
3329 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3330}
3331
3335static pm_case_match_node_t *
3336pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3337 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3338
3339 *node = (pm_case_match_node_t) {
3340 {
3341 .type = PM_CASE_MATCH_NODE,
3342 .node_id = PM_NODE_IDENTIFY(parser),
3343 .location = {
3344 .start = case_keyword->start,
3345 .end = end_keyword->end
3346 },
3347 },
3348 .predicate = predicate,
3349 .else_clause = NULL,
3350 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3351 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3352 .conditions = { 0 }
3353 };
3354
3355 return node;
3356}
3357
3361static void
3362pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3363 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3364
3365 pm_node_list_append(&node->conditions, condition);
3366 node->base.location.end = condition->location.end;
3367}
3368
3372static void
3373pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3374 node->else_clause = else_clause;
3375 node->base.location.end = else_clause->base.location.end;
3376}
3377
3381static void
3382pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3383 node->base.location.end = end_keyword->end;
3384 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3385}
3386
3390static pm_class_node_t *
3391pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3392 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3393
3394 *node = (pm_class_node_t) {
3395 {
3396 .type = PM_CLASS_NODE,
3397 .node_id = PM_NODE_IDENTIFY(parser),
3398 .location = { .start = class_keyword->start, .end = end_keyword->end },
3399 },
3400 .locals = *locals,
3401 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3402 .constant_path = constant_path,
3403 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3404 .superclass = superclass,
3405 .body = body,
3406 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3407 .name = pm_parser_constant_id_token(parser, name)
3408 };
3409
3410 return node;
3411}
3412
3416static pm_class_variable_and_write_node_t *
3417pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3418 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3419 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3420
3421 *node = (pm_class_variable_and_write_node_t) {
3422 {
3423 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3424 .node_id = PM_NODE_IDENTIFY(parser),
3425 .location = {
3426 .start = target->base.location.start,
3427 .end = value->location.end
3428 }
3429 },
3430 .name = target->name,
3431 .name_loc = target->base.location,
3432 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3433 .value = value
3434 };
3435
3436 return node;
3437}
3438
3442static pm_class_variable_operator_write_node_t *
3443pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3444 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3445
3446 *node = (pm_class_variable_operator_write_node_t) {
3447 {
3448 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3449 .node_id = PM_NODE_IDENTIFY(parser),
3450 .location = {
3451 .start = target->base.location.start,
3452 .end = value->location.end
3453 }
3454 },
3455 .name = target->name,
3456 .name_loc = target->base.location,
3457 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3458 .value = value,
3459 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3460 };
3461
3462 return node;
3463}
3464
3468static pm_class_variable_or_write_node_t *
3469pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3470 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3471 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3472
3473 *node = (pm_class_variable_or_write_node_t) {
3474 {
3475 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3476 .node_id = PM_NODE_IDENTIFY(parser),
3477 .location = {
3478 .start = target->base.location.start,
3479 .end = value->location.end
3480 }
3481 },
3482 .name = target->name,
3483 .name_loc = target->base.location,
3484 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3485 .value = value
3486 };
3487
3488 return node;
3489}
3490
3494static pm_class_variable_read_node_t *
3495pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3496 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3497 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3498
3499 *node = (pm_class_variable_read_node_t) {
3500 {
3501 .type = PM_CLASS_VARIABLE_READ_NODE,
3502 .node_id = PM_NODE_IDENTIFY(parser),
3503 .location = PM_LOCATION_TOKEN_VALUE(token)
3504 },
3505 .name = pm_parser_constant_id_token(parser, token)
3506 };
3507
3508 return node;
3509}
3510
3517static inline pm_node_flags_t
3518pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3519 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3520 return flags;
3521 }
3522 return 0;
3523}
3524
3528static pm_class_variable_write_node_t *
3529pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3530 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3531
3532 *node = (pm_class_variable_write_node_t) {
3533 {
3534 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3535 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3536 .node_id = PM_NODE_IDENTIFY(parser),
3537 .location = {
3538 .start = read_node->base.location.start,
3539 .end = value->location.end
3540 },
3541 },
3542 .name = read_node->name,
3543 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3544 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3545 .value = value
3546 };
3547
3548 return node;
3549}
3550
3554static pm_constant_path_and_write_node_t *
3555pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3556 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3557 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3558
3559 *node = (pm_constant_path_and_write_node_t) {
3560 {
3561 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3562 .node_id = PM_NODE_IDENTIFY(parser),
3563 .location = {
3564 .start = target->base.location.start,
3565 .end = value->location.end
3566 }
3567 },
3568 .target = target,
3569 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3570 .value = value
3571 };
3572
3573 return node;
3574}
3575
3579static pm_constant_path_operator_write_node_t *
3580pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3581 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3582
3583 *node = (pm_constant_path_operator_write_node_t) {
3584 {
3585 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3586 .node_id = PM_NODE_IDENTIFY(parser),
3587 .location = {
3588 .start = target->base.location.start,
3589 .end = value->location.end
3590 }
3591 },
3592 .target = target,
3593 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3594 .value = value,
3595 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3596 };
3597
3598 return node;
3599}
3600
3604static pm_constant_path_or_write_node_t *
3605pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3606 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3607 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3608
3609 *node = (pm_constant_path_or_write_node_t) {
3610 {
3611 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3612 .node_id = PM_NODE_IDENTIFY(parser),
3613 .location = {
3614 .start = target->base.location.start,
3615 .end = value->location.end
3616 }
3617 },
3618 .target = target,
3619 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3620 .value = value
3621 };
3622
3623 return node;
3624}
3625
3629static pm_constant_path_node_t *
3630pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3631 pm_assert_value_expression(parser, parent);
3632 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3633
3634 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3635 if (name_token->type == PM_TOKEN_CONSTANT) {
3636 name = pm_parser_constant_id_token(parser, name_token);
3637 }
3638
3639 *node = (pm_constant_path_node_t) {
3640 {
3641 .type = PM_CONSTANT_PATH_NODE,
3642 .node_id = PM_NODE_IDENTIFY(parser),
3643 .location = {
3644 .start = parent == NULL ? delimiter->start : parent->location.start,
3645 .end = name_token->end
3646 },
3647 },
3648 .parent = parent,
3649 .name = name,
3650 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3651 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3652 };
3653
3654 return node;
3655}
3656
3660static pm_constant_path_write_node_t *
3661pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3662 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3663
3664 *node = (pm_constant_path_write_node_t) {
3665 {
3666 .type = PM_CONSTANT_PATH_WRITE_NODE,
3667 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3668 .node_id = PM_NODE_IDENTIFY(parser),
3669 .location = {
3670 .start = target->base.location.start,
3671 .end = value->location.end
3672 },
3673 },
3674 .target = target,
3675 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3676 .value = value
3677 };
3678
3679 return node;
3680}
3681
3685static pm_constant_and_write_node_t *
3686pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3687 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3688 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3689
3690 *node = (pm_constant_and_write_node_t) {
3691 {
3692 .type = PM_CONSTANT_AND_WRITE_NODE,
3693 .node_id = PM_NODE_IDENTIFY(parser),
3694 .location = {
3695 .start = target->base.location.start,
3696 .end = value->location.end
3697 }
3698 },
3699 .name = target->name,
3700 .name_loc = target->base.location,
3701 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3702 .value = value
3703 };
3704
3705 return node;
3706}
3707
3711static pm_constant_operator_write_node_t *
3712pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3713 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3714
3715 *node = (pm_constant_operator_write_node_t) {
3716 {
3717 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3718 .node_id = PM_NODE_IDENTIFY(parser),
3719 .location = {
3720 .start = target->base.location.start,
3721 .end = value->location.end
3722 }
3723 },
3724 .name = target->name,
3725 .name_loc = target->base.location,
3726 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3727 .value = value,
3728 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3729 };
3730
3731 return node;
3732}
3733
3737static pm_constant_or_write_node_t *
3738pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3739 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3740 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3741
3742 *node = (pm_constant_or_write_node_t) {
3743 {
3744 .type = PM_CONSTANT_OR_WRITE_NODE,
3745 .node_id = PM_NODE_IDENTIFY(parser),
3746 .location = {
3747 .start = target->base.location.start,
3748 .end = value->location.end
3749 }
3750 },
3751 .name = target->name,
3752 .name_loc = target->base.location,
3753 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3754 .value = value
3755 };
3756
3757 return node;
3758}
3759
3763static pm_constant_read_node_t *
3764pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3765 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3766 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3767
3768 *node = (pm_constant_read_node_t) {
3769 {
3770 .type = PM_CONSTANT_READ_NODE,
3771 .node_id = PM_NODE_IDENTIFY(parser),
3772 .location = PM_LOCATION_TOKEN_VALUE(name)
3773 },
3774 .name = pm_parser_constant_id_token(parser, name)
3775 };
3776
3777 return node;
3778}
3779
3783static pm_constant_write_node_t *
3784pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3785 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3786
3787 *node = (pm_constant_write_node_t) {
3788 {
3789 .type = PM_CONSTANT_WRITE_NODE,
3790 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3791 .node_id = PM_NODE_IDENTIFY(parser),
3792 .location = {
3793 .start = target->base.location.start,
3794 .end = value->location.end
3795 }
3796 },
3797 .name = target->name,
3798 .name_loc = target->base.location,
3799 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3800 .value = value
3801 };
3802
3803 return node;
3804}
3805
3809static void
3810pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3811 switch (PM_NODE_TYPE(node)) {
3812 case PM_BEGIN_NODE: {
3813 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3814 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3815 break;
3816 }
3817 case PM_PARENTHESES_NODE: {
3818 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3819 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3820 break;
3821 }
3822 case PM_STATEMENTS_NODE: {
3823 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3824 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3825 break;
3826 }
3827 case PM_ARRAY_NODE:
3828 case PM_FLOAT_NODE:
3829 case PM_IMAGINARY_NODE:
3830 case PM_INTEGER_NODE:
3831 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3832 case PM_INTERPOLATED_STRING_NODE:
3833 case PM_INTERPOLATED_SYMBOL_NODE:
3834 case PM_INTERPOLATED_X_STRING_NODE:
3835 case PM_RATIONAL_NODE:
3836 case PM_REGULAR_EXPRESSION_NODE:
3837 case PM_SOURCE_ENCODING_NODE:
3838 case PM_SOURCE_FILE_NODE:
3839 case PM_SOURCE_LINE_NODE:
3840 case PM_STRING_NODE:
3841 case PM_SYMBOL_NODE:
3842 case PM_X_STRING_NODE:
3843 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3844 break;
3845 default:
3846 break;
3847 }
3848}
3849
3853static pm_def_node_t *
3854pm_def_node_create(
3855 pm_parser_t *parser,
3856 pm_constant_id_t name,
3857 const pm_token_t *name_loc,
3858 pm_node_t *receiver,
3859 pm_parameters_node_t *parameters,
3860 pm_node_t *body,
3861 pm_constant_id_list_t *locals,
3862 const pm_token_t *def_keyword,
3863 const pm_token_t *operator,
3864 const pm_token_t *lparen,
3865 const pm_token_t *rparen,
3866 const pm_token_t *equal,
3867 const pm_token_t *end_keyword
3868) {
3869 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3870 const uint8_t *end;
3871
3872 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3873 end = body->location.end;
3874 } else {
3875 end = end_keyword->end;
3876 }
3877
3878 if (receiver != NULL) {
3879 pm_def_node_receiver_check(parser, receiver);
3880 }
3881
3882 *node = (pm_def_node_t) {
3883 {
3884 .type = PM_DEF_NODE,
3885 .node_id = PM_NODE_IDENTIFY(parser),
3886 .location = { .start = def_keyword->start, .end = end },
3887 },
3888 .name = name,
3889 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3890 .receiver = receiver,
3891 .parameters = parameters,
3892 .body = body,
3893 .locals = *locals,
3894 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3895 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3896 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3897 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3898 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3899 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3900 };
3901
3902 return node;
3903}
3904
3908static pm_defined_node_t *
3909pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3910 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3911
3912 *node = (pm_defined_node_t) {
3913 {
3914 .type = PM_DEFINED_NODE,
3915 .node_id = PM_NODE_IDENTIFY(parser),
3916 .location = {
3917 .start = keyword_loc->start,
3918 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3919 },
3920 },
3921 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3922 .value = value,
3923 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3924 .keyword_loc = *keyword_loc
3925 };
3926
3927 return node;
3928}
3929
3933static pm_else_node_t *
3934pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3935 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3936 const uint8_t *end = NULL;
3937 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3938 end = statements->base.location.end;
3939 } else {
3940 end = end_keyword->end;
3941 }
3942
3943 *node = (pm_else_node_t) {
3944 {
3945 .type = PM_ELSE_NODE,
3946 .node_id = PM_NODE_IDENTIFY(parser),
3947 .location = {
3948 .start = else_keyword->start,
3949 .end = end,
3950 },
3951 },
3952 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3953 .statements = statements,
3954 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3955 };
3956
3957 return node;
3958}
3959
3963static pm_embedded_statements_node_t *
3964pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3965 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3966
3967 *node = (pm_embedded_statements_node_t) {
3968 {
3969 .type = PM_EMBEDDED_STATEMENTS_NODE,
3970 .node_id = PM_NODE_IDENTIFY(parser),
3971 .location = {
3972 .start = opening->start,
3973 .end = closing->end
3974 }
3975 },
3976 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3977 .statements = statements,
3978 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3979 };
3980
3981 return node;
3982}
3983
3987static pm_embedded_variable_node_t *
3988pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3989 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3990
3991 *node = (pm_embedded_variable_node_t) {
3992 {
3993 .type = PM_EMBEDDED_VARIABLE_NODE,
3994 .node_id = PM_NODE_IDENTIFY(parser),
3995 .location = {
3996 .start = operator->start,
3997 .end = variable->location.end
3998 }
3999 },
4000 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4001 .variable = variable
4002 };
4003
4004 return node;
4005}
4006
4010static pm_ensure_node_t *
4011pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4012 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4013
4014 *node = (pm_ensure_node_t) {
4015 {
4016 .type = PM_ENSURE_NODE,
4017 .node_id = PM_NODE_IDENTIFY(parser),
4018 .location = {
4019 .start = ensure_keyword->start,
4020 .end = end_keyword->end
4021 },
4022 },
4023 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4024 .statements = statements,
4025 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4026 };
4027
4028 return node;
4029}
4030
4034static pm_false_node_t *
4035pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4036 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4037 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4038
4039 *node = (pm_false_node_t) {{
4040 .type = PM_FALSE_NODE,
4041 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4042 .node_id = PM_NODE_IDENTIFY(parser),
4043 .location = PM_LOCATION_TOKEN_VALUE(token)
4044 }};
4045
4046 return node;
4047}
4048
4053static pm_find_pattern_node_t *
4054pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4055 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4056
4057 pm_node_t *left = nodes->nodes[0];
4058 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4059 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4060
4061 pm_node_t *right;
4062
4063 if (nodes->size == 1) {
4064 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4065 } else {
4066 right = nodes->nodes[nodes->size - 1];
4067 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4068 }
4069
4070#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4071 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4072 // The resulting AST will anyway be ignored, but this file still needs to compile.
4073 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4074#else
4075 pm_node_t *right_splat_node = right;
4076#endif
4077 *node = (pm_find_pattern_node_t) {
4078 {
4079 .type = PM_FIND_PATTERN_NODE,
4080 .node_id = PM_NODE_IDENTIFY(parser),
4081 .location = {
4082 .start = left->location.start,
4083 .end = right->location.end,
4084 },
4085 },
4086 .constant = NULL,
4087 .left = left_splat_node,
4088 .right = right_splat_node,
4089 .requireds = { 0 },
4090 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4091 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4092 };
4093
4094 // For now we're going to just copy over each pointer manually. This could be
4095 // much more efficient, as we could instead resize the node list to only point
4096 // to 1...-1.
4097 for (size_t index = 1; index < nodes->size - 1; index++) {
4098 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4099 }
4100
4101 return node;
4102}
4103
4108static double
4109pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4110 ptrdiff_t diff = token->end - token->start;
4111 if (diff <= 0) return 0.0;
4112
4113 // First, get a buffer of the content.
4114 size_t length = (size_t) diff;
4115 char *buffer = xmalloc(sizeof(char) * (length + 1));
4116 memcpy((void *) buffer, token->start, length);
4117
4118 // Next, determine if we need to replace the decimal point because of
4119 // locale-specific options, and then normalize them if we have to.
4120 char decimal_point = *localeconv()->decimal_point;
4121 if (decimal_point != '.') {
4122 for (size_t index = 0; index < length; index++) {
4123 if (buffer[index] == '.') buffer[index] = decimal_point;
4124 }
4125 }
4126
4127 // Next, handle underscores by removing them from the buffer.
4128 for (size_t index = 0; index < length; index++) {
4129 if (buffer[index] == '_') {
4130 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4131 length--;
4132 }
4133 }
4134
4135 // Null-terminate the buffer so that strtod cannot read off the end.
4136 buffer[length] = '\0';
4137
4138 // Now, call strtod to parse the value. Note that CRuby has their own
4139 // version of strtod which avoids locales. We're okay using the locale-aware
4140 // version because we've already validated through the parser that the token
4141 // is in a valid format.
4142 errno = 0;
4143 char *eptr;
4144 double value = strtod(buffer, &eptr);
4145
4146 // This should never happen, because we've already checked that the token
4147 // is in a valid format. However it's good to be safe.
4148 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4149 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4150 xfree((void *) buffer);
4151 return 0.0;
4152 }
4153
4154 // If errno is set, then it should only be ERANGE. At this point we need to
4155 // check if it's infinity (it should be).
4156 if (errno == ERANGE && PRISM_ISINF(value)) {
4157 int warn_width;
4158 const char *ellipsis;
4159
4160 if (length > 20) {
4161 warn_width = 20;
4162 ellipsis = "...";
4163 } else {
4164 warn_width = (int) length;
4165 ellipsis = "";
4166 }
4167
4168 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4169 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4170 }
4171
4172 // Finally we can free the buffer and return the value.
4173 xfree((void *) buffer);
4174 return value;
4175}
4176
4180static pm_float_node_t *
4181pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4182 assert(token->type == PM_TOKEN_FLOAT);
4183 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4184
4185 *node = (pm_float_node_t) {
4186 {
4187 .type = PM_FLOAT_NODE,
4188 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4189 .node_id = PM_NODE_IDENTIFY(parser),
4190 .location = PM_LOCATION_TOKEN_VALUE(token)
4191 },
4192 .value = pm_double_parse(parser, token)
4193 };
4194
4195 return node;
4196}
4197
4201static pm_imaginary_node_t *
4202pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4203 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4204
4205 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4206 *node = (pm_imaginary_node_t) {
4207 {
4208 .type = PM_IMAGINARY_NODE,
4209 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4210 .node_id = PM_NODE_IDENTIFY(parser),
4211 .location = PM_LOCATION_TOKEN_VALUE(token)
4212 },
4213 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4214 .type = PM_TOKEN_FLOAT,
4215 .start = token->start,
4216 .end = token->end - 1
4217 }))
4218 };
4219
4220 return node;
4221}
4222
4226static pm_rational_node_t *
4227pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4228 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4229
4230 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4231 *node = (pm_rational_node_t) {
4232 {
4233 .type = PM_RATIONAL_NODE,
4234 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4235 .node_id = PM_NODE_IDENTIFY(parser),
4236 .location = PM_LOCATION_TOKEN_VALUE(token)
4237 },
4238 .numerator = { 0 },
4239 .denominator = { 0 }
4240 };
4241
4242 const uint8_t *start = token->start;
4243 const uint8_t *end = token->end - 1; // r
4244
4245 while (start < end && *start == '0') start++; // 0.1 -> .1
4246 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4247
4248 size_t length = (size_t) (end - start);
4249 if (length == 1) {
4250 node->denominator.value = 1;
4251 return node;
4252 }
4253
4254 const uint8_t *point = memchr(start, '.', length);
4255 assert(point && "should have a decimal point");
4256
4257 uint8_t *digits = xmalloc(length);
4258 if (digits == NULL) {
4259 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4260 abort();
4261 }
4262
4263 memcpy(digits, start, (unsigned long) (point - start));
4264 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4265 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4266
4267 digits[0] = '1';
4268 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4269 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4270 xfree(digits);
4271
4272 pm_integers_reduce(&node->numerator, &node->denominator);
4273 return node;
4274}
4275
4280static pm_imaginary_node_t *
4281pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4282 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4283
4284 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4285 *node = (pm_imaginary_node_t) {
4286 {
4287 .type = PM_IMAGINARY_NODE,
4288 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4289 .node_id = PM_NODE_IDENTIFY(parser),
4290 .location = PM_LOCATION_TOKEN_VALUE(token)
4291 },
4292 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4293 .type = PM_TOKEN_FLOAT_RATIONAL,
4294 .start = token->start,
4295 .end = token->end - 1
4296 }))
4297 };
4298
4299 return node;
4300}
4301
4305static pm_for_node_t *
4306pm_for_node_create(
4307 pm_parser_t *parser,
4308 pm_node_t *index,
4309 pm_node_t *collection,
4310 pm_statements_node_t *statements,
4311 const pm_token_t *for_keyword,
4312 const pm_token_t *in_keyword,
4313 const pm_token_t *do_keyword,
4314 const pm_token_t *end_keyword
4315) {
4316 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4317
4318 *node = (pm_for_node_t) {
4319 {
4320 .type = PM_FOR_NODE,
4321 .node_id = PM_NODE_IDENTIFY(parser),
4322 .location = {
4323 .start = for_keyword->start,
4324 .end = end_keyword->end
4325 },
4326 },
4327 .index = index,
4328 .collection = collection,
4329 .statements = statements,
4330 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4331 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4332 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4333 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4334 };
4335
4336 return node;
4337}
4338
4342static pm_forwarding_arguments_node_t *
4343pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4344 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4345 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4346
4347 *node = (pm_forwarding_arguments_node_t) {{
4348 .type = PM_FORWARDING_ARGUMENTS_NODE,
4349 .node_id = PM_NODE_IDENTIFY(parser),
4350 .location = PM_LOCATION_TOKEN_VALUE(token)
4351 }};
4352
4353 return node;
4354}
4355
4359static pm_forwarding_parameter_node_t *
4360pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4361 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4362 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4363
4364 *node = (pm_forwarding_parameter_node_t) {{
4365 .type = PM_FORWARDING_PARAMETER_NODE,
4366 .node_id = PM_NODE_IDENTIFY(parser),
4367 .location = PM_LOCATION_TOKEN_VALUE(token)
4368 }};
4369
4370 return node;
4371}
4372
4376static pm_forwarding_super_node_t *
4377pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4378 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4379 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4380 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4381
4382 pm_block_node_t *block = NULL;
4383 if (arguments->block != NULL) {
4384 block = (pm_block_node_t *) arguments->block;
4385 }
4386
4387 *node = (pm_forwarding_super_node_t) {
4388 {
4389 .type = PM_FORWARDING_SUPER_NODE,
4390 .node_id = PM_NODE_IDENTIFY(parser),
4391 .location = {
4392 .start = token->start,
4393 .end = block != NULL ? block->base.location.end : token->end
4394 },
4395 },
4396 .block = block
4397 };
4398
4399 return node;
4400}
4401
4406static pm_hash_pattern_node_t *
4407pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4408 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4409
4410 *node = (pm_hash_pattern_node_t) {
4411 {
4412 .type = PM_HASH_PATTERN_NODE,
4413 .node_id = PM_NODE_IDENTIFY(parser),
4414 .location = {
4415 .start = opening->start,
4416 .end = closing->end
4417 },
4418 },
4419 .constant = NULL,
4420 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4421 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4422 .elements = { 0 },
4423 .rest = NULL
4424 };
4425
4426 return node;
4427}
4428
4432static pm_hash_pattern_node_t *
4433pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4434 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4435
4436 const uint8_t *start;
4437 const uint8_t *end;
4438
4439 if (elements->size > 0) {
4440 if (rest) {
4441 start = elements->nodes[0]->location.start;
4442 end = rest->location.end;
4443 } else {
4444 start = elements->nodes[0]->location.start;
4445 end = elements->nodes[elements->size - 1]->location.end;
4446 }
4447 } else {
4448 assert(rest != NULL);
4449 start = rest->location.start;
4450 end = rest->location.end;
4451 }
4452
4453 *node = (pm_hash_pattern_node_t) {
4454 {
4455 .type = PM_HASH_PATTERN_NODE,
4456 .node_id = PM_NODE_IDENTIFY(parser),
4457 .location = {
4458 .start = start,
4459 .end = end
4460 },
4461 },
4462 .constant = NULL,
4463 .elements = { 0 },
4464 .rest = rest,
4465 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4466 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4467 };
4468
4469 pm_node_t *element;
4470 PM_NODE_LIST_FOREACH(elements, index, element) {
4471 pm_node_list_append(&node->elements, element);
4472 }
4473
4474 return node;
4475}
4476
4480static pm_constant_id_t
4481pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4482 switch (PM_NODE_TYPE(target)) {
4483 case PM_GLOBAL_VARIABLE_READ_NODE:
4484 return ((pm_global_variable_read_node_t *) target)->name;
4485 case PM_BACK_REFERENCE_READ_NODE:
4486 return ((pm_back_reference_read_node_t *) target)->name;
4487 case PM_NUMBERED_REFERENCE_READ_NODE:
4488 // This will only ever happen in the event of a syntax error, but we
4489 // still need to provide something for the node.
4490 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4491 default:
4492 assert(false && "unreachable");
4493 return (pm_constant_id_t) -1;
4494 }
4495}
4496
4500static pm_global_variable_and_write_node_t *
4501pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4502 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4503 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4504
4505 *node = (pm_global_variable_and_write_node_t) {
4506 {
4507 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4508 .node_id = PM_NODE_IDENTIFY(parser),
4509 .location = {
4510 .start = target->location.start,
4511 .end = value->location.end
4512 }
4513 },
4514 .name = pm_global_variable_write_name(parser, target),
4515 .name_loc = target->location,
4516 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4517 .value = value
4518 };
4519
4520 return node;
4521}
4522
4526static pm_global_variable_operator_write_node_t *
4527pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4528 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4529
4530 *node = (pm_global_variable_operator_write_node_t) {
4531 {
4532 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4533 .node_id = PM_NODE_IDENTIFY(parser),
4534 .location = {
4535 .start = target->location.start,
4536 .end = value->location.end
4537 }
4538 },
4539 .name = pm_global_variable_write_name(parser, target),
4540 .name_loc = target->location,
4541 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4542 .value = value,
4543 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4544 };
4545
4546 return node;
4547}
4548
4552static pm_global_variable_or_write_node_t *
4553pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4554 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4555 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4556
4557 *node = (pm_global_variable_or_write_node_t) {
4558 {
4559 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4560 .node_id = PM_NODE_IDENTIFY(parser),
4561 .location = {
4562 .start = target->location.start,
4563 .end = value->location.end
4564 }
4565 },
4566 .name = pm_global_variable_write_name(parser, target),
4567 .name_loc = target->location,
4568 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4569 .value = value
4570 };
4571
4572 return node;
4573}
4574
4578static pm_global_variable_read_node_t *
4579pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4580 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4581
4582 *node = (pm_global_variable_read_node_t) {
4583 {
4584 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4585 .node_id = PM_NODE_IDENTIFY(parser),
4586 .location = PM_LOCATION_TOKEN_VALUE(name),
4587 },
4588 .name = pm_parser_constant_id_token(parser, name)
4589 };
4590
4591 return node;
4592}
4593
4597static pm_global_variable_read_node_t *
4598pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4599 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4600
4601 *node = (pm_global_variable_read_node_t) {
4602 {
4603 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4604 .node_id = PM_NODE_IDENTIFY(parser),
4605 .location = PM_LOCATION_NULL_VALUE(parser)
4606 },
4607 .name = name
4608 };
4609
4610 return node;
4611}
4612
4616static pm_global_variable_write_node_t *
4617pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4618 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4619
4620 *node = (pm_global_variable_write_node_t) {
4621 {
4622 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4623 .node_id = PM_NODE_IDENTIFY(parser),
4624 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4625 .location = {
4626 .start = target->location.start,
4627 .end = value->location.end
4628 },
4629 },
4630 .name = pm_global_variable_write_name(parser, target),
4631 .name_loc = PM_LOCATION_NODE_VALUE(target),
4632 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4633 .value = value
4634 };
4635
4636 return node;
4637}
4638
4642static pm_global_variable_write_node_t *
4643pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4644 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4645
4646 *node = (pm_global_variable_write_node_t) {
4647 {
4648 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4649 .node_id = PM_NODE_IDENTIFY(parser),
4650 .location = PM_LOCATION_NULL_VALUE(parser)
4651 },
4652 .name = name,
4653 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4654 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4655 .value = value
4656 };
4657
4658 return node;
4659}
4660
4664static pm_hash_node_t *
4665pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4666 assert(opening != NULL);
4667 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4668
4669 *node = (pm_hash_node_t) {
4670 {
4671 .type = PM_HASH_NODE,
4672 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4673 .node_id = PM_NODE_IDENTIFY(parser),
4674 .location = PM_LOCATION_TOKEN_VALUE(opening)
4675 },
4676 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4677 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4678 .elements = { 0 }
4679 };
4680
4681 return node;
4682}
4683
4687static inline void
4688pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4689 pm_node_list_append(&hash->elements, element);
4690
4691 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4692 if (static_literal) {
4693 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4694 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4695 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4696 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4697 }
4698
4699 if (!static_literal) {
4700 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4701 }
4702}
4703
4704static inline void
4705pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4706 hash->base.location.end = token->end;
4707 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4708}
4709
4713static pm_if_node_t *
4714pm_if_node_create(pm_parser_t *parser,
4715 const pm_token_t *if_keyword,
4716 pm_node_t *predicate,
4717 const pm_token_t *then_keyword,
4718 pm_statements_node_t *statements,
4719 pm_node_t *subsequent,
4720 const pm_token_t *end_keyword
4721) {
4722 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4723 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4724
4725 const uint8_t *end;
4726 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4727 end = end_keyword->end;
4728 } else if (subsequent != NULL) {
4729 end = subsequent->location.end;
4730 } else if (pm_statements_node_body_length(statements) != 0) {
4731 end = statements->base.location.end;
4732 } else {
4733 end = predicate->location.end;
4734 }
4735
4736 *node = (pm_if_node_t) {
4737 {
4738 .type = PM_IF_NODE,
4739 .flags = PM_NODE_FLAG_NEWLINE,
4740 .node_id = PM_NODE_IDENTIFY(parser),
4741 .location = {
4742 .start = if_keyword->start,
4743 .end = end
4744 },
4745 },
4746 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4747 .predicate = predicate,
4748 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4749 .statements = statements,
4750 .subsequent = subsequent,
4751 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4752 };
4753
4754 return node;
4755}
4756
4760static pm_if_node_t *
4761pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4762 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4763 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4764
4765 pm_statements_node_t *statements = pm_statements_node_create(parser);
4766 pm_statements_node_body_append(parser, statements, statement, true);
4767
4768 *node = (pm_if_node_t) {
4769 {
4770 .type = PM_IF_NODE,
4771 .flags = PM_NODE_FLAG_NEWLINE,
4772 .node_id = PM_NODE_IDENTIFY(parser),
4773 .location = {
4774 .start = statement->location.start,
4775 .end = predicate->location.end
4776 },
4777 },
4778 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4779 .predicate = predicate,
4780 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4781 .statements = statements,
4782 .subsequent = NULL,
4783 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4784 };
4785
4786 return node;
4787}
4788
4792static pm_if_node_t *
4793pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4794 pm_assert_value_expression(parser, predicate);
4795 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4796
4797 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4798 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4799
4800 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4801 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4802
4803 pm_token_t end_keyword = not_provided(parser);
4804 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4805
4806 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4807
4808 *node = (pm_if_node_t) {
4809 {
4810 .type = PM_IF_NODE,
4811 .flags = PM_NODE_FLAG_NEWLINE,
4812 .node_id = PM_NODE_IDENTIFY(parser),
4813 .location = {
4814 .start = predicate->location.start,
4815 .end = false_expression->location.end,
4816 },
4817 },
4818 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4819 .predicate = predicate,
4820 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4821 .statements = if_statements,
4822 .subsequent = (pm_node_t *) else_node,
4823 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4824 };
4825
4826 return node;
4827
4828}
4829
4830static inline void
4831pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4832 node->base.location.end = keyword->end;
4833 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4834}
4835
4836static inline void
4837pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4838 node->base.location.end = keyword->end;
4839 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4840}
4841
4845static pm_implicit_node_t *
4846pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4847 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4848
4849 *node = (pm_implicit_node_t) {
4850 {
4851 .type = PM_IMPLICIT_NODE,
4852 .node_id = PM_NODE_IDENTIFY(parser),
4853 .location = value->location
4854 },
4855 .value = value
4856 };
4857
4858 return node;
4859}
4860
4864static pm_implicit_rest_node_t *
4865pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4866 assert(token->type == PM_TOKEN_COMMA);
4867
4868 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4869
4870 *node = (pm_implicit_rest_node_t) {
4871 {
4872 .type = PM_IMPLICIT_REST_NODE,
4873 .node_id = PM_NODE_IDENTIFY(parser),
4874 .location = PM_LOCATION_TOKEN_VALUE(token)
4875 }
4876 };
4877
4878 return node;
4879}
4880
4884static pm_integer_node_t *
4885pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4886 assert(token->type == PM_TOKEN_INTEGER);
4887 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4888
4889 *node = (pm_integer_node_t) {
4890 {
4891 .type = PM_INTEGER_NODE,
4892 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4893 .node_id = PM_NODE_IDENTIFY(parser),
4894 .location = PM_LOCATION_TOKEN_VALUE(token)
4895 },
4896 .value = { 0 }
4897 };
4898
4899 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4900 switch (base) {
4901 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4902 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4903 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4904 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4905 default: assert(false && "unreachable"); break;
4906 }
4907
4908 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4909 return node;
4910}
4911
4916static pm_imaginary_node_t *
4917pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4918 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4919
4920 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4921 *node = (pm_imaginary_node_t) {
4922 {
4923 .type = PM_IMAGINARY_NODE,
4924 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4925 .node_id = PM_NODE_IDENTIFY(parser),
4926 .location = PM_LOCATION_TOKEN_VALUE(token)
4927 },
4928 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4929 .type = PM_TOKEN_INTEGER,
4930 .start = token->start,
4931 .end = token->end - 1
4932 }))
4933 };
4934
4935 return node;
4936}
4937
4942static pm_rational_node_t *
4943pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4944 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4945
4946 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4947 *node = (pm_rational_node_t) {
4948 {
4949 .type = PM_RATIONAL_NODE,
4950 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4951 .node_id = PM_NODE_IDENTIFY(parser),
4952 .location = PM_LOCATION_TOKEN_VALUE(token)
4953 },
4954 .numerator = { 0 },
4955 .denominator = { .value = 1, 0 }
4956 };
4957
4958 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4959 switch (base) {
4960 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4961 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4962 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4963 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4964 default: assert(false && "unreachable"); break;
4965 }
4966
4967 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4968
4969 return node;
4970}
4971
4976static pm_imaginary_node_t *
4977pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4978 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4979
4980 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4981 *node = (pm_imaginary_node_t) {
4982 {
4983 .type = PM_IMAGINARY_NODE,
4984 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4985 .node_id = PM_NODE_IDENTIFY(parser),
4986 .location = PM_LOCATION_TOKEN_VALUE(token)
4987 },
4988 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4989 .type = PM_TOKEN_INTEGER_RATIONAL,
4990 .start = token->start,
4991 .end = token->end - 1
4992 }))
4993 };
4994
4995 return node;
4996}
4997
5001static pm_in_node_t *
5002pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
5003 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
5004
5005 const uint8_t *end;
5006 if (statements != NULL) {
5007 end = statements->base.location.end;
5008 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
5009 end = then_keyword->end;
5010 } else {
5011 end = pattern->location.end;
5012 }
5013
5014 *node = (pm_in_node_t) {
5015 {
5016 .type = PM_IN_NODE,
5017 .node_id = PM_NODE_IDENTIFY(parser),
5018 .location = {
5019 .start = in_keyword->start,
5020 .end = end
5021 },
5022 },
5023 .pattern = pattern,
5024 .statements = statements,
5025 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5026 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5027 };
5028
5029 return node;
5030}
5031
5035static pm_instance_variable_and_write_node_t *
5036pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5037 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5038 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5039
5040 *node = (pm_instance_variable_and_write_node_t) {
5041 {
5042 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5043 .node_id = PM_NODE_IDENTIFY(parser),
5044 .location = {
5045 .start = target->base.location.start,
5046 .end = value->location.end
5047 }
5048 },
5049 .name = target->name,
5050 .name_loc = target->base.location,
5051 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5052 .value = value
5053 };
5054
5055 return node;
5056}
5057
5061static pm_instance_variable_operator_write_node_t *
5062pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5063 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5064
5065 *node = (pm_instance_variable_operator_write_node_t) {
5066 {
5067 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5068 .node_id = PM_NODE_IDENTIFY(parser),
5069 .location = {
5070 .start = target->base.location.start,
5071 .end = value->location.end
5072 }
5073 },
5074 .name = target->name,
5075 .name_loc = target->base.location,
5076 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5077 .value = value,
5078 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5079 };
5080
5081 return node;
5082}
5083
5087static pm_instance_variable_or_write_node_t *
5088pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5089 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5090 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5091
5092 *node = (pm_instance_variable_or_write_node_t) {
5093 {
5094 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5095 .node_id = PM_NODE_IDENTIFY(parser),
5096 .location = {
5097 .start = target->base.location.start,
5098 .end = value->location.end
5099 }
5100 },
5101 .name = target->name,
5102 .name_loc = target->base.location,
5103 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5104 .value = value
5105 };
5106
5107 return node;
5108}
5109
5113static pm_instance_variable_read_node_t *
5114pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5115 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5116 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5117
5118 *node = (pm_instance_variable_read_node_t) {
5119 {
5120 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5121 .node_id = PM_NODE_IDENTIFY(parser),
5122 .location = PM_LOCATION_TOKEN_VALUE(token)
5123 },
5124 .name = pm_parser_constant_id_token(parser, token)
5125 };
5126
5127 return node;
5128}
5129
5134static pm_instance_variable_write_node_t *
5135pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5136 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5137 *node = (pm_instance_variable_write_node_t) {
5138 {
5139 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5140 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5141 .node_id = PM_NODE_IDENTIFY(parser),
5142 .location = {
5143 .start = read_node->base.location.start,
5144 .end = value->location.end
5145 }
5146 },
5147 .name = read_node->name,
5148 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5149 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5150 .value = value
5151 };
5152
5153 return node;
5154}
5155
5161static void
5162pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5163 switch (PM_NODE_TYPE(part)) {
5164 case PM_STRING_NODE:
5165 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5166 break;
5167 case PM_EMBEDDED_STATEMENTS_NODE: {
5168 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5169 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5170
5171 if (embedded == NULL) {
5172 // If there are no statements or more than one statement, then
5173 // we lose the static literal flag.
5174 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5175 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5176 // If the embedded statement is a string, then we can keep the
5177 // static literal flag and mark the string as frozen.
5178 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5179 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5180 // If the embedded statement is an interpolated string and it's
5181 // a static literal, then we can keep the static literal flag.
5182 } else {
5183 // Otherwise we lose the static literal flag.
5184 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5185 }
5186
5187 break;
5188 }
5189 case PM_EMBEDDED_VARIABLE_NODE:
5190 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5191 break;
5192 default:
5193 assert(false && "unexpected node type");
5194 break;
5195 }
5196
5197 pm_node_list_append(parts, part);
5198}
5199
5203static pm_interpolated_regular_expression_node_t *
5204pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5205 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5206
5207 *node = (pm_interpolated_regular_expression_node_t) {
5208 {
5209 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5210 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5211 .node_id = PM_NODE_IDENTIFY(parser),
5212 .location = {
5213 .start = opening->start,
5214 .end = NULL,
5215 },
5216 },
5217 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5218 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5219 .parts = { 0 }
5220 };
5221
5222 return node;
5223}
5224
5225static inline void
5226pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5227 if (node->base.location.start > part->location.start) {
5228 node->base.location.start = part->location.start;
5229 }
5230 if (node->base.location.end < part->location.end) {
5231 node->base.location.end = part->location.end;
5232 }
5233
5234 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5235}
5236
5237static inline void
5238pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5239 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5240 node->base.location.end = closing->end;
5241 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5242}
5243
5267static inline void
5268pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5269#define CLEAR_FLAGS(node) \
5270 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5271
5272#define MUTABLE_FLAGS(node) \
5273 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5274
5275 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5276 node->base.location.start = part->location.start;
5277 }
5278
5279 node->base.location.end = MAX(node->base.location.end, part->location.end);
5280
5281 switch (PM_NODE_TYPE(part)) {
5282 case PM_STRING_NODE:
5283 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
5284 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
5285 // as long as this interpolation only consists of other string literals.
5286 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
5287 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5288 }
5289 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5290 break;
5291 case PM_INTERPOLATED_STRING_NODE:
5292 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5293 // If the string that we're concatenating is a static literal,
5294 // then we can keep the static literal flag for this string.
5295 } else {
5296 // Otherwise, we lose the static literal flag here and we should
5297 // also clear the mutability flags.
5298 CLEAR_FLAGS(node);
5299 }
5300 break;
5301 case PM_EMBEDDED_STATEMENTS_NODE: {
5302 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5303 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5304
5305 if (embedded == NULL) {
5306 // If we're embedding multiple statements or no statements, then
5307 // the string is not longer a static literal.
5308 CLEAR_FLAGS(node);
5309 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5310 // If the embedded statement is a string, then we can make that
5311 // string as frozen and static literal, and not touch the static
5312 // literal status of this string.
5313 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5314
5315 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5316 MUTABLE_FLAGS(node);
5317 }
5318 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5319 // If the embedded statement is an interpolated string, but that
5320 // string is marked as static literal, then we can keep our
5321 // static literal status for this string.
5322 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5323 MUTABLE_FLAGS(node);
5324 }
5325 } else {
5326 // In all other cases, we lose the static literal flag here and
5327 // become mutable.
5328 CLEAR_FLAGS(node);
5329 }
5330
5331 break;
5332 }
5333 case PM_EMBEDDED_VARIABLE_NODE:
5334 // Embedded variables clear static literal, which means we also
5335 // should clear the mutability flags.
5336 CLEAR_FLAGS(node);
5337 break;
5338 case PM_X_STRING_NODE:
5339 case PM_INTERPOLATED_X_STRING_NODE:
5340 // If this is an x string, then this is a syntax error. But we want
5341 // to handle it here so that we don't fail the assertion.
5342 CLEAR_FLAGS(node);
5343 break;
5344 default:
5345 assert(false && "unexpected node type");
5346 break;
5347 }
5348
5349 pm_node_list_append(&node->parts, part);
5350
5351#undef CLEAR_FLAGS
5352#undef MUTABLE_FLAGS
5353}
5354
5358static pm_interpolated_string_node_t *
5359pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5360 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5361 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5362
5363 switch (parser->frozen_string_literal) {
5364 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5365 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5366 break;
5367 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5368 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5369 break;
5370 }
5371
5372 *node = (pm_interpolated_string_node_t) {
5373 {
5374 .type = PM_INTERPOLATED_STRING_NODE,
5375 .flags = flags,
5376 .node_id = PM_NODE_IDENTIFY(parser),
5377 .location = {
5378 .start = opening->start,
5379 .end = closing->end,
5380 },
5381 },
5382 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5383 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5384 .parts = { 0 }
5385 };
5386
5387 if (parts != NULL) {
5388 pm_node_t *part;
5389 PM_NODE_LIST_FOREACH(parts, index, part) {
5390 pm_interpolated_string_node_append(node, part);
5391 }
5392 }
5393
5394 return node;
5395}
5396
5400static void
5401pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5402 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5403 node->base.location.end = closing->end;
5404}
5405
5406static void
5407pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5408 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5409 node->base.location.start = part->location.start;
5410 }
5411
5412 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5413 node->base.location.end = MAX(node->base.location.end, part->location.end);
5414}
5415
5416static void
5417pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5418 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5419 node->base.location.end = closing->end;
5420}
5421
5425static pm_interpolated_symbol_node_t *
5426pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5427 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5428
5429 *node = (pm_interpolated_symbol_node_t) {
5430 {
5431 .type = PM_INTERPOLATED_SYMBOL_NODE,
5432 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5433 .node_id = PM_NODE_IDENTIFY(parser),
5434 .location = {
5435 .start = opening->start,
5436 .end = closing->end,
5437 },
5438 },
5439 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5440 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5441 .parts = { 0 }
5442 };
5443
5444 if (parts != NULL) {
5445 pm_node_t *part;
5446 PM_NODE_LIST_FOREACH(parts, index, part) {
5447 pm_interpolated_symbol_node_append(node, part);
5448 }
5449 }
5450
5451 return node;
5452}
5453
5457static pm_interpolated_x_string_node_t *
5458pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5459 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5460
5461 *node = (pm_interpolated_x_string_node_t) {
5462 {
5463 .type = PM_INTERPOLATED_X_STRING_NODE,
5464 .node_id = PM_NODE_IDENTIFY(parser),
5465 .location = {
5466 .start = opening->start,
5467 .end = closing->end
5468 },
5469 },
5470 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5471 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5472 .parts = { 0 }
5473 };
5474
5475 return node;
5476}
5477
5478static inline void
5479pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5480 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5481 node->base.location.end = part->location.end;
5482}
5483
5484static inline void
5485pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5486 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5487 node->base.location.end = closing->end;
5488}
5489
5493static pm_it_local_variable_read_node_t *
5494pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5495 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5496
5497 *node = (pm_it_local_variable_read_node_t) {
5498 {
5499 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5500 .node_id = PM_NODE_IDENTIFY(parser),
5501 .location = PM_LOCATION_TOKEN_VALUE(name)
5502 }
5503 };
5504
5505 return node;
5506}
5507
5511static pm_it_parameters_node_t *
5512pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5513 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5514
5515 *node = (pm_it_parameters_node_t) {
5516 {
5517 .type = PM_IT_PARAMETERS_NODE,
5518 .node_id = PM_NODE_IDENTIFY(parser),
5519 .location = {
5520 .start = opening->start,
5521 .end = closing->end
5522 }
5523 }
5524 };
5525
5526 return node;
5527}
5528
5532static pm_keyword_hash_node_t *
5533pm_keyword_hash_node_create(pm_parser_t *parser) {
5534 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5535
5536 *node = (pm_keyword_hash_node_t) {
5537 .base = {
5538 .type = PM_KEYWORD_HASH_NODE,
5539 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5540 .node_id = PM_NODE_IDENTIFY(parser),
5541 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5542 },
5543 .elements = { 0 }
5544 };
5545
5546 return node;
5547}
5548
5552static void
5553pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5554 // If the element being added is not an AssocNode or does not have a symbol
5555 // key, then we want to turn the SYMBOL_KEYS flag off.
5556 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5557 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5558 }
5559
5560 pm_node_list_append(&hash->elements, element);
5561 if (hash->base.location.start == NULL) {
5562 hash->base.location.start = element->location.start;
5563 }
5564 hash->base.location.end = element->location.end;
5565}
5566
5570static pm_required_keyword_parameter_node_t *
5571pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5572 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5573
5574 *node = (pm_required_keyword_parameter_node_t) {
5575 {
5576 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5577 .node_id = PM_NODE_IDENTIFY(parser),
5578 .location = {
5579 .start = name->start,
5580 .end = name->end
5581 },
5582 },
5583 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5584 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5585 };
5586
5587 return node;
5588}
5589
5593static pm_optional_keyword_parameter_node_t *
5594pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5595 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5596
5597 *node = (pm_optional_keyword_parameter_node_t) {
5598 {
5599 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5600 .node_id = PM_NODE_IDENTIFY(parser),
5601 .location = {
5602 .start = name->start,
5603 .end = value->location.end
5604 },
5605 },
5606 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5607 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5608 .value = value
5609 };
5610
5611 return node;
5612}
5613
5617static pm_keyword_rest_parameter_node_t *
5618pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5619 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5620
5621 *node = (pm_keyword_rest_parameter_node_t) {
5622 {
5623 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5624 .node_id = PM_NODE_IDENTIFY(parser),
5625 .location = {
5626 .start = operator->start,
5627 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5628 },
5629 },
5630 .name = pm_parser_optional_constant_id_token(parser, name),
5631 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5632 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5633 };
5634
5635 return node;
5636}
5637
5641static pm_lambda_node_t *
5642pm_lambda_node_create(
5643 pm_parser_t *parser,
5644 pm_constant_id_list_t *locals,
5645 const pm_token_t *operator,
5646 const pm_token_t *opening,
5647 const pm_token_t *closing,
5648 pm_node_t *parameters,
5649 pm_node_t *body
5650) {
5651 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5652
5653 *node = (pm_lambda_node_t) {
5654 {
5655 .type = PM_LAMBDA_NODE,
5656 .node_id = PM_NODE_IDENTIFY(parser),
5657 .location = {
5658 .start = operator->start,
5659 .end = closing->end
5660 },
5661 },
5662 .locals = *locals,
5663 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5664 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5665 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5666 .parameters = parameters,
5667 .body = body
5668 };
5669
5670 return node;
5671}
5672
5676static pm_local_variable_and_write_node_t *
5677pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5678 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5679 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5680 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5681
5682 *node = (pm_local_variable_and_write_node_t) {
5683 {
5684 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5685 .node_id = PM_NODE_IDENTIFY(parser),
5686 .location = {
5687 .start = target->location.start,
5688 .end = value->location.end
5689 }
5690 },
5691 .name_loc = target->location,
5692 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5693 .value = value,
5694 .name = name,
5695 .depth = depth
5696 };
5697
5698 return node;
5699}
5700
5704static pm_local_variable_operator_write_node_t *
5705pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5706 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5707
5708 *node = (pm_local_variable_operator_write_node_t) {
5709 {
5710 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5711 .node_id = PM_NODE_IDENTIFY(parser),
5712 .location = {
5713 .start = target->location.start,
5714 .end = value->location.end
5715 }
5716 },
5717 .name_loc = target->location,
5718 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5719 .value = value,
5720 .name = name,
5721 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5722 .depth = depth
5723 };
5724
5725 return node;
5726}
5727
5731static pm_local_variable_or_write_node_t *
5732pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5733 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5734 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5735 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5736
5737 *node = (pm_local_variable_or_write_node_t) {
5738 {
5739 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5740 .node_id = PM_NODE_IDENTIFY(parser),
5741 .location = {
5742 .start = target->location.start,
5743 .end = value->location.end
5744 }
5745 },
5746 .name_loc = target->location,
5747 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5748 .value = value,
5749 .name = name,
5750 .depth = depth
5751 };
5752
5753 return node;
5754}
5755
5759static pm_local_variable_read_node_t *
5760pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5761 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5762
5763 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5764
5765 *node = (pm_local_variable_read_node_t) {
5766 {
5767 .type = PM_LOCAL_VARIABLE_READ_NODE,
5768 .node_id = PM_NODE_IDENTIFY(parser),
5769 .location = PM_LOCATION_TOKEN_VALUE(name)
5770 },
5771 .name = name_id,
5772 .depth = depth
5773 };
5774
5775 return node;
5776}
5777
5781static pm_local_variable_read_node_t *
5782pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5783 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5784 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5785}
5786
5791static pm_local_variable_read_node_t *
5792pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5793 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5794 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5795}
5796
5800static pm_local_variable_write_node_t *
5801pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5802 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5803
5804 *node = (pm_local_variable_write_node_t) {
5805 {
5806 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5807 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5808 .node_id = PM_NODE_IDENTIFY(parser),
5809 .location = {
5810 .start = name_loc->start,
5811 .end = value->location.end
5812 }
5813 },
5814 .name = name,
5815 .depth = depth,
5816 .value = value,
5817 .name_loc = *name_loc,
5818 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5819 };
5820
5821 return node;
5822}
5823
/**
 * Returns true if the bytes in the half-open range [start, end) are exactly
 * the two characters `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Check the length first so that the byte reads below stay in range.
    if (end - start != 2) return false;

    return start[0] == 'i' && start[1] == 't';
}
5831
/**
 * Returns true if the bytes in the half-open range [start, end) look like a
 * numbered parameter: exactly two bytes, an underscore followed by a decimal
 * digit other than `0`.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    // Check the length first so that the byte reads below stay in range.
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
5840
5845static inline void
5846pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5847 if (pm_token_is_numbered_parameter(start, end)) {
5848 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5849 }
5850}
5851
5856static pm_local_variable_target_node_t *
5857pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5858 pm_refute_numbered_parameter(parser, location->start, location->end);
5859 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5860
5861 *node = (pm_local_variable_target_node_t) {
5862 {
5863 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5864 .node_id = PM_NODE_IDENTIFY(parser),
5865 .location = *location
5866 },
5867 .name = name,
5868 .depth = depth
5869 };
5870
5871 return node;
5872}
5873
5877static pm_match_predicate_node_t *
5878pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5879 pm_assert_value_expression(parser, value);
5880
5881 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5882
5883 *node = (pm_match_predicate_node_t) {
5884 {
5885 .type = PM_MATCH_PREDICATE_NODE,
5886 .node_id = PM_NODE_IDENTIFY(parser),
5887 .location = {
5888 .start = value->location.start,
5889 .end = pattern->location.end
5890 }
5891 },
5892 .value = value,
5893 .pattern = pattern,
5894 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5895 };
5896
5897 return node;
5898}
5899
5903static pm_match_required_node_t *
5904pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5905 pm_assert_value_expression(parser, value);
5906
5907 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5908
5909 *node = (pm_match_required_node_t) {
5910 {
5911 .type = PM_MATCH_REQUIRED_NODE,
5912 .node_id = PM_NODE_IDENTIFY(parser),
5913 .location = {
5914 .start = value->location.start,
5915 .end = pattern->location.end
5916 }
5917 },
5918 .value = value,
5919 .pattern = pattern,
5920 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5921 };
5922
5923 return node;
5924}
5925
5929static pm_match_write_node_t *
5930pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5931 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5932
5933 *node = (pm_match_write_node_t) {
5934 {
5935 .type = PM_MATCH_WRITE_NODE,
5936 .node_id = PM_NODE_IDENTIFY(parser),
5937 .location = call->base.location
5938 },
5939 .call = call,
5940 .targets = { 0 }
5941 };
5942
5943 return node;
5944}
5945
5949static pm_module_node_t *
5950pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5951 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5952
5953 *node = (pm_module_node_t) {
5954 {
5955 .type = PM_MODULE_NODE,
5956 .node_id = PM_NODE_IDENTIFY(parser),
5957 .location = {
5958 .start = module_keyword->start,
5959 .end = end_keyword->end
5960 }
5961 },
5962 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5963 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5964 .constant_path = constant_path,
5965 .body = body,
5966 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5967 .name = pm_parser_constant_id_token(parser, name)
5968 };
5969
5970 return node;
5971}
5972
5976static pm_multi_target_node_t *
5977pm_multi_target_node_create(pm_parser_t *parser) {
5978 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5979
5980 *node = (pm_multi_target_node_t) {
5981 {
5982 .type = PM_MULTI_TARGET_NODE,
5983 .node_id = PM_NODE_IDENTIFY(parser),
5984 .location = { .start = NULL, .end = NULL }
5985 },
5986 .lefts = { 0 },
5987 .rest = NULL,
5988 .rights = { 0 },
5989 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5990 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5991 };
5992
5993 return node;
5994}
5995
5999static void
6000pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
6001 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
6002 if (node->rest == NULL) {
6003 node->rest = target;
6004 } else {
6005 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
6006 pm_node_list_append(&node->rights, target);
6007 }
6008 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
6009 if (node->rest == NULL) {
6010 node->rest = target;
6011 } else {
6012 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
6013 pm_node_list_append(&node->rights, target);
6014 }
6015 } else if (node->rest == NULL) {
6016 pm_node_list_append(&node->lefts, target);
6017 } else {
6018 pm_node_list_append(&node->rights, target);
6019 }
6020
6021 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
6022 node->base.location.start = target->location.start;
6023 }
6024
6025 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6026 node->base.location.end = target->location.end;
6027 }
6028}
6029
6033static void
6034pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6035 node->base.location.start = lparen->start;
6036 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6037}
6038
6042static void
6043pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6044 node->base.location.end = rparen->end;
6045 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6046}
6047
6051static pm_multi_write_node_t *
6052pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6053 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6054
6055 *node = (pm_multi_write_node_t) {
6056 {
6057 .type = PM_MULTI_WRITE_NODE,
6058 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6059 .node_id = PM_NODE_IDENTIFY(parser),
6060 .location = {
6061 .start = target->base.location.start,
6062 .end = value->location.end
6063 }
6064 },
6065 .lefts = target->lefts,
6066 .rest = target->rest,
6067 .rights = target->rights,
6068 .lparen_loc = target->lparen_loc,
6069 .rparen_loc = target->rparen_loc,
6070 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6071 .value = value
6072 };
6073
6074 // Explicitly do not call pm_node_destroy here because we want to keep
6075 // around all of the information within the MultiWriteNode node.
6076 xfree(target);
6077
6078 return node;
6079}
6080
6084static pm_next_node_t *
6085pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6086 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6087 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6088
6089 *node = (pm_next_node_t) {
6090 {
6091 .type = PM_NEXT_NODE,
6092 .node_id = PM_NODE_IDENTIFY(parser),
6093 .location = {
6094 .start = keyword->start,
6095 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6096 }
6097 },
6098 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6099 .arguments = arguments
6100 };
6101
6102 return node;
6103}
6104
6108static pm_nil_node_t *
6109pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6110 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6111 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6112
6113 *node = (pm_nil_node_t) {{
6114 .type = PM_NIL_NODE,
6115 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6116 .node_id = PM_NODE_IDENTIFY(parser),
6117 .location = PM_LOCATION_TOKEN_VALUE(token)
6118 }};
6119
6120 return node;
6121}
6122
6126static pm_no_keywords_parameter_node_t *
6127pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6128 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6129 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6130 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6131
6132 *node = (pm_no_keywords_parameter_node_t) {
6133 {
6134 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6135 .node_id = PM_NODE_IDENTIFY(parser),
6136 .location = {
6137 .start = operator->start,
6138 .end = keyword->end
6139 }
6140 },
6141 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6142 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6143 };
6144
6145 return node;
6146}
6147
6151static pm_numbered_parameters_node_t *
6152pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6153 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6154
6155 *node = (pm_numbered_parameters_node_t) {
6156 {
6157 .type = PM_NUMBERED_PARAMETERS_NODE,
6158 .node_id = PM_NODE_IDENTIFY(parser),
6159 .location = *location
6160 },
6161 .maximum = maximum
6162 };
6163
6164 return node;
6165}
6166
6171#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6172
6179static uint32_t
6180pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6181 const uint8_t *start = token->start + 1;
6182 const uint8_t *end = token->end;
6183
6184 ptrdiff_t diff = end - start;
6185 assert(diff > 0);
6186#if PTRDIFF_MAX > SIZE_MAX
6187 assert(diff < (ptrdiff_t) SIZE_MAX);
6188#endif
6189 size_t length = (size_t) diff;
6190
6191 char *digits = xcalloc(length + 1, sizeof(char));
6192 memcpy(digits, start, length);
6193 digits[length] = '\0';
6194
6195 char *endptr;
6196 errno = 0;
6197 unsigned long value = strtoul(digits, &endptr, 10);
6198
6199 if ((digits == endptr) || (*endptr != '\0')) {
6200 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6201 value = 0;
6202 }
6203
6204 xfree(digits);
6205
6206 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6207 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6208 value = 0;
6209 }
6210
6211 return (uint32_t) value;
6212}
6213
6214#undef NTH_REF_MAX
6215
6219static pm_numbered_reference_read_node_t *
6220pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6221 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6222 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6223
6224 *node = (pm_numbered_reference_read_node_t) {
6225 {
6226 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6227 .node_id = PM_NODE_IDENTIFY(parser),
6228 .location = PM_LOCATION_TOKEN_VALUE(name),
6229 },
6230 .number = pm_numbered_reference_read_node_number(parser, name)
6231 };
6232
6233 return node;
6234}
6235
6239static pm_optional_parameter_node_t *
6240pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6241 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6242
6243 *node = (pm_optional_parameter_node_t) {
6244 {
6245 .type = PM_OPTIONAL_PARAMETER_NODE,
6246 .node_id = PM_NODE_IDENTIFY(parser),
6247 .location = {
6248 .start = name->start,
6249 .end = value->location.end
6250 }
6251 },
6252 .name = pm_parser_constant_id_token(parser, name),
6253 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6254 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6255 .value = value
6256 };
6257
6258 return node;
6259}
6260
6264static pm_or_node_t *
6265pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6266 pm_assert_value_expression(parser, left);
6267
6268 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6269
6270 *node = (pm_or_node_t) {
6271 {
6272 .type = PM_OR_NODE,
6273 .node_id = PM_NODE_IDENTIFY(parser),
6274 .location = {
6275 .start = left->location.start,
6276 .end = right->location.end
6277 }
6278 },
6279 .left = left,
6280 .right = right,
6281 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6282 };
6283
6284 return node;
6285}
6286
6290static pm_parameters_node_t *
6291pm_parameters_node_create(pm_parser_t *parser) {
6292 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6293
6294 *node = (pm_parameters_node_t) {
6295 {
6296 .type = PM_PARAMETERS_NODE,
6297 .node_id = PM_NODE_IDENTIFY(parser),
6298 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6299 },
6300 .rest = NULL,
6301 .keyword_rest = NULL,
6302 .block = NULL,
6303 .requireds = { 0 },
6304 .optionals = { 0 },
6305 .posts = { 0 },
6306 .keywords = { 0 }
6307 };
6308
6309 return node;
6310}
6311
6315static void
6316pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6317 if (params->base.location.start == NULL) {
6318 params->base.location.start = param->location.start;
6319 } else {
6320 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6321 }
6322
6323 if (params->base.location.end == NULL) {
6324 params->base.location.end = param->location.end;
6325 } else {
6326 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6327 }
6328}
6329
6333static void
6334pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6335 pm_parameters_node_location_set(params, param);
6336 pm_node_list_append(&params->requireds, param);
6337}
6338
6342static void
6343pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6344 pm_parameters_node_location_set(params, (pm_node_t *) param);
6345 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6346}
6347
6351static void
6352pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6353 pm_parameters_node_location_set(params, param);
6354 pm_node_list_append(&params->posts, param);
6355}
6356
6360static void
6361pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6362 pm_parameters_node_location_set(params, param);
6363 params->rest = param;
6364}
6365
6369static void
6370pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6371 pm_parameters_node_location_set(params, param);
6372 pm_node_list_append(&params->keywords, param);
6373}
6374
6378static void
6379pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6380 assert(params->keyword_rest == NULL);
6381 pm_parameters_node_location_set(params, param);
6382 params->keyword_rest = param;
6383}
6384
6388static void
6389pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6390 assert(params->block == NULL);
6391 pm_parameters_node_location_set(params, (pm_node_t *) param);
6392 params->block = param;
6393}
6394
6398static pm_program_node_t *
6399pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6400 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6401
6402 *node = (pm_program_node_t) {
6403 {
6404 .type = PM_PROGRAM_NODE,
6405 .node_id = PM_NODE_IDENTIFY(parser),
6406 .location = {
6407 .start = statements == NULL ? parser->start : statements->base.location.start,
6408 .end = statements == NULL ? parser->end : statements->base.location.end
6409 }
6410 },
6411 .locals = *locals,
6412 .statements = statements
6413 };
6414
6415 return node;
6416}
6417
6421static pm_parentheses_node_t *
6422pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6423 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6424
6425 *node = (pm_parentheses_node_t) {
6426 {
6427 .type = PM_PARENTHESES_NODE,
6428 .flags = flags,
6429 .node_id = PM_NODE_IDENTIFY(parser),
6430 .location = {
6431 .start = opening->start,
6432 .end = closing->end
6433 }
6434 },
6435 .body = body,
6436 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6437 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6438 };
6439
6440 return node;
6441}
6442
6446static pm_pinned_expression_node_t *
6447pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6448 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6449
6450 *node = (pm_pinned_expression_node_t) {
6451 {
6452 .type = PM_PINNED_EXPRESSION_NODE,
6453 .node_id = PM_NODE_IDENTIFY(parser),
6454 .location = {
6455 .start = operator->start,
6456 .end = rparen->end
6457 }
6458 },
6459 .expression = expression,
6460 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6461 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6462 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6463 };
6464
6465 return node;
6466}
6467
6471static pm_pinned_variable_node_t *
6472pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6473 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6474
6475 *node = (pm_pinned_variable_node_t) {
6476 {
6477 .type = PM_PINNED_VARIABLE_NODE,
6478 .node_id = PM_NODE_IDENTIFY(parser),
6479 .location = {
6480 .start = operator->start,
6481 .end = variable->location.end
6482 }
6483 },
6484 .variable = variable,
6485 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6486 };
6487
6488 return node;
6489}
6490
6494static pm_post_execution_node_t *
6495pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6496 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6497
6498 *node = (pm_post_execution_node_t) {
6499 {
6500 .type = PM_POST_EXECUTION_NODE,
6501 .node_id = PM_NODE_IDENTIFY(parser),
6502 .location = {
6503 .start = keyword->start,
6504 .end = closing->end
6505 }
6506 },
6507 .statements = statements,
6508 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6509 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6510 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6511 };
6512
6513 return node;
6514}
6515
6519static pm_pre_execution_node_t *
6520pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6521 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6522
6523 *node = (pm_pre_execution_node_t) {
6524 {
6525 .type = PM_PRE_EXECUTION_NODE,
6526 .node_id = PM_NODE_IDENTIFY(parser),
6527 .location = {
6528 .start = keyword->start,
6529 .end = closing->end
6530 }
6531 },
6532 .statements = statements,
6533 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6534 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6535 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6536 };
6537
6538 return node;
6539}
6540
6544static pm_range_node_t *
6545pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6546 pm_assert_value_expression(parser, left);
6547 pm_assert_value_expression(parser, right);
6548
6549 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6550 pm_node_flags_t flags = 0;
6551
6552 // Indicate that this node is an exclusive range if the operator is `...`.
6553 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6554 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6555 }
6556
6557 // Indicate that this node is a static literal (i.e., can be compiled with
6558 // a putobject in CRuby) if the left and right are implicit nil, explicit
6559 // nil, or integers.
6560 if (
6561 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6562 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6563 ) {
6564 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6565 }
6566
6567 *node = (pm_range_node_t) {
6568 {
6569 .type = PM_RANGE_NODE,
6570 .flags = flags,
6571 .node_id = PM_NODE_IDENTIFY(parser),
6572 .location = {
6573 .start = (left == NULL ? operator->start : left->location.start),
6574 .end = (right == NULL ? operator->end : right->location.end)
6575 }
6576 },
6577 .left = left,
6578 .right = right,
6579 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6580 };
6581
6582 return node;
6583}
6584
6588static pm_redo_node_t *
6589pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6590 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6591 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6592
6593 *node = (pm_redo_node_t) {{
6594 .type = PM_REDO_NODE,
6595 .node_id = PM_NODE_IDENTIFY(parser),
6596 .location = PM_LOCATION_TOKEN_VALUE(token)
6597 }};
6598
6599 return node;
6600}
6601
6606static pm_regular_expression_node_t *
6607pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6608 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6609
6610 *node = (pm_regular_expression_node_t) {
6611 {
6612 .type = PM_REGULAR_EXPRESSION_NODE,
6613 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6614 .node_id = PM_NODE_IDENTIFY(parser),
6615 .location = {
6616 .start = MIN(opening->start, closing->start),
6617 .end = MAX(opening->end, closing->end)
6618 }
6619 },
6620 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6621 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6622 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6623 .unescaped = *unescaped
6624 };
6625
6626 return node;
6627}
6628
6632static inline pm_regular_expression_node_t *
6633pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6634 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6635}
6636
6640static pm_required_parameter_node_t *
6641pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6642 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6643
6644 *node = (pm_required_parameter_node_t) {
6645 {
6646 .type = PM_REQUIRED_PARAMETER_NODE,
6647 .node_id = PM_NODE_IDENTIFY(parser),
6648 .location = PM_LOCATION_TOKEN_VALUE(token)
6649 },
6650 .name = pm_parser_constant_id_token(parser, token)
6651 };
6652
6653 return node;
6654}
6655
6659static pm_rescue_modifier_node_t *
6660pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6661 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6662
6663 *node = (pm_rescue_modifier_node_t) {
6664 {
6665 .type = PM_RESCUE_MODIFIER_NODE,
6666 .node_id = PM_NODE_IDENTIFY(parser),
6667 .location = {
6668 .start = expression->location.start,
6669 .end = rescue_expression->location.end
6670 }
6671 },
6672 .expression = expression,
6673 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6674 .rescue_expression = rescue_expression
6675 };
6676
6677 return node;
6678}
6679
6683static pm_rescue_node_t *
6684pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6685 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6686
6687 *node = (pm_rescue_node_t) {
6688 {
6689 .type = PM_RESCUE_NODE,
6690 .node_id = PM_NODE_IDENTIFY(parser),
6691 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6692 },
6693 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6694 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6695 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6696 .reference = NULL,
6697 .statements = NULL,
6698 .subsequent = NULL,
6699 .exceptions = { 0 }
6700 };
6701
6702 return node;
6703}
6704
6705static inline void
6706pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6707 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6708}
6709
6713static void
6714pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6715 node->reference = reference;
6716 node->base.location.end = reference->location.end;
6717}
6718
6722static void
6723pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6724 node->statements = statements;
6725 if (pm_statements_node_body_length(statements) > 0) {
6726 node->base.location.end = statements->base.location.end;
6727 }
6728}
6729
6733static void
6734pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6735 node->subsequent = subsequent;
6736 node->base.location.end = subsequent->base.location.end;
6737}
6738
6742static void
6743pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6744 pm_node_list_append(&node->exceptions, exception);
6745 node->base.location.end = exception->location.end;
6746}
6747
6751static pm_rest_parameter_node_t *
6752pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6753 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6754
6755 *node = (pm_rest_parameter_node_t) {
6756 {
6757 .type = PM_REST_PARAMETER_NODE,
6758 .node_id = PM_NODE_IDENTIFY(parser),
6759 .location = {
6760 .start = operator->start,
6761 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6762 }
6763 },
6764 .name = pm_parser_optional_constant_id_token(parser, name),
6765 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6766 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6767 };
6768
6769 return node;
6770}
6771
6775static pm_retry_node_t *
6776pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6777 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6778 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6779
6780 *node = (pm_retry_node_t) {{
6781 .type = PM_RETRY_NODE,
6782 .node_id = PM_NODE_IDENTIFY(parser),
6783 .location = PM_LOCATION_TOKEN_VALUE(token)
6784 }};
6785
6786 return node;
6787}
6788
6792static pm_return_node_t *
6793pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6794 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6795
6796 *node = (pm_return_node_t) {
6797 {
6798 .type = PM_RETURN_NODE,
6799 .node_id = PM_NODE_IDENTIFY(parser),
6800 .location = {
6801 .start = keyword->start,
6802 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6803 }
6804 },
6805 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6806 .arguments = arguments
6807 };
6808
6809 return node;
6810}
6811
6815static pm_self_node_t *
6816pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6817 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6818 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6819
6820 *node = (pm_self_node_t) {{
6821 .type = PM_SELF_NODE,
6822 .node_id = PM_NODE_IDENTIFY(parser),
6823 .location = PM_LOCATION_TOKEN_VALUE(token)
6824 }};
6825
6826 return node;
6827}
6828
6832static pm_shareable_constant_node_t *
6833pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6834 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6835
6836 *node = (pm_shareable_constant_node_t) {
6837 {
6838 .type = PM_SHAREABLE_CONSTANT_NODE,
6839 .flags = (pm_node_flags_t) value,
6840 .node_id = PM_NODE_IDENTIFY(parser),
6841 .location = PM_LOCATION_NODE_VALUE(write)
6842 },
6843 .write = write
6844 };
6845
6846 return node;
6847}
6848
6852static pm_singleton_class_node_t *
6853pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6854 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6855
6856 *node = (pm_singleton_class_node_t) {
6857 {
6858 .type = PM_SINGLETON_CLASS_NODE,
6859 .node_id = PM_NODE_IDENTIFY(parser),
6860 .location = {
6861 .start = class_keyword->start,
6862 .end = end_keyword->end
6863 }
6864 },
6865 .locals = *locals,
6866 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6867 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6868 .expression = expression,
6869 .body = body,
6870 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6871 };
6872
6873 return node;
6874}
6875
6879static pm_source_encoding_node_t *
6880pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6881 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6882 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6883
6884 *node = (pm_source_encoding_node_t) {{
6885 .type = PM_SOURCE_ENCODING_NODE,
6886 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6887 .node_id = PM_NODE_IDENTIFY(parser),
6888 .location = PM_LOCATION_TOKEN_VALUE(token)
6889 }};
6890
6891 return node;
6892}
6893
6897static pm_source_file_node_t*
6898pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6899 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6900 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6901
6902 pm_node_flags_t flags = 0;
6903
6904 switch (parser->frozen_string_literal) {
6905 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6906 flags |= PM_STRING_FLAGS_MUTABLE;
6907 break;
6908 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6909 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6910 break;
6911 }
6912
6913 *node = (pm_source_file_node_t) {
6914 {
6915 .type = PM_SOURCE_FILE_NODE,
6916 .flags = flags,
6917 .node_id = PM_NODE_IDENTIFY(parser),
6918 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6919 },
6920 .filepath = parser->filepath
6921 };
6922
6923 return node;
6924}
6925
6929static pm_source_line_node_t *
6930pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6931 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6932 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6933
6934 *node = (pm_source_line_node_t) {{
6935 .type = PM_SOURCE_LINE_NODE,
6936 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6937 .node_id = PM_NODE_IDENTIFY(parser),
6938 .location = PM_LOCATION_TOKEN_VALUE(token)
6939 }};
6940
6941 return node;
6942}
6943
6947static pm_splat_node_t *
6948pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6949 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6950
6951 *node = (pm_splat_node_t) {
6952 {
6953 .type = PM_SPLAT_NODE,
6954 .node_id = PM_NODE_IDENTIFY(parser),
6955 .location = {
6956 .start = operator->start,
6957 .end = (expression == NULL ? operator->end : expression->location.end)
6958 }
6959 },
6960 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6961 .expression = expression
6962 };
6963
6964 return node;
6965}
6966
6970static pm_statements_node_t *
6971pm_statements_node_create(pm_parser_t *parser) {
6972 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6973
6974 *node = (pm_statements_node_t) {
6975 {
6976 .type = PM_STATEMENTS_NODE,
6977 .node_id = PM_NODE_IDENTIFY(parser),
6978 .location = PM_LOCATION_NULL_VALUE(parser)
6979 },
6980 .body = { 0 }
6981 };
6982
6983 return node;
6984}
6985
6989static size_t
6990pm_statements_node_body_length(pm_statements_node_t *node) {
6991 return node && node->body.size;
6992}
6993
6997static void
6998pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6999 node->base.location = (pm_location_t) { .start = start, .end = end };
7000}
7001
7006static inline void
7007pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
7008 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
7009 node->base.location.start = statement->location.start;
7010 }
7011
7012 if (statement->location.end > node->base.location.end) {
7013 node->base.location.end = statement->location.end;
7014 }
7015}
7016
7020static void
7021pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
7022 pm_statements_node_body_update(node, statement);
7023
7024 if (node->body.size > 0) {
7025 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
7026
7027 switch (PM_NODE_TYPE(previous)) {
7028 case PM_BREAK_NODE:
7029 case PM_NEXT_NODE:
7030 case PM_REDO_NODE:
7031 case PM_RETRY_NODE:
7032 case PM_RETURN_NODE:
7033 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7034 break;
7035 default:
7036 break;
7037 }
7038 }
7039
7040 pm_node_list_append(&node->body, statement);
7041 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7042}
7043
7047static void
7048pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7049 pm_statements_node_body_update(node, statement);
7050 pm_node_list_prepend(&node->body, statement);
7051 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7052}
7053
7057static inline pm_string_node_t *
7058pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7059 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7060 pm_node_flags_t flags = 0;
7061
7062 switch (parser->frozen_string_literal) {
7063 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7064 flags = PM_STRING_FLAGS_MUTABLE;
7065 break;
7066 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7067 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7068 break;
7069 }
7070
7071 *node = (pm_string_node_t) {
7072 {
7073 .type = PM_STRING_NODE,
7074 .flags = flags,
7075 .node_id = PM_NODE_IDENTIFY(parser),
7076 .location = {
7077 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7078 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7079 }
7080 },
7081 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7082 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7083 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7084 .unescaped = *string
7085 };
7086
7087 return node;
7088}
7089
7093static pm_string_node_t *
7094pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7095 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7096}
7097
7102static pm_string_node_t *
7103pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7104 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7105 parser->current_string = PM_STRING_EMPTY;
7106 return node;
7107}
7108
7112static pm_super_node_t *
7113pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7114 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7115 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7116
7117 const uint8_t *end = pm_arguments_end(arguments);
7118 if (end == NULL) {
7119 assert(false && "unreachable");
7120 }
7121
7122 *node = (pm_super_node_t) {
7123 {
7124 .type = PM_SUPER_NODE,
7125 .node_id = PM_NODE_IDENTIFY(parser),
7126 .location = {
7127 .start = keyword->start,
7128 .end = end,
7129 }
7130 },
7131 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7132 .lparen_loc = arguments->opening_loc,
7133 .arguments = arguments->arguments,
7134 .rparen_loc = arguments->closing_loc,
7135 .block = arguments->block
7136 };
7137
7138 return node;
7139}
7140
7145static bool
7146pm_ascii_only_p(const pm_string_t *contents) {
7147 const size_t length = pm_string_length(contents);
7148 const uint8_t *source = pm_string_source(contents);
7149
7150 for (size_t index = 0; index < length; index++) {
7151 if (source[index] & 0x80) return false;
7152 }
7153
7154 return true;
7155}
7156
7160static void
7161parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7162 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7163 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7164
7165 if (width == 0) {
7166 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7167 break;
7168 }
7169
7170 cursor += width;
7171 }
7172}
7173
7178static void
7179parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7180 const pm_encoding_t *encoding = parser->encoding;
7181
7182 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7183 size_t width = encoding->char_width(cursor, end - cursor);
7184
7185 if (width == 0) {
7186 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7187 break;
7188 }
7189
7190 cursor += width;
7191 }
7192}
7193
7203static inline pm_node_flags_t
7204parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7205 if (parser->explicit_encoding != NULL) {
7206 // A Symbol may optionally have its encoding explicitly set. This will
7207 // happen if an escape sequence results in a non-ASCII code point.
7208 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7209 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7210 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7211 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7212 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7213 } else if (validate) {
7214 parse_symbol_encoding_validate_other(parser, location, contents);
7215 }
7216 } else if (pm_ascii_only_p(contents)) {
7217 // Ruby stipulates that all source files must use an ASCII-compatible
7218 // encoding. Thus, all symbols appearing in source are eligible for
7219 // "downgrading" to US-ASCII.
7220 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7221 } else if (validate) {
7222 parse_symbol_encoding_validate_other(parser, location, contents);
7223 }
7224
7225 return 0;
7226}
7227
7228static pm_node_flags_t
7229parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7230 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7231 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7232 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7233 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7234
7235 // There's special validation logic used if a string does not contain any character escape sequences.
7236 if (parser->explicit_encoding == NULL) {
7237 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7238 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7239 // the US-ASCII encoding.
7240 if (ascii_only) {
7241 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7242 }
7243
7244 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7245 if (!ascii_only) {
7246 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7247 }
7248 } else if (parser->encoding != modifier_encoding) {
7249 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7250
7251 if (modifier == 'n' && !ascii_only) {
7252 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7253 }
7254 }
7255
7256 return flags;
7257 }
7258
7259 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7260 bool mixed_encoding = false;
7261
7262 if (mixed_encoding) {
7263 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7264 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7265 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7266 bool valid_string_in_modifier_encoding = true;
7267
7268 if (!valid_string_in_modifier_encoding) {
7269 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7270 }
7271 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7272 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7273 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7274 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7275 }
7276 }
7277
7278 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7279 return flags;
7280}
7281
7288static pm_node_flags_t
7289parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7290 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7291 bool valid_unicode_range = true;
7292 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7293 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7294 return flags;
7295 }
7296
7297 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7298 // to multi-byte characters are allowed.
7299 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7300 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7301 // following error message appearing twice. We do the same for compatibility.
7302 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7303 }
7304
7313 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7314 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7315 }
7316
7317 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7318 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7319 }
7320
7321 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7322 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7323 }
7324
7325 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7326 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7327 }
7328
7329 // At this point no encoding modifiers will be present on the regular expression as they would have already
7330 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7331 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7332 if (ascii_only) {
7333 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7334 }
7335
7336 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7337 // or by specifying a modifier.
7338 //
7339 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7340 if (parser->explicit_encoding != NULL) {
7341 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7342 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7343 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7344 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7345 }
7346 }
7347
7348 return 0;
7349}
7350
7355static pm_symbol_node_t *
7356pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7357 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7358
7359 *node = (pm_symbol_node_t) {
7360 {
7361 .type = PM_SYMBOL_NODE,
7362 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7363 .node_id = PM_NODE_IDENTIFY(parser),
7364 .location = {
7365 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7366 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7367 }
7368 },
7369 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7370 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7371 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7372 .unescaped = *unescaped
7373 };
7374
7375 return node;
7376}
7377
7381static inline pm_symbol_node_t *
7382pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7383 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7384}
7385
7389static pm_symbol_node_t *
7390pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7391 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7392 parser->current_string = PM_STRING_EMPTY;
7393 return node;
7394}
7395
7399static pm_symbol_node_t *
7400pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7401 pm_symbol_node_t *node;
7402
7403 switch (token->type) {
7404 case PM_TOKEN_LABEL: {
7405 pm_token_t opening = not_provided(parser);
7406 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7407
7408 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7409 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7410
7411 assert((label.end - label.start) >= 0);
7412 pm_string_shared_init(&node->unescaped, label.start, label.end);
7413 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7414
7415 break;
7416 }
7417 case PM_TOKEN_MISSING: {
7418 pm_token_t opening = not_provided(parser);
7419 pm_token_t closing = not_provided(parser);
7420
7421 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7422 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7423 break;
7424 }
7425 default:
7426 assert(false && "unreachable");
7427 node = NULL;
7428 break;
7429 }
7430
7431 return node;
7432}
7433
7437static pm_symbol_node_t *
7438pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7439 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7440
7441 *node = (pm_symbol_node_t) {
7442 {
7443 .type = PM_SYMBOL_NODE,
7444 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7445 .node_id = PM_NODE_IDENTIFY(parser),
7446 .location = PM_LOCATION_NULL_VALUE(parser)
7447 },
7448 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7449 .unescaped = { 0 }
7450 };
7451
7452 pm_string_constant_init(&node->unescaped, content, strlen(content));
7453 return node;
7454}
7455
7459static bool
7460pm_symbol_node_label_p(pm_node_t *node) {
7461 const uint8_t *end = NULL;
7462
7463 switch (PM_NODE_TYPE(node)) {
7464 case PM_SYMBOL_NODE:
7465 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7466 break;
7467 case PM_INTERPOLATED_SYMBOL_NODE:
7468 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7469 break;
7470 default:
7471 return false;
7472 }
7473
7474 return (end != NULL) && (end[-1] == ':');
7475}
7476
7480static pm_symbol_node_t *
7481pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7482 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7483
7484 *new_node = (pm_symbol_node_t) {
7485 {
7486 .type = PM_SYMBOL_NODE,
7487 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7488 .node_id = PM_NODE_IDENTIFY(parser),
7489 .location = {
7490 .start = opening->start,
7491 .end = closing->end
7492 }
7493 },
7494 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7495 .value_loc = node->content_loc,
7496 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7497 .unescaped = node->unescaped
7498 };
7499
7500 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7501 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7502
7503 // We are explicitly _not_ using pm_node_destroy here because we don't want
7504 // to trash the unescaped string. We could instead copy the string if we
7505 // know that it is owned, but we're taking the fast path for now.
7506 xfree(node);
7507
7508 return new_node;
7509}
7510
7514static pm_string_node_t *
7515pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7516 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7517 pm_node_flags_t flags = 0;
7518
7519 switch (parser->frozen_string_literal) {
7520 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7521 flags = PM_STRING_FLAGS_MUTABLE;
7522 break;
7523 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7524 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7525 break;
7526 }
7527
7528 *new_node = (pm_string_node_t) {
7529 {
7530 .type = PM_STRING_NODE,
7531 .flags = flags,
7532 .node_id = PM_NODE_IDENTIFY(parser),
7533 .location = node->base.location
7534 },
7535 .opening_loc = node->opening_loc,
7536 .content_loc = node->value_loc,
7537 .closing_loc = node->closing_loc,
7538 .unescaped = node->unescaped
7539 };
7540
7541 // We are explicitly _not_ using pm_node_destroy here because we don't want
7542 // to trash the unescaped string. We could instead copy the string if we
7543 // know that it is owned, but we're taking the fast path for now.
7544 xfree(node);
7545
7546 return new_node;
7547}
7548
7552static pm_true_node_t *
7553pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7554 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7555 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7556
7557 *node = (pm_true_node_t) {{
7558 .type = PM_TRUE_NODE,
7559 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7560 .node_id = PM_NODE_IDENTIFY(parser),
7561 .location = PM_LOCATION_TOKEN_VALUE(token)
7562 }};
7563
7564 return node;
7565}
7566
7570static pm_true_node_t *
7571pm_true_node_synthesized_create(pm_parser_t *parser) {
7572 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7573
7574 *node = (pm_true_node_t) {{
7575 .type = PM_TRUE_NODE,
7576 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7577 .node_id = PM_NODE_IDENTIFY(parser),
7578 .location = { .start = parser->start, .end = parser->end }
7579 }};
7580
7581 return node;
7582}
7583
7587static pm_undef_node_t *
7588pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7589 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7590 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7591
7592 *node = (pm_undef_node_t) {
7593 {
7594 .type = PM_UNDEF_NODE,
7595 .node_id = PM_NODE_IDENTIFY(parser),
7596 .location = PM_LOCATION_TOKEN_VALUE(token),
7597 },
7598 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7599 .names = { 0 }
7600 };
7601
7602 return node;
7603}
7604
7608static void
7609pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7610 node->base.location.end = name->location.end;
7611 pm_node_list_append(&node->names, name);
7612}
7613
7617static pm_unless_node_t *
7618pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7619 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7620 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7621
7622 const uint8_t *end;
7623 if (statements != NULL) {
7624 end = statements->base.location.end;
7625 } else {
7626 end = predicate->location.end;
7627 }
7628
7629 *node = (pm_unless_node_t) {
7630 {
7631 .type = PM_UNLESS_NODE,
7632 .flags = PM_NODE_FLAG_NEWLINE,
7633 .node_id = PM_NODE_IDENTIFY(parser),
7634 .location = {
7635 .start = keyword->start,
7636 .end = end
7637 },
7638 },
7639 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7640 .predicate = predicate,
7641 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7642 .statements = statements,
7643 .else_clause = NULL,
7644 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7645 };
7646
7647 return node;
7648}
7649
7653static pm_unless_node_t *
7654pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7655 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7656 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7657
7658 pm_statements_node_t *statements = pm_statements_node_create(parser);
7659 pm_statements_node_body_append(parser, statements, statement, true);
7660
7661 *node = (pm_unless_node_t) {
7662 {
7663 .type = PM_UNLESS_NODE,
7664 .flags = PM_NODE_FLAG_NEWLINE,
7665 .node_id = PM_NODE_IDENTIFY(parser),
7666 .location = {
7667 .start = statement->location.start,
7668 .end = predicate->location.end
7669 },
7670 },
7671 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7672 .predicate = predicate,
7673 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7674 .statements = statements,
7675 .else_clause = NULL,
7676 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7677 };
7678
7679 return node;
7680}
7681
7682static inline void
7683pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7684 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7685 node->base.location.end = end_keyword->end;
7686}
7687
7693static void
7694pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7695 assert(parser->current_block_exits != NULL);
7696
7697 // All of the block exits that we want to remove should be within the
7698 // statements, and since we are modifying the statements, we shouldn't have
7699 // to check the end location.
7700 const uint8_t *start = statements->base.location.start;
7701
7702 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7703 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7704 if (block_exit->location.start < start) break;
7705
7706 // Implicitly remove from the list by lowering the size.
7707 parser->current_block_exits->size--;
7708 }
7709}
7710
7714static pm_until_node_t *
7715pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7716 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7717 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7718
7719 *node = (pm_until_node_t) {
7720 {
7721 .type = PM_UNTIL_NODE,
7722 .flags = flags,
7723 .node_id = PM_NODE_IDENTIFY(parser),
7724 .location = {
7725 .start = keyword->start,
7726 .end = closing->end,
7727 },
7728 },
7729 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7730 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7731 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7732 .predicate = predicate,
7733 .statements = statements
7734 };
7735
7736 return node;
7737}
7738
7742static pm_until_node_t *
7743pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7744 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7745 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7746 pm_loop_modifier_block_exits(parser, statements);
7747
7748 *node = (pm_until_node_t) {
7749 {
7750 .type = PM_UNTIL_NODE,
7751 .flags = flags,
7752 .node_id = PM_NODE_IDENTIFY(parser),
7753 .location = {
7754 .start = statements->base.location.start,
7755 .end = predicate->location.end,
7756 },
7757 },
7758 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7759 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7760 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7761 .predicate = predicate,
7762 .statements = statements
7763 };
7764
7765 return node;
7766}
7767
7771static pm_when_node_t *
7772pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7773 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7774
7775 *node = (pm_when_node_t) {
7776 {
7777 .type = PM_WHEN_NODE,
7778 .node_id = PM_NODE_IDENTIFY(parser),
7779 .location = {
7780 .start = keyword->start,
7781 .end = NULL
7782 }
7783 },
7784 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7785 .statements = NULL,
7786 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7787 .conditions = { 0 }
7788 };
7789
7790 return node;
7791}
7792
7796static void
7797pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7798 node->base.location.end = condition->location.end;
7799 pm_node_list_append(&node->conditions, condition);
7800}
7801
7805static inline void
7806pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7807 node->base.location.end = then_keyword->end;
7808 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7809}
7810
7814static void
7815pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7816 if (statements->base.location.end > node->base.location.end) {
7817 node->base.location.end = statements->base.location.end;
7818 }
7819
7820 node->statements = statements;
7821}
7822
7826static pm_while_node_t *
7827pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7828 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7829 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7830
7831 *node = (pm_while_node_t) {
7832 {
7833 .type = PM_WHILE_NODE,
7834 .flags = flags,
7835 .node_id = PM_NODE_IDENTIFY(parser),
7836 .location = {
7837 .start = keyword->start,
7838 .end = closing->end
7839 },
7840 },
7841 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7842 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7843 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7844 .predicate = predicate,
7845 .statements = statements
7846 };
7847
7848 return node;
7849}
7850
7854static pm_while_node_t *
7855pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7856 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7857 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7858 pm_loop_modifier_block_exits(parser, statements);
7859
7860 *node = (pm_while_node_t) {
7861 {
7862 .type = PM_WHILE_NODE,
7863 .flags = flags,
7864 .node_id = PM_NODE_IDENTIFY(parser),
7865 .location = {
7866 .start = statements->base.location.start,
7867 .end = predicate->location.end
7868 },
7869 },
7870 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7871 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7872 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7873 .predicate = predicate,
7874 .statements = statements
7875 };
7876
7877 return node;
7878}
7879
7883static pm_while_node_t *
7884pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7885 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7886
7887 *node = (pm_while_node_t) {
7888 {
7889 .type = PM_WHILE_NODE,
7890 .node_id = PM_NODE_IDENTIFY(parser),
7891 .location = PM_LOCATION_NULL_VALUE(parser)
7892 },
7893 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7894 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7895 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7896 .predicate = predicate,
7897 .statements = statements
7898 };
7899
7900 return node;
7901}
7902
7907static pm_x_string_node_t *
7908pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7909 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7910
7911 *node = (pm_x_string_node_t) {
7912 {
7913 .type = PM_X_STRING_NODE,
7914 .flags = PM_STRING_FLAGS_FROZEN,
7915 .node_id = PM_NODE_IDENTIFY(parser),
7916 .location = {
7917 .start = opening->start,
7918 .end = closing->end
7919 },
7920 },
7921 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7922 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7923 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7924 .unescaped = *unescaped
7925 };
7926
7927 return node;
7928}
7929
7933static inline pm_x_string_node_t *
7934pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7935 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7936}
7937
7941static pm_yield_node_t *
7942pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7943 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7944
7945 const uint8_t *end;
7946 if (rparen_loc->start != NULL) {
7947 end = rparen_loc->end;
7948 } else if (arguments != NULL) {
7949 end = arguments->base.location.end;
7950 } else if (lparen_loc->start != NULL) {
7951 end = lparen_loc->end;
7952 } else {
7953 end = keyword->end;
7954 }
7955
7956 *node = (pm_yield_node_t) {
7957 {
7958 .type = PM_YIELD_NODE,
7959 .node_id = PM_NODE_IDENTIFY(parser),
7960 .location = {
7961 .start = keyword->start,
7962 .end = end
7963 },
7964 },
7965 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7966 .lparen_loc = *lparen_loc,
7967 .arguments = arguments,
7968 .rparen_loc = *rparen_loc
7969 };
7970
7971 return node;
7972}
7973
7974#undef PM_NODE_ALLOC
7975#undef PM_NODE_IDENTIFY
7976
7981static int
7982pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7983 pm_scope_t *scope = parser->current_scope;
7984 int depth = 0;
7985
7986 while (scope != NULL) {
7987 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7988 if (scope->closed) break;
7989
7990 scope = scope->previous;
7991 depth++;
7992 }
7993
7994 return -1;
7995}
7996
8002static inline int
8003pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
8004 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
8005}
8006
8010static inline void
8011pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8012 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
8013}
8014
8018static pm_constant_id_t
8019pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8020 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
8021 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
8022 return constant_id;
8023}
8024
8028static inline pm_constant_id_t
8029pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8030 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8031}
8032
8036static pm_constant_id_t
8037pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8038 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8039 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8040 return constant_id;
8041}
8042
8046static pm_constant_id_t
8047pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8048 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8049 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8050 return constant_id;
8051}
8052
8060static bool
8061pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8062 // We want to check whether the parameter name is a numbered parameter or
8063 // not.
8064 pm_refute_numbered_parameter(parser, name->start, name->end);
8065
8066 // Otherwise we'll fetch the constant id for the parameter name and check
8067 // whether it's already in the current scope.
8068 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8069
8070 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8071 // Add an error if the parameter doesn't start with _ and has been seen before
8072 if ((name->start < name->end) && (*name->start != '_')) {
8073 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8074 }
8075 return true;
8076 }
8077 return false;
8078}
8079
8083static void
8084pm_parser_scope_pop(pm_parser_t *parser) {
8085 pm_scope_t *scope = parser->current_scope;
8086 parser->current_scope = scope->previous;
8087 pm_locals_free(&scope->locals);
8088 pm_node_list_free(&scope->implicit_parameters);
8089 xfree(scope);
8090}
8091
8092/******************************************************************************/
8093/* Stack helpers */
8094/******************************************************************************/
8095
8099static inline void
8100pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8101 *stack = (*stack << 1) | (value & 1);
8102}
8103
8107static inline void
8108pm_state_stack_pop(pm_state_stack_t *stack) {
8109 *stack >>= 1;
8110}
8111
8115static inline bool
8116pm_state_stack_p(const pm_state_stack_t *stack) {
8117 return *stack & 1;
8118}
8119
8120static inline void
8121pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8122 // Use the negation of the value to prevent stack overflow.
8123 pm_state_stack_push(&parser->accepts_block_stack, !value);
8124}
8125
8126static inline void
8127pm_accepts_block_stack_pop(pm_parser_t *parser) {
8128 pm_state_stack_pop(&parser->accepts_block_stack);
8129}
8130
8131static inline bool
8132pm_accepts_block_stack_p(pm_parser_t *parser) {
8133 return !pm_state_stack_p(&parser->accepts_block_stack);
8134}
8135
8136static inline void
8137pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8138 pm_state_stack_push(&parser->do_loop_stack, value);
8139}
8140
8141static inline void
8142pm_do_loop_stack_pop(pm_parser_t *parser) {
8143 pm_state_stack_pop(&parser->do_loop_stack);
8144}
8145
8146static inline bool
8147pm_do_loop_stack_p(pm_parser_t *parser) {
8148 return pm_state_stack_p(&parser->do_loop_stack);
8149}
8150
8151/******************************************************************************/
8152/* Lexer check helpers */
8153/******************************************************************************/
8154
8159static inline uint8_t
8160peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8161 if (cursor < parser->end) {
8162 return *cursor;
8163 } else {
8164 return '\0';
8165 }
8166}
8167
8173static inline uint8_t
8174peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8175 return peek_at(parser, parser->current.end + offset);
8176}
8177
8182static inline uint8_t
8183peek(const pm_parser_t *parser) {
8184 return peek_at(parser, parser->current.end);
8185}
8186
8191static inline bool
8192match(pm_parser_t *parser, uint8_t value) {
8193 if (peek(parser) == value) {
8194 parser->current.end++;
8195 return true;
8196 }
8197 return false;
8198}
8199
8204static inline size_t
8205match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8206 if (peek_at(parser, cursor) == '\n') {
8207 return 1;
8208 }
8209 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8210 return 2;
8211 }
8212 return 0;
8213}
8214
8220static inline size_t
8221match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8222 return match_eol_at(parser, parser->current.end + offset);
8223}
8224
8230static inline size_t
8231match_eol(pm_parser_t *parser) {
8232 return match_eol_at(parser, parser->current.end);
8233}
8234
/**
 * Returns a pointer to the first '\n' byte within the `length` bytes starting
 * at `cursor`, or NULL if there is none. `length` must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain byte search is sufficient here: none of the supported encodings
    // use \n as one of the bytes of a multi-byte character.
    return memchr(cursor, '\n', span);
}
8247
8251static inline bool
8252ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8253 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8254}
8255
8260static bool
8261parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8262 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8263
8264 if (encoding != NULL) {
8265 if (parser->encoding != encoding) {
8266 parser->encoding = encoding;
8267 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8268 }
8269
8270 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8271 return true;
8272 }
8273
8274 return false;
8275}
8276
8281static void
8282parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8283 const uint8_t *cursor = parser->current.start + 1;
8284 const uint8_t *end = parser->current.end;
8285
8286 bool separator = false;
8287 while (true) {
8288 if (end - cursor <= 6) return;
8289 switch (cursor[6]) {
8290 case 'C': case 'c': cursor += 6; continue;
8291 case 'O': case 'o': cursor += 5; continue;
8292 case 'D': case 'd': cursor += 4; continue;
8293 case 'I': case 'i': cursor += 3; continue;
8294 case 'N': case 'n': cursor += 2; continue;
8295 case 'G': case 'g': cursor += 1; continue;
8296 case '=': case ':':
8297 separator = true;
8298 cursor += 6;
8299 break;
8300 default:
8301 cursor += 6;
8302 if (pm_char_is_whitespace(*cursor)) break;
8303 continue;
8304 }
8305 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8306 separator = false;
8307 }
8308
8309 while (true) {
8310 do {
8311 if (++cursor >= end) return;
8312 } while (pm_char_is_whitespace(*cursor));
8313
8314 if (separator) break;
8315 if (*cursor != '=' && *cursor != ':') return;
8316
8317 separator = true;
8318 cursor++;
8319 }
8320
8321 const uint8_t *value_start = cursor;
8322 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8323
8324 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8325 // If we were unable to parse the encoding value, then we've got an
8326 // issue because we didn't understand the encoding that the user was
8327 // trying to use. In this case we'll keep using the default encoding but
8328 // add an error to the parser to indicate an unsuccessful parse.
8329 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8330 }
8331}
8332
/**
 * The result of interpreting the value of a boolean magic comment.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
8338
8343static pm_magic_comment_boolean_value_t
8344parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8345 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8346 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8347 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8348 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8349 } else {
8350 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8351 }
8352}
8353
/**
 * Returns true if the byte is one of the characters that delimit a magic
 * comment key: a single quote, a double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8358
8364static inline const uint8_t *
8365parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8366 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8367 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8368 return cursor;
8369 }
8370 cursor++;
8371 }
8372 return NULL;
8373}
8374
8385static inline bool
8386parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8387 bool result = true;
8388
8389 const uint8_t *start = parser->current.start + 1;
8390 const uint8_t *end = parser->current.end;
8391 if (end - start <= 7) return false;
8392
8393 const uint8_t *cursor;
8394 bool indicator = false;
8395
8396 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8397 start = cursor + 3;
8398
8399 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8400 end = cursor;
8401 indicator = true;
8402 } else {
8403 // If we have a start marker but not an end marker, then we cannot
8404 // have a magic comment.
8405 return false;
8406 }
8407 }
8408
8409 cursor = start;
8410 while (cursor < end) {
8411 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8412
8413 const uint8_t *key_start = cursor;
8414 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8415
8416 const uint8_t *key_end = cursor;
8417 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8418 if (cursor == end) break;
8419
8420 if (*cursor == ':') {
8421 cursor++;
8422 } else {
8423 if (!indicator) return false;
8424 continue;
8425 }
8426
8427 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8428 if (cursor == end) break;
8429
8430 const uint8_t *value_start;
8431 const uint8_t *value_end;
8432
8433 if (*cursor == '"') {
8434 value_start = ++cursor;
8435 for (; cursor < end && *cursor != '"'; cursor++) {
8436 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8437 }
8438 value_end = cursor;
8439 if (*cursor == '"') cursor++;
8440 } else {
8441 value_start = cursor;
8442 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8443 value_end = cursor;
8444 }
8445
8446 if (indicator) {
8447 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8448 } else {
8449 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8450 if (cursor != end) return false;
8451 }
8452
8453 // Here, we need to do some processing on the key to swap out dashes for
8454 // underscores. We only need to do this if there _is_ a dash in the key.
8455 pm_string_t key;
8456 const size_t key_length = (size_t) (key_end - key_start);
8457 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8458
8459 if (dash == NULL) {
8460 pm_string_shared_init(&key, key_start, key_end);
8461 } else {
8462 uint8_t *buffer = xmalloc(key_length);
8463 if (buffer == NULL) break;
8464
8465 memcpy(buffer, key_start, key_length);
8466 buffer[dash - key_start] = '_';
8467
8468 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8469 buffer[dash - key_start] = '_';
8470 }
8471
8472 pm_string_owned_init(&key, buffer, key_length);
8473 }
8474
8475 // Finally, we can start checking the key against the list of known
8476 // magic comment keys, and potentially change state based on that.
8477 const uint8_t *key_source = pm_string_source(&key);
8478 uint32_t value_length = (uint32_t) (value_end - value_start);
8479
8480 // We only want to attempt to compare against encoding comments if it's
8481 // the first line in the file (or the second in the case of a shebang).
8482 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8483 if (
8484 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8485 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8486 ) {
8487 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8488 }
8489 }
8490
8491 if (key_length == 11) {
8492 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8493 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8494 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8495 PM_PARSER_WARN_TOKEN_FORMAT(
8496 parser,
8497 parser->current,
8498 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8499 (int) key_length,
8500 (const char *) key_source,
8501 (int) value_length,
8502 (const char *) value_start
8503 );
8504 break;
8505 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8506 parser->warn_mismatched_indentation = false;
8507 break;
8508 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8509 parser->warn_mismatched_indentation = true;
8510 break;
8511 }
8512 }
8513 } else if (key_length == 21) {
8514 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8515 // We only want to handle frozen string literal comments if it's
8516 // before any semantic tokens have been seen.
8517 if (semantic_token_seen) {
8518 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8519 } else {
8520 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8521 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8522 PM_PARSER_WARN_TOKEN_FORMAT(
8523 parser,
8524 parser->current,
8525 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8526 (int) key_length,
8527 (const char *) key_source,
8528 (int) value_length,
8529 (const char *) value_start
8530 );
8531 break;
8532 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8534 break;
8535 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8537 break;
8538 }
8539 }
8540 }
8541 } else if (key_length == 24) {
8542 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8543 const uint8_t *cursor = parser->current.start;
8544 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8545
8546 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8547 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8548 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8549 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8550 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8551 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8552 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8553 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8554 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8555 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8556 } else {
8557 PM_PARSER_WARN_TOKEN_FORMAT(
8558 parser,
8559 parser->current,
8560 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8561 (int) key_length,
8562 (const char *) key_source,
8563 (int) value_length,
8564 (const char *) value_start
8565 );
8566 }
8567 }
8568 }
8569
8570 // When we're done, we want to free the string in case we had to
8571 // allocate memory for it.
8572 pm_string_free(&key);
8573
8574 // Allocate a new magic comment node to append to the parser's list.
8576 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8577 magic_comment->key_start = key_start;
8578 magic_comment->value_start = value_start;
8579 magic_comment->key_length = (uint32_t) key_length;
8580 magic_comment->value_length = value_length;
8581 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8582 }
8583 }
8584
8585 return result;
8586}
8587
8588/******************************************************************************/
8589/* Context manipulations */
8590/******************************************************************************/
8591
8592static const uint32_t context_terminators[] = {
8593 [PM_CONTEXT_NONE] = 0,
8594 [PM_CONTEXT_BEGIN] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8595 [PM_CONTEXT_BEGIN_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8596 [PM_CONTEXT_BEGIN_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8597 [PM_CONTEXT_BEGIN_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8598 [PM_CONTEXT_BLOCK_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
8599 [PM_CONTEXT_BLOCK_KEYWORDS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8600 [PM_CONTEXT_BLOCK_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8601 [PM_CONTEXT_BLOCK_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8602 [PM_CONTEXT_BLOCK_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8603 [PM_CONTEXT_CASE_WHEN] = (1 << PM_TOKEN_KEYWORD_WHEN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
8604 [PM_CONTEXT_CASE_IN] = (1 << PM_TOKEN_KEYWORD_IN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
8605 [PM_CONTEXT_CLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8606 [PM_CONTEXT_CLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8607 [PM_CONTEXT_CLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8608 [PM_CONTEXT_CLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8609 [PM_CONTEXT_DEF] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8610 [PM_CONTEXT_DEF_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8611 [PM_CONTEXT_DEF_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8612 [PM_CONTEXT_DEF_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8613 [PM_CONTEXT_DEF_PARAMS] = (1 << PM_TOKEN_EOF),
8614 [PM_CONTEXT_DEFINED] = (1 << PM_TOKEN_EOF),
8615 [PM_CONTEXT_DEFAULT_PARAMS] = (1 << PM_TOKEN_COMMA) | (1 << PM_TOKEN_PARENTHESIS_RIGHT),
8616 [PM_CONTEXT_ELSE] = (1 << PM_TOKEN_KEYWORD_END),
8617 [PM_CONTEXT_ELSIF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
8618 [PM_CONTEXT_EMBEXPR] = (1 << PM_TOKEN_EMBEXPR_END),
8619 [PM_CONTEXT_FOR] = (1 << PM_TOKEN_KEYWORD_END),
8620 [PM_CONTEXT_FOR_INDEX] = (1 << PM_TOKEN_KEYWORD_IN),
8621 [PM_CONTEXT_IF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
8622 [PM_CONTEXT_LAMBDA_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
8623 [PM_CONTEXT_LAMBDA_DO_END] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8624 [PM_CONTEXT_LAMBDA_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8625 [PM_CONTEXT_LAMBDA_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8626 [PM_CONTEXT_LAMBDA_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8627 [PM_CONTEXT_LOOP_PREDICATE] = (1 << PM_TOKEN_KEYWORD_DO) | (1 << PM_TOKEN_KEYWORD_THEN),
8628 [PM_CONTEXT_MAIN] = (1 << PM_TOKEN_EOF),
8629 [PM_CONTEXT_MODULE] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8630 [PM_CONTEXT_MODULE_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8631 [PM_CONTEXT_MODULE_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8632 [PM_CONTEXT_MODULE_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8633 [PM_CONTEXT_MULTI_TARGET] = (1 << PM_TOKEN_EOF),
8634 [PM_CONTEXT_PARENS] = (1 << PM_TOKEN_PARENTHESIS_RIGHT),
8635 [PM_CONTEXT_POSTEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
8636 [PM_CONTEXT_PREDICATE] = (1 << PM_TOKEN_KEYWORD_THEN) | (1 << PM_TOKEN_NEWLINE) | (1 << PM_TOKEN_SEMICOLON),
8637 [PM_CONTEXT_PREEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
8638 [PM_CONTEXT_RESCUE_MODIFIER] = (1 << PM_TOKEN_EOF),
8639 [PM_CONTEXT_SCLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
8640 [PM_CONTEXT_SCLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
8641 [PM_CONTEXT_SCLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
8642 [PM_CONTEXT_SCLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8643 [PM_CONTEXT_TERNARY] = (1 << PM_TOKEN_EOF),
8644 [PM_CONTEXT_UNLESS] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
8645 [PM_CONTEXT_UNTIL] = (1 << PM_TOKEN_KEYWORD_END),
8646 [PM_CONTEXT_WHILE] = (1 << PM_TOKEN_KEYWORD_END),
8647};
8648
8649static inline bool
8650context_terminator(pm_context_t context, pm_token_t *token) {
8651 return token->type < 32 && (context_terminators[context] & (1 << token->type));
8652}
8653
8658static pm_context_t
8659context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8660 pm_context_node_t *context_node = parser->current_context;
8661
8662 while (context_node != NULL) {
8663 if (context_terminator(context_node->context, token)) return context_node->context;
8664 context_node = context_node->prev;
8665 }
8666
8667 return PM_CONTEXT_NONE;
8668}
8669
8670static bool
8671context_push(pm_parser_t *parser, pm_context_t context) {
8672 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8673 if (context_node == NULL) return false;
8674
8675 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8676
8677 if (parser->current_context == NULL) {
8678 parser->current_context = context_node;
8679 } else {
8680 context_node->prev = parser->current_context;
8681 parser->current_context = context_node;
8682 }
8683
8684 return true;
8685}
8686
8687static void
8688context_pop(pm_parser_t *parser) {
8689 pm_context_node_t *prev = parser->current_context->prev;
8690 xfree(parser->current_context);
8691 parser->current_context = prev;
8692}
8693
8694static bool
8695context_p(const pm_parser_t *parser, pm_context_t context) {
8696 pm_context_node_t *context_node = parser->current_context;
8697
8698 while (context_node != NULL) {
8699 if (context_node->context == context) return true;
8700 context_node = context_node->prev;
8701 }
8702
8703 return false;
8704}
8705
8706static bool
8707context_def_p(const pm_parser_t *parser) {
8708 pm_context_node_t *context_node = parser->current_context;
8709
8710 while (context_node != NULL) {
8711 switch (context_node->context) {
8712 case PM_CONTEXT_DEF:
8717 return true;
8718 case PM_CONTEXT_CLASS:
8722 case PM_CONTEXT_MODULE:
8726 case PM_CONTEXT_SCLASS:
8730 return false;
8731 default:
8732 context_node = context_node->prev;
8733 }
8734 }
8735
8736 return false;
8737}
8738
8743static const char *
8744context_human(pm_context_t context) {
8745 switch (context) {
8746 case PM_CONTEXT_NONE:
8747 assert(false && "unreachable");
8748 return "";
8749 case PM_CONTEXT_BEGIN: return "begin statement";
8750 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8751 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8752 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8753 case PM_CONTEXT_CASE_IN: return "'in' clause";
8754 case PM_CONTEXT_CLASS: return "class definition";
8755 case PM_CONTEXT_DEF: return "method definition";
8756 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8757 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8758 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8759 case PM_CONTEXT_ELSE:
8766 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8767 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8768 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8775 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8776 case PM_CONTEXT_FOR: return "for loop";
8777 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8778 case PM_CONTEXT_IF: return "if statement";
8779 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8780 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8781 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8782 case PM_CONTEXT_MAIN: return "top level context";
8783 case PM_CONTEXT_MODULE: return "module definition";
8784 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8785 case PM_CONTEXT_PARENS: return "parentheses";
8786 case PM_CONTEXT_POSTEXE: return "'END' block";
8787 case PM_CONTEXT_PREDICATE: return "predicate";
8788 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8796 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8797 case PM_CONTEXT_SCLASS: return "singleton class definition";
8798 case PM_CONTEXT_TERNARY: return "ternary expression";
8799 case PM_CONTEXT_UNLESS: return "unless statement";
8800 case PM_CONTEXT_UNTIL: return "until statement";
8801 case PM_CONTEXT_WHILE: return "while statement";
8802 }
8803
8804 assert(false && "unreachable");
8805 return "";
8806}
8807
8808/******************************************************************************/
8809/* Specific token lexers */
8810/******************************************************************************/
8811
8812static inline void
8813pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8814 if (invalid != NULL) {
8815 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8816 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8817 }
8818}
8819
8820static size_t
8821pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8822 const uint8_t *invalid = NULL;
8823 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8824 pm_strspn_number_validate(parser, string, length, invalid);
8825 return length;
8826}
8827
8828static size_t
8829pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8830 const uint8_t *invalid = NULL;
8831 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8832 pm_strspn_number_validate(parser, string, length, invalid);
8833 return length;
8834}
8835
8836static size_t
8837pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8838 const uint8_t *invalid = NULL;
8839 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8840 pm_strspn_number_validate(parser, string, length, invalid);
8841 return length;
8842}
8843
8844static size_t
8845pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8846 const uint8_t *invalid = NULL;
8847 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8848 pm_strspn_number_validate(parser, string, length, invalid);
8849 return length;
8850}
8851
8852static pm_token_type_t
8853lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8854 pm_token_type_t type = PM_TOKEN_INTEGER;
8855
8856 // Here we're going to attempt to parse the optional decimal portion of a
8857 // float. If it's not there, then it's okay and we'll just continue on.
8858 if (peek(parser) == '.') {
8859 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8860 parser->current.end += 2;
8861 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8862 type = PM_TOKEN_FLOAT;
8863 } else {
8864 // If we had a . and then something else, then it's not a float
8865 // suffix on a number it's a method call or something else.
8866 return type;
8867 }
8868 }
8869
8870 // Here we're going to attempt to parse the optional exponent portion of a
8871 // float. If it's not there, it's okay and we'll just continue on.
8872 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8873 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8874 parser->current.end += 2;
8875
8876 if (pm_char_is_decimal_digit(peek(parser))) {
8877 parser->current.end++;
8878 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8879 } else {
8880 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8881 }
8882 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8883 parser->current.end++;
8884 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8885 } else {
8886 return type;
8887 }
8888
8889 *seen_e = true;
8890 type = PM_TOKEN_FLOAT;
8891 }
8892
8893 return type;
8894}
8895
8896static pm_token_type_t
8897lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8898 pm_token_type_t type = PM_TOKEN_INTEGER;
8899 *seen_e = false;
8900
8901 if (peek_offset(parser, -1) == '0') {
8902 switch (*parser->current.end) {
8903 // 0d1111 is a decimal number
8904 case 'd':
8905 case 'D':
8906 parser->current.end++;
8907 if (pm_char_is_decimal_digit(peek(parser))) {
8908 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8909 } else {
8910 match(parser, '_');
8911 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8912 }
8913
8914 break;
8915
8916 // 0b1111 is a binary number
8917 case 'b':
8918 case 'B':
8919 parser->current.end++;
8920 if (pm_char_is_binary_digit(peek(parser))) {
8921 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8922 } else {
8923 match(parser, '_');
8924 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8925 }
8926
8927 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8928 break;
8929
8930 // 0o1111 is an octal number
8931 case 'o':
8932 case 'O':
8933 parser->current.end++;
8934 if (pm_char_is_octal_digit(peek(parser))) {
8935 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8936 } else {
8937 match(parser, '_');
8938 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8939 }
8940
8941 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8942 break;
8943
8944 // 01111 is an octal number
8945 case '_':
8946 case '0':
8947 case '1':
8948 case '2':
8949 case '3':
8950 case '4':
8951 case '5':
8952 case '6':
8953 case '7':
8954 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8955 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8956 break;
8957
8958 // 0x1111 is a hexadecimal number
8959 case 'x':
8960 case 'X':
8961 parser->current.end++;
8962 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8963 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8964 } else {
8965 match(parser, '_');
8966 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8967 }
8968
8969 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8970 break;
8971
8972 // 0.xxx is a float
8973 case '.': {
8974 type = lex_optional_float_suffix(parser, seen_e);
8975 break;
8976 }
8977
8978 // 0exxx is a float
8979 case 'e':
8980 case 'E': {
8981 type = lex_optional_float_suffix(parser, seen_e);
8982 break;
8983 }
8984 }
8985 } else {
8986 // If it didn't start with a 0, then we'll lex as far as we can into a
8987 // decimal number.
8988 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8989
8990 // Afterward, we'll lex as far as we can into an optional float suffix.
8991 type = lex_optional_float_suffix(parser, seen_e);
8992 }
8993
8994 // At this point we have a completed number, but we want to provide the user
8995 // with a good experience if they put an additional .xxx fractional
8996 // component on the end, so we'll check for that here.
8997 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8998 const uint8_t *fraction_start = parser->current.end;
8999 const uint8_t *fraction_end = parser->current.end + 2;
9000 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
9001 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
9002 }
9003
9004 return type;
9005}
9006
9007static pm_token_type_t
9008lex_numeric(pm_parser_t *parser) {
9009 pm_token_type_t type = PM_TOKEN_INTEGER;
9010 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
9011
9012 if (parser->current.end < parser->end) {
9013 bool seen_e = false;
9014 type = lex_numeric_prefix(parser, &seen_e);
9015
9016 const uint8_t *end = parser->current.end;
9017 pm_token_type_t suffix_type = type;
9018
9019 if (type == PM_TOKEN_INTEGER) {
9020 if (match(parser, 'r')) {
9021 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9022
9023 if (match(parser, 'i')) {
9024 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
9025 }
9026 } else if (match(parser, 'i')) {
9027 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9028 }
9029 } else {
9030 if (!seen_e && match(parser, 'r')) {
9031 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9032
9033 if (match(parser, 'i')) {
9034 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
9035 }
9036 } else if (match(parser, 'i')) {
9037 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9038 }
9039 }
9040
9041 const uint8_t b = peek(parser);
9042 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9043 parser->current.end = end;
9044 } else {
9045 type = suffix_type;
9046 }
9047 }
9048
9049 return type;
9050}
9051
9052static pm_token_type_t
9053lex_global_variable(pm_parser_t *parser) {
9054 if (parser->current.end >= parser->end) {
9055 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9056 return PM_TOKEN_GLOBAL_VARIABLE;
9057 }
9058
9059 // True if multiple characters are allowed after the declaration of the
9060 // global variable. Not true when it starts with "$-".
9061 bool allow_multiple = true;
9062
9063 switch (*parser->current.end) {
9064 case '~': // $~: match-data
9065 case '*': // $*: argv
9066 case '$': // $$: pid
9067 case '?': // $?: last status
9068 case '!': // $!: error string
9069 case '@': // $@: error position
9070 case '/': // $/: input record separator
9071 case '\\': // $\: output record separator
9072 case ';': // $;: field separator
9073 case ',': // $,: output field separator
9074 case '.': // $.: last read line number
9075 case '=': // $=: ignorecase
9076 case ':': // $:: load path
9077 case '<': // $<: reading filename
9078 case '>': // $>: default output handle
9079 case '\"': // $": already loaded files
9080 parser->current.end++;
9081 return PM_TOKEN_GLOBAL_VARIABLE;
9082
9083 case '&': // $&: last match
9084 case '`': // $`: string before last match
9085 case '\'': // $': string after last match
9086 case '+': // $+: string matches last paren.
9087 parser->current.end++;
9088 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9089
9090 case '0': {
9091 parser->current.end++;
9092 size_t width;
9093
9094 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9095 do {
9096 parser->current.end += width;
9097 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9098
9099 // $0 isn't allowed to be followed by anything.
9100 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9101 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9102 }
9103
9104 return PM_TOKEN_GLOBAL_VARIABLE;
9105 }
9106
9107 case '1':
9108 case '2':
9109 case '3':
9110 case '4':
9111 case '5':
9112 case '6':
9113 case '7':
9114 case '8':
9115 case '9':
9116 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9117 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9118
9119 case '-':
9120 parser->current.end++;
9121 allow_multiple = false;
9123 default: {
9124 size_t width;
9125
9126 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9127 do {
9128 parser->current.end += width;
9129 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9130 } else if (pm_char_is_whitespace(peek(parser))) {
9131 // If we get here, then we have a $ followed by whitespace,
9132 // which is not allowed.
9133 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9134 } else {
9135 // If we get here, then we have a $ followed by something that
9136 // isn't recognized as a global variable.
9137 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9138 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9139 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9140 }
9141
9142 return PM_TOKEN_GLOBAL_VARIABLE;
9143 }
9144 }
9145}
9146
9159static inline pm_token_type_t
9160lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9161 if (memcmp(current_start, value, vlen) == 0) {
9162 pm_lex_state_t last_state = parser->lex_state;
9163
9164 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9165 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9166 } else {
9167 lex_state_set(parser, state);
9168 if (state == PM_LEX_STATE_BEG) {
9169 parser->command_start = true;
9170 }
9171
9172 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9173 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9174 return modifier_type;
9175 }
9176 }
9177
9178 return type;
9179 }
9180
9181 return PM_TOKEN_EOF;
9182}
9183
9184static pm_token_type_t
9185lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9186 // Lex as far as we can into the current identifier.
9187 size_t width;
9188 const uint8_t *end = parser->end;
9189 const uint8_t *current_start = parser->current.start;
9190 const uint8_t *current_end = parser->current.end;
9191 bool encoding_changed = parser->encoding_changed;
9192
9193 if (encoding_changed) {
9194 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
9195 current_end += width;
9196 }
9197 } else {
9198 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
9199 current_end += width;
9200 }
9201 }
9202 parser->current.end = current_end;
9203
9204 // Now cache the length of the identifier so that we can quickly compare it
9205 // against known keywords.
9206 width = (size_t) (current_end - current_start);
9207
9208 if (current_end < end) {
9209 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9210 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9211 // check if we're returning the defined? keyword or just an identifier.
9212 width++;
9213
9214 if (
9215 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9216 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9217 ) {
9218 // If we're in a position where we can accept a : at the end of an
9219 // identifier, then we'll optionally accept it.
9220 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9221 (void) match(parser, ':');
9222 return PM_TOKEN_LABEL;
9223 }
9224
9225 if (parser->lex_state != PM_LEX_STATE_DOT) {
9226 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9227 return PM_TOKEN_KEYWORD_DEFINED;
9228 }
9229 }
9230
9231 return PM_TOKEN_METHOD_NAME;
9232 }
9233
9234 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9235 // If we're in a position where we can accept a = at the end of an
9236 // identifier, then we'll optionally accept it.
9237 return PM_TOKEN_IDENTIFIER;
9238 }
9239
9240 if (
9241 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9242 peek(parser) == ':' && peek_offset(parser, 1) != ':'
9243 ) {
9244 // If we're in a position where we can accept a : at the end of an
9245 // identifier, then we'll optionally accept it.
9246 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9247 (void) match(parser, ':');
9248 return PM_TOKEN_LABEL;
9249 }
9250 }
9251
9252 if (parser->lex_state != PM_LEX_STATE_DOT) {
9253 pm_token_type_t type;
9254 switch (width) {
9255 case 2:
9256 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9257 if (pm_do_loop_stack_p(parser)) {
9258 return PM_TOKEN_KEYWORD_DO_LOOP;
9259 }
9260 return PM_TOKEN_KEYWORD_DO;
9261 }
9262
9263 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9264 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9265 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9266 break;
9267 case 3:
9268 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9269 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9270 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9271 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9272 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9273 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9274 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275 break;
9276 case 4:
9277 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9279 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9280 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9284 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285 break;
9286 case 5:
9287 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9288 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9289 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9290 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9291 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9292 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9293 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9294 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9295 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9296 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9298 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9299 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9300 break;
9301 case 6:
9302 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9303 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9304 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9305 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9306 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9307 break;
9308 case 8:
9309 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9310 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9311 break;
9312 case 12:
9313 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9314 break;
9315 }
9316 }
9317
9318 if (encoding_changed) {
9319 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9320 }
9321 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9322}
9323
9328static bool
9329current_token_starts_line(pm_parser_t *parser) {
9330 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9331}
9332
9347static pm_token_type_t
9348lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9349 // If there is no content following this #, then we're at the end of
9350 // the string and we can safely return string content.
9351 if (pound + 1 >= parser->end) {
9352 parser->current.end = pound + 1;
9353 return PM_TOKEN_STRING_CONTENT;
9354 }
9355
9356 // Now we'll check against the character that follows the #. If it constitutes
9357 // valid interplation, we'll handle that, otherwise we'll return
9358 // PM_TOKEN_NOT_PROVIDED.
9359 switch (pound[1]) {
9360 case '@': {
9361 // In this case we may have hit an embedded instance or class variable.
9362 if (pound + 2 >= parser->end) {
9363 parser->current.end = pound + 1;
9364 return PM_TOKEN_STRING_CONTENT;
9365 }
9366
9367 // If we're looking at a @ and there's another @, then we'll skip past the
9368 // second @.
9369 const uint8_t *variable = pound + 2;
9370 if (*variable == '@' && pound + 3 < parser->end) variable++;
9371
9372 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
9373 // At this point we're sure that we've either hit an embedded instance
9374 // or class variable. In this case we'll first need to check if we've
9375 // already consumed content.
9376 if (pound > parser->current.start) {
9377 parser->current.end = pound;
9378 return PM_TOKEN_STRING_CONTENT;
9379 }
9380
9381 // Otherwise we need to return the embedded variable token
9382 // and then switch to the embedded variable lex mode.
9383 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9384 parser->current.end = pound + 1;
9385 return PM_TOKEN_EMBVAR;
9386 }
9387
9388 // If we didn't get a valid interpolation, then this is just regular
9389 // string content. This is like if we get "#@-". In this case the caller
9390 // should keep lexing.
9391 parser->current.end = pound + 1;
9392 return PM_TOKEN_NOT_PROVIDED;
9393 }
9394 case '$':
9395 // In this case we may have hit an embedded global variable. If there's
9396 // not enough room, then we'll just return string content.
9397 if (pound + 2 >= parser->end) {
9398 parser->current.end = pound + 1;
9399 return PM_TOKEN_STRING_CONTENT;
9400 }
9401
9402 // This is the character that we're going to check to see if it is the
9403 // start of an identifier that would indicate that this is a global
9404 // variable.
9405 const uint8_t *check = pound + 2;
9406
9407 if (pound[2] == '-') {
9408 if (pound + 3 >= parser->end) {
9409 parser->current.end = pound + 2;
9410 return PM_TOKEN_STRING_CONTENT;
9411 }
9412
9413 check++;
9414 }
9415
9416 // If the character that we're going to check is the start of an
9417 // identifier, or we don't have a - and the character is a decimal number
9418 // or a global name punctuation character, then we've hit an embedded
9419 // global variable.
9420 if (
9421 char_is_identifier_start(parser, check, parser->end - check) ||
9422 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9423 ) {
9424 // In this case we've hit an embedded global variable. First check to
9425 // see if we've already consumed content. If we have, then we need to
9426 // return that content as string content first.
9427 if (pound > parser->current.start) {
9428 parser->current.end = pound;
9429 return PM_TOKEN_STRING_CONTENT;
9430 }
9431
9432 // Otherwise, we need to return the embedded variable token and switch
9433 // to the embedded variable lex mode.
9434 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9435 parser->current.end = pound + 1;
9436 return PM_TOKEN_EMBVAR;
9437 }
9438
9439 // In this case we've hit a #$ that does not indicate a global variable.
9440 // In this case we'll continue lexing past it.
9441 parser->current.end = pound + 1;
9442 return PM_TOKEN_NOT_PROVIDED;
9443 case '{':
9444 // In this case it's the start of an embedded expression. If we have
9445 // already consumed content, then we need to return that content as string
9446 // content first.
9447 if (pound > parser->current.start) {
9448 parser->current.end = pound;
9449 return PM_TOKEN_STRING_CONTENT;
9450 }
9451
9452 parser->enclosure_nesting++;
9453
9454 // Otherwise we'll skip past the #{ and begin lexing the embedded
9455 // expression.
9456 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9457 parser->current.end = pound + 2;
9458 parser->command_start = true;
9459 pm_do_loop_stack_push(parser, false);
9460 return PM_TOKEN_EMBEXPR_BEGIN;
9461 default:
9462 // In this case we've hit a # that doesn't constitute interpolation. We'll
9463 // mark that by returning the not provided token type. This tells the
9464 // consumer to keep lexing forward.
9465 parser->current.end = pound + 1;
9466 return PM_TOKEN_NOT_PROVIDED;
9467 }
9468}
9469
// Bit flags describing the context in which an escape sequence is being read.
// They are combined with | and tested with & by the escape_* helpers.

// No flags set.
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

// Inside a control escape; escape_byte masks the byte with 0x9f.
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1u << 0;

// Inside a meta escape; escape_byte sets the high bit (0x80).
static const uint8_t PM_ESCAPE_FLAG_META = 1u << 1;

// The escape is being read for a character literal.
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1u << 2;

// The escape is being read for a regular expression; output is also written
// to the regular expression buffer.
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1u << 3;
9475
/**
 * Lookup table indexed by a 7-bit ASCII byte. An entry is 1 for the bytes
 * 0x09-0x0D and for 0x20-0x7E except the backslash (0x5C); every other entry
 * is 0.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the given byte is below 0x80 and is marked in the
 * ascii_printable_chars table.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    // Bytes with the high bit set are outside of the table.
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
9494
/**
 * Convert a single hexadecimal digit character into its numeric value. The
 * input is expected to already be a valid hexadecimal digit; no validation is
 * performed here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // '0'..'9' map directly onto 0..9.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For 'a'..'f' and 'A'..'F' the low three bits are 1..6, so adding 9
    // yields 10..15 regardless of case.
    return (uint8_t) ((value & 0x7) + 9);
}
9503
9509static inline uint32_t
9510escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9511 uint32_t value = 0;
9512 for (size_t index = 0; index < length; index++) {
9513 if (index != 0) value <<= 4;
9514 value |= escape_hexadecimal_digit(string[index]);
9515 }
9516
9517 // Here we're going to verify that the value is actually a valid Unicode
9518 // codepoint and not a surrogate pair.
9519 if (value >= 0xD800 && value <= 0xDFFF) {
9520 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9521 return 0xFFFD;
9522 }
9523
9524 return value;
9525}
9526
9530static inline uint8_t
9531escape_byte(uint8_t value, const uint8_t flags) {
9532 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9533 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9534 return value;
9535}
9536
9540static inline void
9541escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9542 // \u escape sequences in string-like structures implicitly change the
9543 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9544 // literal.
9545 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9546 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9547 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9548 }
9549
9551 }
9552
9553 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
9554 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9555 pm_buffer_append_byte(buffer, 0xEF);
9556 pm_buffer_append_byte(buffer, 0xBF);
9557 pm_buffer_append_byte(buffer, 0xBD);
9558 }
9559}
9560
9565static inline void
9566escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9567 if (byte >= 0x80) {
9568 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9569 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9570 }
9571
9572 parser->explicit_encoding = parser->encoding;
9573 }
9574
9575 pm_buffer_append_byte(buffer, byte);
9576}
9577
9593static inline void
9594escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9595 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9596 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9597 }
9598
9599 escape_write_byte_encoded(parser, buffer, byte);
9600}
9601
9605static inline void
9606escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9607 size_t width;
9608 if (parser->encoding_changed) {
9609 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9610 } else {
9611 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9612 }
9613
9614 if (width == 1) {
9615 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9616 } else if (width > 1) {
9617 // Valid multibyte character. Just ignore escape.
9618 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
9619 pm_buffer_append_bytes(b, parser->current.end, width);
9620 parser->current.end += width;
9621 } else {
9622 // Assume the next character wasn't meant to be part of this escape
9623 // sequence since it is invalid. Add an error and move on.
9624 parser->current.end++;
9625 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9626 }
9627}
9628
9634static void
9635escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9636#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9637
9638 PM_PARSER_WARN_TOKEN_FORMAT(
9639 parser,
9640 parser->current,
9641 PM_WARN_INVALID_CHARACTER,
9642 FLAG(flags),
9643 FLAG(flag),
9644 type
9645 );
9646
9647#undef FLAG
9648}
9649
9653static void
9654escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9655 uint8_t peeked = peek(parser);
9656 switch (peeked) {
9657 case '\\': {
9658 parser->current.end++;
9659 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9660 return;
9661 }
9662 case '\'': {
9663 parser->current.end++;
9664 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9665 return;
9666 }
9667 case 'a': {
9668 parser->current.end++;
9669 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9670 return;
9671 }
9672 case 'b': {
9673 parser->current.end++;
9674 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9675 return;
9676 }
9677 case 'e': {
9678 parser->current.end++;
9679 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9680 return;
9681 }
9682 case 'f': {
9683 parser->current.end++;
9684 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9685 return;
9686 }
9687 case 'n': {
9688 parser->current.end++;
9689 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9690 return;
9691 }
9692 case 'r': {
9693 parser->current.end++;
9694 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9695 return;
9696 }
9697 case 's': {
9698 parser->current.end++;
9699 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9700 return;
9701 }
9702 case 't': {
9703 parser->current.end++;
9704 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9705 return;
9706 }
9707 case 'v': {
9708 parser->current.end++;
9709 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9710 return;
9711 }
9712 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9713 uint8_t value = (uint8_t) (*parser->current.end - '0');
9714 parser->current.end++;
9715
9716 if (pm_char_is_octal_digit(peek(parser))) {
9717 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9718 parser->current.end++;
9719
9720 if (pm_char_is_octal_digit(peek(parser))) {
9721 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9722 parser->current.end++;
9723 }
9724 }
9725
9726 value = escape_byte(value, flags);
9727 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9728 return;
9729 }
9730 case 'x': {
9731 const uint8_t *start = parser->current.end - 1;
9732
9733 parser->current.end++;
9734 uint8_t byte = peek(parser);
9735
9736 if (pm_char_is_hexadecimal_digit(byte)) {
9737 uint8_t value = escape_hexadecimal_digit(byte);
9738 parser->current.end++;
9739
9740 byte = peek(parser);
9741 if (pm_char_is_hexadecimal_digit(byte)) {
9742 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9743 parser->current.end++;
9744 }
9745
9746 value = escape_byte(value, flags);
9747 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9748 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9749 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9750 } else {
9751 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9752 }
9753 }
9754
9755 escape_write_byte_encoded(parser, buffer, value);
9756 } else {
9757 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9758 }
9759
9760 return;
9761 }
9762 case 'u': {
9763 const uint8_t *start = parser->current.end - 1;
9764 parser->current.end++;
9765
9766 if (parser->current.end == parser->end) {
9767 const uint8_t *start = parser->current.end - 2;
9768 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9769 } else if (peek(parser) == '{') {
9770 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9771 parser->current.end++;
9772
9773 size_t whitespace;
9774 while (true) {
9775 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9776 parser->current.end += whitespace;
9777 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9778 // This is super hacky, but it gets us nicer error
9779 // messages because we can still pass it off to the
9780 // regular expression engine even if we hit an
9781 // unterminated regular expression.
9782 parser->current.end += 2;
9783 } else {
9784 break;
9785 }
9786 }
9787
9788 const uint8_t *extra_codepoints_start = NULL;
9789 int codepoints_count = 0;
9790
9791 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9792 const uint8_t *unicode_start = parser->current.end;
9793 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9794
9795 if (hexadecimal_length > 6) {
9796 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9797 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9798 } else if (hexadecimal_length == 0) {
9799 // there are not hexadecimal characters
9800
9801 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9802 // If this is a regular expression, we are going to
9803 // let the regular expression engine handle this
9804 // error instead of us.
9805 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9806 } else {
9807 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9808 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9809 }
9810
9811 return;
9812 }
9813
9814 parser->current.end += hexadecimal_length;
9815 codepoints_count++;
9816 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9817 extra_codepoints_start = unicode_start;
9818 }
9819
9820 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9821 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9822
9823 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9824 }
9825
9826 // ?\u{nnnn} character literal should contain only one codepoint
9827 // and cannot be like ?\u{nnnn mmmm}.
9828 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9829 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9830 }
9831
9832 if (parser->current.end == parser->end) {
9833 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9834 } else if (peek(parser) == '}') {
9835 parser->current.end++;
9836 } else {
9837 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9838 // If this is a regular expression, we are going to let
9839 // the regular expression engine handle this error
9840 // instead of us.
9841 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9842 } else {
9843 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9844 }
9845 }
9846
9847 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9848 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9849 }
9850 } else {
9851 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9852
9853 if (length == 0) {
9854 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9855 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9856 } else {
9857 const uint8_t *start = parser->current.end - 2;
9858 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9859 }
9860 } else if (length == 4) {
9861 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9862
9863 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9864 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9865 }
9866
9867 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9868 parser->current.end += 4;
9869 } else {
9870 parser->current.end += length;
9871
9872 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9873 // If this is a regular expression, we are going to let
9874 // the regular expression engine handle this error
9875 // instead of us.
9876 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9877 } else {
9878 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9879 }
9880 }
9881 }
9882
9883 return;
9884 }
9885 case 'c': {
9886 parser->current.end++;
9887 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9888 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9889 }
9890
9891 if (parser->current.end == parser->end) {
9892 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9893 return;
9894 }
9895
9896 uint8_t peeked = peek(parser);
9897 switch (peeked) {
9898 case '?': {
9899 parser->current.end++;
9900 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9901 return;
9902 }
9903 case '\\':
9904 parser->current.end++;
9905
9906 if (match(parser, 'u') || match(parser, 'U')) {
9907 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9908 return;
9909 }
9910
9911 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9912 return;
9913 case ' ':
9914 parser->current.end++;
9915 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9916 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9917 return;
9918 case '\t':
9919 parser->current.end++;
9920 escape_read_warn(parser, flags, 0, "\\t");
9921 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9922 return;
9923 default: {
9924 if (!char_is_ascii_printable(peeked)) {
9925 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9926 return;
9927 }
9928
9929 parser->current.end++;
9930 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9931 return;
9932 }
9933 }
9934 }
9935 case 'C': {
9936 parser->current.end++;
9937 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9938 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9939 }
9940
9941 if (peek(parser) != '-') {
9942 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9943 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9944 return;
9945 }
9946
9947 parser->current.end++;
9948 if (parser->current.end == parser->end) {
9949 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9950 return;
9951 }
9952
9953 uint8_t peeked = peek(parser);
9954 switch (peeked) {
9955 case '?': {
9956 parser->current.end++;
9957 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9958 return;
9959 }
9960 case '\\':
9961 parser->current.end++;
9962
9963 if (match(parser, 'u') || match(parser, 'U')) {
9964 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9965 return;
9966 }
9967
9968 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9969 return;
9970 case ' ':
9971 parser->current.end++;
9972 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9973 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9974 return;
9975 case '\t':
9976 parser->current.end++;
9977 escape_read_warn(parser, flags, 0, "\\t");
9978 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9979 return;
9980 default: {
9981 if (!char_is_ascii_printable(peeked)) {
9982 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9983 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9984 return;
9985 }
9986
9987 parser->current.end++;
9988 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9989 return;
9990 }
9991 }
9992 }
9993 case 'M': {
9994 parser->current.end++;
9995 if (flags & PM_ESCAPE_FLAG_META) {
9996 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9997 }
9998
9999 if (peek(parser) != '-') {
10000 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10001 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10002 return;
10003 }
10004
10005 parser->current.end++;
10006 if (parser->current.end == parser->end) {
10007 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10008 return;
10009 }
10010
10011 uint8_t peeked = peek(parser);
10012 switch (peeked) {
10013 case '\\':
10014 parser->current.end++;
10015
10016 if (match(parser, 'u') || match(parser, 'U')) {
10017 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10018 return;
10019 }
10020
10021 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10022 return;
10023 case ' ':
10024 parser->current.end++;
10025 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10026 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10027 return;
10028 case '\t':
10029 parser->current.end++;
10030 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10031 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10032 return;
10033 default:
10034 if (!char_is_ascii_printable(peeked)) {
10035 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10036 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10037 return;
10038 }
10039
10040 parser->current.end++;
10041 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10042 return;
10043 }
10044 }
10045 case '\r': {
10046 if (peek_offset(parser, 1) == '\n') {
10047 parser->current.end += 2;
10048 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10049 return;
10050 }
10052 }
10053 default: {
10054 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
10055 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10056 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10057 return;
10058 }
10059 if (parser->current.end < parser->end) {
10060 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
10061 } else {
10062 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10063 }
10064 return;
10065 }
10066 }
10067}
10068
10094static pm_token_type_t
10095lex_question_mark(pm_parser_t *parser) {
10096 if (lex_state_end_p(parser)) {
10097 lex_state_set(parser, PM_LEX_STATE_BEG);
10098 return PM_TOKEN_QUESTION_MARK;
10099 }
10100
10101 if (parser->current.end >= parser->end) {
10102 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10103 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10104 return PM_TOKEN_CHARACTER_LITERAL;
10105 }
10106
10107 if (pm_char_is_whitespace(*parser->current.end)) {
10108 lex_state_set(parser, PM_LEX_STATE_BEG);
10109 return PM_TOKEN_QUESTION_MARK;
10110 }
10111
10112 lex_state_set(parser, PM_LEX_STATE_BEG);
10113
10114 if (match(parser, '\\')) {
10115 lex_state_set(parser, PM_LEX_STATE_END);
10116
10117 pm_buffer_t buffer;
10118 pm_buffer_init_capacity(&buffer, 3);
10119
10120 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10121 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10122
10123 return PM_TOKEN_CHARACTER_LITERAL;
10124 } else {
10125 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10126
10127 // Ternary operators can have a ? immediately followed by an identifier
10128 // which starts with an underscore. We check for this case here.
10129 if (
10130 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10131 (
10132 (parser->current.end + encoding_width >= parser->end) ||
10133 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
10134 )
10135 ) {
10136 lex_state_set(parser, PM_LEX_STATE_END);
10137 parser->current.end += encoding_width;
10138 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10139 return PM_TOKEN_CHARACTER_LITERAL;
10140 }
10141 }
10142
10143 return PM_TOKEN_QUESTION_MARK;
10144}
10145
10150static pm_token_type_t
10151lex_at_variable(pm_parser_t *parser) {
10152 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
10153 const uint8_t *end = parser->end;
10154
10155 size_t width;
10156 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
10157 parser->current.end += width;
10158
10159 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
10160 parser->current.end += width;
10161 }
10162 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10163 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10164 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
10165 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10166 }
10167
10168 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
10169 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10170 } else {
10171 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10172 pm_parser_err_token(parser, &parser->current, diag_id);
10173 }
10174
10175 // If we're lexing an embedded variable, then we need to pop back into the
10176 // parent lex context.
10177 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10178 lex_mode_pop(parser);
10179 }
10180
10181 return type;
10182}
10183
10187static inline void
10188parser_lex_callback(pm_parser_t *parser) {
10189 if (parser->lex_callback) {
10190 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10191 }
10192}
10193
10197static inline pm_comment_t *
10198parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10199 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10200 if (comment == NULL) return NULL;
10201
10202 *comment = (pm_comment_t) {
10203 .type = type,
10204 .location = { parser->current.start, parser->current.end }
10205 };
10206
10207 return comment;
10208}
10209
10215static pm_token_type_t
10216lex_embdoc(pm_parser_t *parser) {
10217 // First, lex out the EMBDOC_BEGIN token.
10218 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10219
10220 if (newline == NULL) {
10221 parser->current.end = parser->end;
10222 } else {
10223 pm_newline_list_append(&parser->newline_list, newline);
10224 parser->current.end = newline + 1;
10225 }
10226
10227 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10228 parser_lex_callback(parser);
10229
10230 // Now, create a comment that is going to be attached to the parser.
10231 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10232 if (comment == NULL) return PM_TOKEN_EOF;
10233
10234 // Now, loop until we find the end of the embedded documentation or the end
10235 // of the file.
10236 while (parser->current.end + 4 <= parser->end) {
10237 parser->current.start = parser->current.end;
10238
10239 // If we've hit the end of the embedded documentation then we'll return
10240 // that token here.
10241 if (
10242 (memcmp(parser->current.end, "=end", 4) == 0) &&
10243 (
10244 (parser->current.end + 4 == parser->end) || // end of file
10245 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10246 (parser->current.end[4] == '\0') || // NUL or end of script
10247 (parser->current.end[4] == '\004') || // ^D
10248 (parser->current.end[4] == '\032') // ^Z
10249 )
10250 ) {
10251 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10252
10253 if (newline == NULL) {
10254 parser->current.end = parser->end;
10255 } else {
10256 pm_newline_list_append(&parser->newline_list, newline);
10257 parser->current.end = newline + 1;
10258 }
10259
10260 parser->current.type = PM_TOKEN_EMBDOC_END;
10261 parser_lex_callback(parser);
10262
10263 comment->location.end = parser->current.end;
10264 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10265
10266 return PM_TOKEN_EMBDOC_END;
10267 }
10268
10269 // Otherwise, we'll parse until the end of the line and return a line of
10270 // embedded documentation.
10271 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10272
10273 if (newline == NULL) {
10274 parser->current.end = parser->end;
10275 } else {
10276 pm_newline_list_append(&parser->newline_list, newline);
10277 parser->current.end = newline + 1;
10278 }
10279
10280 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10281 parser_lex_callback(parser);
10282 }
10283
10284 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10285
10286 comment->location.end = parser->current.end;
10287 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10288
10289 return PM_TOKEN_EOF;
10290}
10291
10297static inline void
10298parser_lex_ignored_newline(pm_parser_t *parser) {
10299 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10300 parser_lex_callback(parser);
10301}
10302
10312static inline void
10313parser_flush_heredoc_end(pm_parser_t *parser) {
10314 assert(parser->heredoc_end <= parser->end);
10315 parser->next_start = parser->heredoc_end;
10316 parser->heredoc_end = NULL;
10317}
10318
10322static bool
10323parser_end_of_line_p(const pm_parser_t *parser) {
10324 const uint8_t *cursor = parser->current.end;
10325
10326 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10327 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10328 }
10329
10330 return true;
10331}
10332
10351typedef struct {
10357
10362 const uint8_t *cursor;
10364
10384
10388static inline void
10389pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10390 pm_buffer_append_byte(&token_buffer->buffer, byte);
10391}
10392
10393static inline void
10394pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10395 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10396}
10397
10401static inline size_t
10402parser_char_width(const pm_parser_t *parser) {
10403 size_t width;
10404 if (parser->encoding_changed) {
10405 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10406 } else {
10407 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10408 }
10409
10410 // TODO: If the character is invalid in the given encoding, then we'll just
10411 // push one byte into the buffer. This should actually be an error.
10412 return (width == 0 ? 1 : width);
10413}
10414
10418static void
10419pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10420 size_t width = parser_char_width(parser);
10421 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10422 parser->current.end += width;
10423}
10424
10425static void
10426pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10427 size_t width = parser_char_width(parser);
10428 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10429 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10430 parser->current.end += width;
10431}
10432
/**
 * Returns true when none of the first `length` bytes of `value` has its high
 * bit set, i.e. every byte is in the range 0x00-0x7F. An empty slice is
 * considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length > 0) {
        if (*value >= 0x80) return false;

        value++;
        length--;
    }

    return true;
}
10441
10448static inline void
10449pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10450 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10451}
10452
10453static inline void
10454pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10455 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10456 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10457 pm_buffer_free(&token_buffer->regexp_buffer);
10458}
10459
10469static void
10470pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10471 if (token_buffer->cursor == NULL) {
10472 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10473 } else {
10474 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10475 pm_token_buffer_copy(parser, token_buffer);
10476 }
10477}
10478
10479static void
10480pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10481 if (token_buffer->base.cursor == NULL) {
10482 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10483 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10484 } else {
10485 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10486 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10487 pm_regexp_token_buffer_copy(parser, token_buffer);
10488 }
10489}
10490
10491#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10492
10501static void
10502pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10503 const uint8_t *start;
10504 if (token_buffer->cursor == NULL) {
10505 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10506 start = parser->current.start;
10507 } else {
10508 start = token_buffer->cursor;
10509 }
10510
10511 const uint8_t *end = parser->current.end - 1;
10512 assert(end >= start);
10513 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10514
10515 token_buffer->cursor = end;
10516}
10517
10518static void
10519pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10520 const uint8_t *start;
10521 if (token_buffer->base.cursor == NULL) {
10522 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10523 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10524 start = parser->current.start;
10525 } else {
10526 start = token_buffer->base.cursor;
10527 }
10528
10529 const uint8_t *end = parser->current.end - 1;
10530 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10531 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10532
10533 token_buffer->base.cursor = end;
10534}
10535
10536#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10537
10542static inline size_t
10543pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10544 size_t whitespace = 0;
10545
10546 switch (indent) {
10547 case PM_HEREDOC_INDENT_NONE:
10548 // Do nothing, we can't match a terminator with
10549 // indentation and there's no need to calculate common
10550 // whitespace.
10551 break;
10552 case PM_HEREDOC_INDENT_DASH:
10553 // Skip past inline whitespace.
10554 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10555 break;
10556 case PM_HEREDOC_INDENT_TILDE:
10557 // Skip past inline whitespace and calculate common
10558 // whitespace.
10559 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10560 if (**cursor == '\t') {
10561 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10562 } else {
10563 whitespace++;
10564 }
10565 (*cursor)++;
10566 }
10567
10568 break;
10569 }
10570
10571 return whitespace;
10572}
10573
10578static uint8_t
10579pm_lex_percent_delimiter(pm_parser_t *parser) {
10580 size_t eol_length = match_eol(parser);
10581
10582 if (eol_length) {
10583 if (parser->heredoc_end) {
10584 // If we have already lexed a heredoc, then the newline has already
10585 // been added to the list. In this case we want to just flush the
10586 // heredoc end.
10587 parser_flush_heredoc_end(parser);
10588 } else {
10589 // Otherwise, we'll add the newline to the list of newlines.
10590 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10591 }
10592
10593 uint8_t delimiter = *parser->current.end;
10594
10595 // If our delimiter is \r\n, we want to treat it as if it's \n.
10596 // For example, %\r\nfoo\r\n should be "foo"
10597 if (eol_length == 2) {
10598 delimiter = *(parser->current.end + 1);
10599 }
10600
10601 parser->current.end += eol_length;
10602 return delimiter;
10603 }
10604
10605 return *parser->current.end++;
10606}
10607
/**
 * Finish lexing the current token: set its type, report it to the lex
 * callback, and return from the enclosing function. It expects a variable
 * named `parser` to be in scope, and can only be used in functions returning
 * void.
 *
 * The body is wrapped in do/while (0) so that the macro expands to a single
 * statement; otherwise only the assignment would be governed by an unbraced
 * `if`, and the callback and the return would always execute.
 */
#define LEX(token_type) do { parser->current.type = (token_type); parser_lex_callback(parser); return; } while (0)
10613
10620static void
10621parser_lex(pm_parser_t *parser) {
10622 assert(parser->current.end <= parser->end);
10623 parser->previous = parser->current;
10624
10625 // This value mirrors cmd_state from CRuby.
10626 bool previous_command_start = parser->command_start;
10627 parser->command_start = false;
10628
10629 // This is used to communicate to the newline lexing function that we've
10630 // already seen a comment.
10631 bool lexed_comment = false;
10632
10633 // Here we cache the current value of the semantic token seen flag. This is
10634 // used to reset it in case we find a token that shouldn't flip this flag.
10635 unsigned int semantic_token_seen = parser->semantic_token_seen;
10636 parser->semantic_token_seen = true;
10637
10638 switch (parser->lex_modes.current->mode) {
10639 case PM_LEX_DEFAULT:
10640 case PM_LEX_EMBEXPR:
10641 case PM_LEX_EMBVAR:
10642
10643 // We have a specific named label here because we are going to jump back to
10644 // this location in the event that we have lexed a token that should not be
10645 // returned to the parser. This includes comments, ignored newlines, and
10646 // invalid tokens of some form.
10647 lex_next_token: {
10648 // If we have the special next_start pointer set, then we're going to jump
10649 // to that location and start lexing from there.
10650 if (parser->next_start != NULL) {
10651 parser->current.end = parser->next_start;
10652 parser->next_start = NULL;
10653 }
10654
10655 // This value mirrors space_seen from CRuby. It tracks whether or not
10656 // space has been eaten before the start of the next token.
10657 bool space_seen = false;
10658
10659 // First, we're going to skip past any whitespace at the front of the next
10660 // token.
10661 bool chomping = true;
10662 while (parser->current.end < parser->end && chomping) {
10663 switch (*parser->current.end) {
10664 case ' ':
10665 case '\t':
10666 case '\f':
10667 case '\v':
10668 parser->current.end++;
10669 space_seen = true;
10670 break;
10671 case '\r':
10672 if (match_eol_offset(parser, 1)) {
10673 chomping = false;
10674 } else {
10675 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10676 parser->current.end++;
10677 space_seen = true;
10678 }
10679 break;
10680 case '\\': {
10681 size_t eol_length = match_eol_offset(parser, 1);
10682 if (eol_length) {
10683 if (parser->heredoc_end) {
10684 parser->current.end = parser->heredoc_end;
10685 parser->heredoc_end = NULL;
10686 } else {
10687 parser->current.end += eol_length + 1;
10688 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10689 space_seen = true;
10690 }
10691 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10692 parser->current.end += 2;
10693 } else {
10694 chomping = false;
10695 }
10696
10697 break;
10698 }
10699 default:
10700 chomping = false;
10701 break;
10702 }
10703 }
10704
10705 // Next, we'll set to start of this token to be the current end.
10706 parser->current.start = parser->current.end;
10707
10708 // We'll check if we're at the end of the file. If we are, then we
10709 // need to return the EOF token.
10710 if (parser->current.end >= parser->end) {
10711 // If we hit EOF, but the EOF came immediately after a newline,
10712 // set the start of the token to the newline. This way any EOF
10713 // errors will be reported as happening on that line rather than
10714 // a line after. For example "foo(\n" should report an error
10715 // on line 1 even though EOF technically occurs on line 2.
10716 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10717 parser->current.start -= 1;
10718 }
10719 LEX(PM_TOKEN_EOF);
10720 }
10721
10722 // Finally, we'll check the current character to determine the next
10723 // token.
10724 switch (*parser->current.end++) {
10725 case '\0': // NUL or end of script
10726 case '\004': // ^D
10727 case '\032': // ^Z
10728 parser->current.end--;
10729 LEX(PM_TOKEN_EOF);
10730
10731 case '#': { // comments
10732 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10733 parser->current.end = ending == NULL ? parser->end : ending;
10734
10735 // If we found a comment while lexing, then we're going to
10736 // add it to the list of comments in the file and keep
10737 // lexing.
10738 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10739 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10740
10741 if (ending) parser->current.end++;
10742 parser->current.type = PM_TOKEN_COMMENT;
10743 parser_lex_callback(parser);
10744
10745 // Here, parse the comment to see if it's a magic comment
10746 // and potentially change state on the parser.
10747 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10748 ptrdiff_t length = parser->current.end - parser->current.start;
10749
10750 // If we didn't find a magic comment within the first
10751 // pass and we're at the start of the file, then we need
10752 // to do another pass to potentially find other patterns
10753 // for encoding comments.
10754 if (length >= 10 && !parser->encoding_locked) {
10755 parser_lex_magic_comment_encoding(parser);
10756 }
10757 }
10758
10759 lexed_comment = true;
10760 }
10762 case '\r':
10763 case '\n': {
10764 parser->semantic_token_seen = semantic_token_seen & 0x1;
10765 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10766
10767 if (eol_length) {
10768 // The only way you can have carriage returns in this
10769 // particular loop is if you have a carriage return
10770 // followed by a newline. In that case we'll just skip
10771 // over the carriage return and continue lexing, in
10772 // order to make it so that the newline token
10773 // encapsulates both the carriage return and the
10774 // newline. Note that we need to check that we haven't
10775 // already lexed a comment here because that falls
10776 // through into here as well.
10777 if (!lexed_comment) {
10778 parser->current.end += eol_length - 1; // skip CR
10779 }
10780
10781 if (parser->heredoc_end == NULL) {
10782 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10783 }
10784 }
10785
10786 if (parser->heredoc_end) {
10787 parser_flush_heredoc_end(parser);
10788 }
10789
10790 // If this is an ignored newline, then we can continue lexing after
10791 // calling the callback with the ignored newline token.
10792 switch (lex_state_ignored_p(parser)) {
10793 case PM_IGNORED_NEWLINE_NONE:
10794 break;
10795 case PM_IGNORED_NEWLINE_PATTERN:
10796 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10797 if (!lexed_comment) parser_lex_ignored_newline(parser);
10798 lex_state_set(parser, PM_LEX_STATE_BEG);
10799 parser->command_start = true;
10800 parser->current.type = PM_TOKEN_NEWLINE;
10801 return;
10802 }
10804 case PM_IGNORED_NEWLINE_ALL:
10805 if (!lexed_comment) parser_lex_ignored_newline(parser);
10806 lexed_comment = false;
10807 goto lex_next_token;
10808 }
10809
10810 // Here we need to look ahead and see if there is a call operator
10811 // (either . or &.) that starts the next line. If there is, then this
10812 // is going to become an ignored newline and we're going to instead
10813 // return the call operator.
10814 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10815 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10816
10817 if (next_content < parser->end) {
10818 // If we hit a comment after a newline, then we're going to check
10819 // if it's ignored or if it's followed by a method call ('.').
10820 // If it is, then we're going to call the
10821 // callback with an ignored newline and then continue lexing.
10822 // Otherwise we'll return a regular newline.
10823 if (next_content[0] == '#') {
10824 // Here we look for a "." or "&." following a "\n".
10825 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10826
10827 while (following && (following + 1 < parser->end)) {
10828 following++;
10829 following += pm_strspn_inline_whitespace(following, parser->end - following);
10830
10831 // If this is not followed by a comment, then we can break out
10832 // of this loop.
10833 if (peek_at(parser, following) != '#') break;
10834
10835 // If there is a comment, then we need to find the end of the
10836 // comment and continue searching from there.
10837 following = next_newline(following, parser->end - following);
10838 }
10839
10840 // If the lex state was ignored, we will lex the
10841 // ignored newline.
10842 if (lex_state_ignored_p(parser)) {
10843 if (!lexed_comment) parser_lex_ignored_newline(parser);
10844 lexed_comment = false;
10845 goto lex_next_token;
10846 }
10847
10848 // If we hit a '.' or a '&.' we will lex the ignored
10849 // newline.
10850 if (following && (
10851 (peek_at(parser, following) == '.') ||
10852 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10853 )) {
10854 if (!lexed_comment) parser_lex_ignored_newline(parser);
10855 lexed_comment = false;
10856 goto lex_next_token;
10857 }
10858
10859
10860 // If we are parsing as CRuby 3.5 or later and we
10861 // hit a '&&' or a '||' then we will lex the ignored
10862 // newline.
10863 if (
10865 following && (
10866 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
10867 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
10868 (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
10869 (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
10870 )
10871 ) {
10872 if (!lexed_comment) parser_lex_ignored_newline(parser);
10873 lexed_comment = false;
10874 goto lex_next_token;
10875 }
10876 }
10877
10878 // If we hit a . after a newline, then we're in a call chain and
10879 // we need to return the call operator.
10880 if (next_content[0] == '.') {
10881 // To match ripper, we need to emit an ignored newline even though
10882 // it's a real newline in the case that we have a beginless range
10883 // on a subsequent line.
10884 if (peek_at(parser, next_content + 1) == '.') {
10885 if (!lexed_comment) parser_lex_ignored_newline(parser);
10886 lex_state_set(parser, PM_LEX_STATE_BEG);
10887 parser->command_start = true;
10888 parser->current.type = PM_TOKEN_NEWLINE;
10889 return;
10890 }
10891
10892 if (!lexed_comment) parser_lex_ignored_newline(parser);
10893 lex_state_set(parser, PM_LEX_STATE_DOT);
10894 parser->current.start = next_content;
10895 parser->current.end = next_content + 1;
10896 parser->next_start = NULL;
10897 LEX(PM_TOKEN_DOT);
10898 }
10899
10900 // If we hit a &. after a newline, then we're in a call chain and
10901 // we need to return the call operator.
10902 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10903 if (!lexed_comment) parser_lex_ignored_newline(parser);
10904 lex_state_set(parser, PM_LEX_STATE_DOT);
10905 parser->current.start = next_content;
10906 parser->current.end = next_content + 2;
10907 parser->next_start = NULL;
10908 LEX(PM_TOKEN_AMPERSAND_DOT);
10909 }
10910
10911 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
10912 // If we hit an && then we are in a logical chain
10913 // and we need to return the logical operator.
10914 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10915 if (!lexed_comment) parser_lex_ignored_newline(parser);
10916 lex_state_set(parser, PM_LEX_STATE_BEG);
10917 parser->current.start = next_content;
10918 parser->current.end = next_content + 2;
10919 parser->next_start = NULL;
10920 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10921 }
10922
10923 // If we hit a || then we are in a logical chain and
10924 // we need to return the logical operator.
10925 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10926 if (!lexed_comment) parser_lex_ignored_newline(parser);
10927 lex_state_set(parser, PM_LEX_STATE_BEG);
10928 parser->current.start = next_content;
10929 parser->current.end = next_content + 2;
10930 parser->next_start = NULL;
10931 LEX(PM_TOKEN_PIPE_PIPE);
10932 }
10933
10934 // If we hit an 'and' then we are in a logical chain
10935 // and we need to return the logical operator.
10936 if (
10937 peek_at(parser, next_content) == 'a' &&
10938 peek_at(parser, next_content + 1) == 'n' &&
10939 peek_at(parser, next_content + 2) == 'd' &&
10940 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10941 ) {
10942 if (!lexed_comment) parser_lex_ignored_newline(parser);
10943 lex_state_set(parser, PM_LEX_STATE_BEG);
10944 parser->current.start = next_content;
10945 parser->current.end = next_content + 3;
10946 parser->next_start = NULL;
10947 parser->command_start = true;
10948 LEX(PM_TOKEN_KEYWORD_AND);
10949 }
10950
10951 // If we hit a 'or' then we are in a logical chain
10952 // and we need to return the logical operator.
10953 if (
10954 peek_at(parser, next_content) == 'o' &&
10955 peek_at(parser, next_content + 1) == 'r' &&
10956 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10957 ) {
10958 if (!lexed_comment) parser_lex_ignored_newline(parser);
10959 lex_state_set(parser, PM_LEX_STATE_BEG);
10960 parser->current.start = next_content;
10961 parser->current.end = next_content + 2;
10962 parser->next_start = NULL;
10963 parser->command_start = true;
10964 LEX(PM_TOKEN_KEYWORD_OR);
10965 }
10966 }
10967 }
10968
10969 // At this point we know this is a regular newline, and we can set the
10970 // necessary state and return the token.
10971 lex_state_set(parser, PM_LEX_STATE_BEG);
10972 parser->command_start = true;
10973 parser->current.type = PM_TOKEN_NEWLINE;
10974 if (!lexed_comment) parser_lex_callback(parser);
10975 return;
10976 }
10977
10978 // ,
10979 case ',':
10980 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10981 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10982 }
10983
10984 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10985 LEX(PM_TOKEN_COMMA);
10986
10987 // (
10988 case '(': {
10989 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10990
10991 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10992 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10993 }
10994
10995 parser->enclosure_nesting++;
10996 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10997 pm_do_loop_stack_push(parser, false);
10998 LEX(type);
10999 }
11000
11001 // )
11002 case ')':
11003 parser->enclosure_nesting--;
11004 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11005 pm_do_loop_stack_pop(parser);
11006 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
11007
11008 // ;
11009 case ';':
11010 lex_state_set(parser, PM_LEX_STATE_BEG);
11011 parser->command_start = true;
11012 LEX(PM_TOKEN_SEMICOLON);
11013
11014 // [ [] []=
11015 case '[':
11016 parser->enclosure_nesting++;
11017 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
11018
11019 if (lex_state_operator_p(parser)) {
11020 if (match(parser, ']')) {
11021 parser->enclosure_nesting--;
11022 lex_state_set(parser, PM_LEX_STATE_ARG);
11023 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
11024 }
11025
11026 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
11027 LEX(type);
11028 }
11029
11030 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
11031 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
11032 }
11033
11034 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11035 pm_do_loop_stack_push(parser, false);
11036 LEX(type);
11037
11038 // ]
11039 case ']':
11040 parser->enclosure_nesting--;
11041 lex_state_set(parser, PM_LEX_STATE_END);
11042 pm_do_loop_stack_pop(parser);
11043 LEX(PM_TOKEN_BRACKET_RIGHT);
11044
11045 // {
11046 case '{': {
11047 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
11048
11049 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
11050 // This { begins a lambda
11051 parser->command_start = true;
11052 lex_state_set(parser, PM_LEX_STATE_BEG);
11053 type = PM_TOKEN_LAMBDA_BEGIN;
11054 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
11055 // This { begins a hash literal
11056 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11057 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
11058 // This { begins a block
11059 parser->command_start = true;
11060 lex_state_set(parser, PM_LEX_STATE_BEG);
11061 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
11062 // This { begins a block on a command
11063 parser->command_start = true;
11064 lex_state_set(parser, PM_LEX_STATE_BEG);
11065 } else {
11066 // This { begins a hash literal
11067 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11068 }
11069
11070 parser->enclosure_nesting++;
11071 parser->brace_nesting++;
11072 pm_do_loop_stack_push(parser, false);
11073
11074 LEX(type);
11075 }
11076
11077 // }
11078 case '}':
11079 parser->enclosure_nesting--;
11080 pm_do_loop_stack_pop(parser);
11081
11082 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
11083 lex_mode_pop(parser);
11084 LEX(PM_TOKEN_EMBEXPR_END);
11085 }
11086
11087 parser->brace_nesting--;
11088 lex_state_set(parser, PM_LEX_STATE_END);
11089 LEX(PM_TOKEN_BRACE_RIGHT);
11090
11091 // * ** **= *=
11092 case '*': {
11093 if (match(parser, '*')) {
11094 if (match(parser, '=')) {
11095 lex_state_set(parser, PM_LEX_STATE_BEG);
11096 LEX(PM_TOKEN_STAR_STAR_EQUAL);
11097 }
11098
11099 pm_token_type_t type = PM_TOKEN_STAR_STAR;
11100
11101 if (lex_state_spcarg_p(parser, space_seen)) {
11102 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11103 type = PM_TOKEN_USTAR_STAR;
11104 } else if (lex_state_beg_p(parser)) {
11105 type = PM_TOKEN_USTAR_STAR;
11106 } else if (ambiguous_operator_p(parser, space_seen)) {
11107 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11108 }
11109
11110 if (lex_state_operator_p(parser)) {
11111 lex_state_set(parser, PM_LEX_STATE_ARG);
11112 } else {
11113 lex_state_set(parser, PM_LEX_STATE_BEG);
11114 }
11115
11116 LEX(type);
11117 }
11118
11119 if (match(parser, '=')) {
11120 lex_state_set(parser, PM_LEX_STATE_BEG);
11121 LEX(PM_TOKEN_STAR_EQUAL);
11122 }
11123
11124 pm_token_type_t type = PM_TOKEN_STAR;
11125
11126 if (lex_state_spcarg_p(parser, space_seen)) {
11127 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11128 type = PM_TOKEN_USTAR;
11129 } else if (lex_state_beg_p(parser)) {
11130 type = PM_TOKEN_USTAR;
11131 } else if (ambiguous_operator_p(parser, space_seen)) {
11132 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11133 }
11134
11135 if (lex_state_operator_p(parser)) {
11136 lex_state_set(parser, PM_LEX_STATE_ARG);
11137 } else {
11138 lex_state_set(parser, PM_LEX_STATE_BEG);
11139 }
11140
11141 LEX(type);
11142 }
11143
11144 // ! != !~ !@
11145 case '!':
11146 if (lex_state_operator_p(parser)) {
11147 lex_state_set(parser, PM_LEX_STATE_ARG);
11148 if (match(parser, '@')) {
11149 LEX(PM_TOKEN_BANG);
11150 }
11151 } else {
11152 lex_state_set(parser, PM_LEX_STATE_BEG);
11153 }
11154
11155 if (match(parser, '=')) {
11156 LEX(PM_TOKEN_BANG_EQUAL);
11157 }
11158
11159 if (match(parser, '~')) {
11160 LEX(PM_TOKEN_BANG_TILDE);
11161 }
11162
11163 LEX(PM_TOKEN_BANG);
11164
11165 // = => =~ == === =begin
11166 case '=':
11167 if (
11168 current_token_starts_line(parser) &&
11169 (parser->current.end + 5 <= parser->end) &&
11170 memcmp(parser->current.end, "begin", 5) == 0 &&
11171 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11172 ) {
11173 pm_token_type_t type = lex_embdoc(parser);
11174 if (type == PM_TOKEN_EOF) {
11175 LEX(type);
11176 }
11177
11178 goto lex_next_token;
11179 }
11180
11181 if (lex_state_operator_p(parser)) {
11182 lex_state_set(parser, PM_LEX_STATE_ARG);
11183 } else {
11184 lex_state_set(parser, PM_LEX_STATE_BEG);
11185 }
11186
11187 if (match(parser, '>')) {
11188 LEX(PM_TOKEN_EQUAL_GREATER);
11189 }
11190
11191 if (match(parser, '~')) {
11192 LEX(PM_TOKEN_EQUAL_TILDE);
11193 }
11194
11195 if (match(parser, '=')) {
11196 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11197 }
11198
11199 LEX(PM_TOKEN_EQUAL);
11200
11201 // < << <<= <= <=>
11202 case '<':
11203 if (match(parser, '<')) {
11204 if (
11205 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11206 !lex_state_end_p(parser) &&
11207 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11208 ) {
11209 const uint8_t *end = parser->current.end;
11210
11211 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11212 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11213
11214 if (match(parser, '-')) {
11215 indent = PM_HEREDOC_INDENT_DASH;
11216 }
11217 else if (match(parser, '~')) {
11218 indent = PM_HEREDOC_INDENT_TILDE;
11219 }
11220
11221 if (match(parser, '`')) {
11222 quote = PM_HEREDOC_QUOTE_BACKTICK;
11223 }
11224 else if (match(parser, '"')) {
11225 quote = PM_HEREDOC_QUOTE_DOUBLE;
11226 }
11227 else if (match(parser, '\'')) {
11228 quote = PM_HEREDOC_QUOTE_SINGLE;
11229 }
11230
11231 const uint8_t *ident_start = parser->current.end;
11232 size_t width = 0;
11233
11234 if (parser->current.end >= parser->end) {
11235 parser->current.end = end;
11236 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
11237 parser->current.end = end;
11238 } else {
11239 if (quote == PM_HEREDOC_QUOTE_NONE) {
11240 parser->current.end += width;
11241
11242 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
11243 parser->current.end += width;
11244 }
11245 } else {
11246 // If we have quotes, then we're going to go until we find the
11247 // end quote.
11248 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11249 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11250 parser->current.end++;
11251 }
11252 }
11253
11254 size_t ident_length = (size_t) (parser->current.end - ident_start);
11255 bool ident_error = false;
11256
11257 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11258 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11259 ident_error = true;
11260 }
11261
11262 parser->explicit_encoding = NULL;
11263 lex_mode_push(parser, (pm_lex_mode_t) {
11264 .mode = PM_LEX_HEREDOC,
11265 .as.heredoc = {
11266 .base = {
11267 .ident_start = ident_start,
11268 .ident_length = ident_length,
11269 .quote = quote,
11270 .indent = indent
11271 },
11272 .next_start = parser->current.end,
11273 .common_whitespace = NULL,
11274 .line_continuation = false
11275 }
11276 });
11277
11278 if (parser->heredoc_end == NULL) {
11279 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11280
11281 if (body_start == NULL) {
11282 // If there is no newline after the heredoc identifier, then
11283 // this is not a valid heredoc declaration. In this case we
11284 // will add an error, but we will still return a heredoc
11285 // start.
11286 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11287 body_start = parser->end;
11288 } else {
11289 // Otherwise, we want to indicate that the body of the
11290 // heredoc starts on the character after the next newline.
11291 pm_newline_list_append(&parser->newline_list, body_start);
11292 body_start++;
11293 }
11294
11295 parser->next_start = body_start;
11296 } else {
11297 parser->next_start = parser->heredoc_end;
11298 }
11299
11300 LEX(PM_TOKEN_HEREDOC_START);
11301 }
11302 }
11303
11304 if (match(parser, '=')) {
11305 lex_state_set(parser, PM_LEX_STATE_BEG);
11306 LEX(PM_TOKEN_LESS_LESS_EQUAL);
11307 }
11308
11309 if (ambiguous_operator_p(parser, space_seen)) {
11310 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11311 }
11312
11313 if (lex_state_operator_p(parser)) {
11314 lex_state_set(parser, PM_LEX_STATE_ARG);
11315 } else {
11316 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11317 lex_state_set(parser, PM_LEX_STATE_BEG);
11318 }
11319
11320 LEX(PM_TOKEN_LESS_LESS);
11321 }
11322
11323 if (lex_state_operator_p(parser)) {
11324 lex_state_set(parser, PM_LEX_STATE_ARG);
11325 } else {
11326 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11327 lex_state_set(parser, PM_LEX_STATE_BEG);
11328 }
11329
11330 if (match(parser, '=')) {
11331 if (match(parser, '>')) {
11332 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
11333 }
11334
11335 LEX(PM_TOKEN_LESS_EQUAL);
11336 }
11337
11338 LEX(PM_TOKEN_LESS);
11339
11340 // > >> >>= >=
11341 case '>':
11342 if (match(parser, '>')) {
11343 if (lex_state_operator_p(parser)) {
11344 lex_state_set(parser, PM_LEX_STATE_ARG);
11345 } else {
11346 lex_state_set(parser, PM_LEX_STATE_BEG);
11347 }
11348 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11349 }
11350
11351 if (lex_state_operator_p(parser)) {
11352 lex_state_set(parser, PM_LEX_STATE_ARG);
11353 } else {
11354 lex_state_set(parser, PM_LEX_STATE_BEG);
11355 }
11356
11357 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11358
11359 // double-quoted string literal
11360 case '"': {
11361 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11362 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11363 LEX(PM_TOKEN_STRING_BEGIN);
11364 }
11365
11366 // xstring literal
11367 case '`': {
11368 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11369 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11370 LEX(PM_TOKEN_BACKTICK);
11371 }
11372
11373 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11374 if (previous_command_start) {
11375 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11376 } else {
11377 lex_state_set(parser, PM_LEX_STATE_ARG);
11378 }
11379
11380 LEX(PM_TOKEN_BACKTICK);
11381 }
11382
11383 lex_mode_push_string(parser, true, false, '\0', '`');
11384 LEX(PM_TOKEN_BACKTICK);
11385 }
11386
11387 // single-quoted string literal
11388 case '\'': {
11389 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11390 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11391 LEX(PM_TOKEN_STRING_BEGIN);
11392 }
11393
11394 // ? character literal
11395 case '?':
11396 LEX(lex_question_mark(parser));
11397
 11398 // & && &&= &= &.
11399 case '&': {
11400 if (match(parser, '&')) {
11401 lex_state_set(parser, PM_LEX_STATE_BEG);
11402
11403 if (match(parser, '=')) {
11404 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
11405 }
11406
11407 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
11408 }
11409
11410 if (match(parser, '=')) {
11411 lex_state_set(parser, PM_LEX_STATE_BEG);
11412 LEX(PM_TOKEN_AMPERSAND_EQUAL);
11413 }
11414
11415 if (match(parser, '.')) {
11416 lex_state_set(parser, PM_LEX_STATE_DOT);
11417 LEX(PM_TOKEN_AMPERSAND_DOT);
11418 }
11419
11420 pm_token_type_t type = PM_TOKEN_AMPERSAND;
11421 if (lex_state_spcarg_p(parser, space_seen)) {
11422 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11423 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11424 } else {
11425 const uint8_t delim = peek_offset(parser, 1);
11426
11427 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
11428 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11429 }
11430 }
11431
11432 type = PM_TOKEN_UAMPERSAND;
11433 } else if (lex_state_beg_p(parser)) {
11434 type = PM_TOKEN_UAMPERSAND;
11435 } else if (ambiguous_operator_p(parser, space_seen)) {
11436 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11437 }
11438
11439 if (lex_state_operator_p(parser)) {
11440 lex_state_set(parser, PM_LEX_STATE_ARG);
11441 } else {
11442 lex_state_set(parser, PM_LEX_STATE_BEG);
11443 }
11444
11445 LEX(type);
11446 }
11447
11448 // | || ||= |=
11449 case '|':
11450 if (match(parser, '|')) {
11451 if (match(parser, '=')) {
11452 lex_state_set(parser, PM_LEX_STATE_BEG);
11453 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
11454 }
11455
11456 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11457 parser->current.end--;
11458 LEX(PM_TOKEN_PIPE);
11459 }
11460
11461 lex_state_set(parser, PM_LEX_STATE_BEG);
11462 LEX(PM_TOKEN_PIPE_PIPE);
11463 }
11464
11465 if (match(parser, '=')) {
11466 lex_state_set(parser, PM_LEX_STATE_BEG);
11467 LEX(PM_TOKEN_PIPE_EQUAL);
11468 }
11469
11470 if (lex_state_operator_p(parser)) {
11471 lex_state_set(parser, PM_LEX_STATE_ARG);
11472 } else {
11473 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11474 }
11475
11476 LEX(PM_TOKEN_PIPE);
11477
11478 // + += +@
11479 case '+': {
11480 if (lex_state_operator_p(parser)) {
11481 lex_state_set(parser, PM_LEX_STATE_ARG);
11482
11483 if (match(parser, '@')) {
11484 LEX(PM_TOKEN_UPLUS);
11485 }
11486
11487 LEX(PM_TOKEN_PLUS);
11488 }
11489
11490 if (match(parser, '=')) {
11491 lex_state_set(parser, PM_LEX_STATE_BEG);
11492 LEX(PM_TOKEN_PLUS_EQUAL);
11493 }
11494
11495 if (
11496 lex_state_beg_p(parser) ||
11497 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11498 ) {
11499 lex_state_set(parser, PM_LEX_STATE_BEG);
11500
11501 if (pm_char_is_decimal_digit(peek(parser))) {
11502 parser->current.end++;
11503 pm_token_type_t type = lex_numeric(parser);
11504 lex_state_set(parser, PM_LEX_STATE_END);
11505 LEX(type);
11506 }
11507
11508 LEX(PM_TOKEN_UPLUS);
11509 }
11510
11511 if (ambiguous_operator_p(parser, space_seen)) {
11512 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11513 }
11514
11515 lex_state_set(parser, PM_LEX_STATE_BEG);
11516 LEX(PM_TOKEN_PLUS);
11517 }
11518
 11519 // - -= -> -@
11520 case '-': {
11521 if (lex_state_operator_p(parser)) {
11522 lex_state_set(parser, PM_LEX_STATE_ARG);
11523
11524 if (match(parser, '@')) {
11525 LEX(PM_TOKEN_UMINUS);
11526 }
11527
11528 LEX(PM_TOKEN_MINUS);
11529 }
11530
11531 if (match(parser, '=')) {
11532 lex_state_set(parser, PM_LEX_STATE_BEG);
11533 LEX(PM_TOKEN_MINUS_EQUAL);
11534 }
11535
11536 if (match(parser, '>')) {
11537 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11538 LEX(PM_TOKEN_MINUS_GREATER);
11539 }
11540
11541 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11542 bool is_beg = lex_state_beg_p(parser);
11543 if (!is_beg && spcarg) {
11544 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11545 }
11546
11547 if (is_beg || spcarg) {
11548 lex_state_set(parser, PM_LEX_STATE_BEG);
11549 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11550 }
11551
11552 if (ambiguous_operator_p(parser, space_seen)) {
11553 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11554 }
11555
11556 lex_state_set(parser, PM_LEX_STATE_BEG);
11557 LEX(PM_TOKEN_MINUS);
11558 }
11559
11560 // . .. ...
11561 case '.': {
11562 bool beg_p = lex_state_beg_p(parser);
11563
11564 if (match(parser, '.')) {
11565 if (match(parser, '.')) {
11566 // If we're _not_ inside a range within default parameters
11567 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11568 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11569 lex_state_set(parser, PM_LEX_STATE_BEG);
11570 } else {
11571 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11572 }
11573 LEX(PM_TOKEN_UDOT_DOT_DOT);
11574 }
11575
11576 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11577 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11578 }
11579
11580 lex_state_set(parser, PM_LEX_STATE_BEG);
11581 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
11582 }
11583
11584 lex_state_set(parser, PM_LEX_STATE_BEG);
11585 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11586 }
11587
11588 lex_state_set(parser, PM_LEX_STATE_DOT);
11589 LEX(PM_TOKEN_DOT);
11590 }
11591
11592 // integer
11593 case '0':
11594 case '1':
11595 case '2':
11596 case '3':
11597 case '4':
11598 case '5':
11599 case '6':
11600 case '7':
11601 case '8':
11602 case '9': {
11603 pm_token_type_t type = lex_numeric(parser);
11604 lex_state_set(parser, PM_LEX_STATE_END);
11605 LEX(type);
11606 }
11607
11608 // :: symbol
11609 case ':':
11610 if (match(parser, ':')) {
11611 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11612 lex_state_set(parser, PM_LEX_STATE_BEG);
11613 LEX(PM_TOKEN_UCOLON_COLON);
11614 }
11615
11616 lex_state_set(parser, PM_LEX_STATE_DOT);
11617 LEX(PM_TOKEN_COLON_COLON);
11618 }
11619
11620 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11621 lex_state_set(parser, PM_LEX_STATE_BEG);
11622 LEX(PM_TOKEN_COLON);
11623 }
11624
11625 if (peek(parser) == '"' || peek(parser) == '\'') {
11626 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11627 parser->current.end++;
11628 }
11629
11630 lex_state_set(parser, PM_LEX_STATE_FNAME);
11631 LEX(PM_TOKEN_SYMBOL_BEGIN);
11632
11633 // / /=
11634 case '/':
11635 if (lex_state_beg_p(parser)) {
11636 lex_mode_push_regexp(parser, '\0', '/');
11637 LEX(PM_TOKEN_REGEXP_BEGIN);
11638 }
11639
11640 if (match(parser, '=')) {
11641 lex_state_set(parser, PM_LEX_STATE_BEG);
11642 LEX(PM_TOKEN_SLASH_EQUAL);
11643 }
11644
11645 if (lex_state_spcarg_p(parser, space_seen)) {
11646 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11647 lex_mode_push_regexp(parser, '\0', '/');
11648 LEX(PM_TOKEN_REGEXP_BEGIN);
11649 }
11650
11651 if (ambiguous_operator_p(parser, space_seen)) {
11652 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11653 }
11654
11655 if (lex_state_operator_p(parser)) {
11656 lex_state_set(parser, PM_LEX_STATE_ARG);
11657 } else {
11658 lex_state_set(parser, PM_LEX_STATE_BEG);
11659 }
11660
11661 LEX(PM_TOKEN_SLASH);
11662
11663 // ^ ^=
11664 case '^':
11665 if (lex_state_operator_p(parser)) {
11666 lex_state_set(parser, PM_LEX_STATE_ARG);
11667 } else {
11668 lex_state_set(parser, PM_LEX_STATE_BEG);
11669 }
11670 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11671
11672 // ~ ~@
11673 case '~':
11674 if (lex_state_operator_p(parser)) {
11675 (void) match(parser, '@');
11676 lex_state_set(parser, PM_LEX_STATE_ARG);
11677 } else {
11678 lex_state_set(parser, PM_LEX_STATE_BEG);
11679 }
11680
11681 LEX(PM_TOKEN_TILDE);
11682
 11683 // % %= %i %I %q %Q %r %s %w %W %x
11684 case '%': {
11685 // If there is no subsequent character then we have an
11686 // invalid token. We're going to say it's the percent
11687 // operator because we don't want to move into the string
11688 // lex mode unnecessarily.
11689 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11690 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11691 LEX(PM_TOKEN_PERCENT);
11692 }
11693
11694 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11695 lex_state_set(parser, PM_LEX_STATE_BEG);
11696 LEX(PM_TOKEN_PERCENT_EQUAL);
11697 } else if (
11698 lex_state_beg_p(parser) ||
11699 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11700 lex_state_spcarg_p(parser, space_seen)
11701 ) {
11702 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11703 if (*parser->current.end >= 0x80) {
11704 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11705 }
11706
11707 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11708 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11709 LEX(PM_TOKEN_STRING_BEGIN);
11710 }
11711
11712 // Delimiters for %-literals cannot be alphanumeric. We
11713 // validate that here.
11714 uint8_t delimiter = peek_offset(parser, 1);
11715 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11716 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11717 goto lex_next_token;
11718 }
11719
11720 switch (peek(parser)) {
11721 case 'i': {
11722 parser->current.end++;
11723
11724 if (parser->current.end < parser->end) {
11725 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11726 } else {
11727 lex_mode_push_list_eof(parser);
11728 }
11729
11730 LEX(PM_TOKEN_PERCENT_LOWER_I);
11731 }
11732 case 'I': {
11733 parser->current.end++;
11734
11735 if (parser->current.end < parser->end) {
11736 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11737 } else {
11738 lex_mode_push_list_eof(parser);
11739 }
11740
11741 LEX(PM_TOKEN_PERCENT_UPPER_I);
11742 }
11743 case 'r': {
11744 parser->current.end++;
11745
11746 if (parser->current.end < parser->end) {
11747 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11748 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11749 } else {
11750 lex_mode_push_regexp(parser, '\0', '\0');
11751 }
11752
11753 LEX(PM_TOKEN_REGEXP_BEGIN);
11754 }
11755 case 'q': {
11756 parser->current.end++;
11757
11758 if (parser->current.end < parser->end) {
11759 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11760 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11761 } else {
11762 lex_mode_push_string_eof(parser);
11763 }
11764
11765 LEX(PM_TOKEN_STRING_BEGIN);
11766 }
11767 case 'Q': {
11768 parser->current.end++;
11769
11770 if (parser->current.end < parser->end) {
11771 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11772 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11773 } else {
11774 lex_mode_push_string_eof(parser);
11775 }
11776
11777 LEX(PM_TOKEN_STRING_BEGIN);
11778 }
11779 case 's': {
11780 parser->current.end++;
11781
11782 if (parser->current.end < parser->end) {
11783 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11784 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11785 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11786 } else {
11787 lex_mode_push_string_eof(parser);
11788 }
11789
11790 LEX(PM_TOKEN_SYMBOL_BEGIN);
11791 }
11792 case 'w': {
11793 parser->current.end++;
11794
11795 if (parser->current.end < parser->end) {
11796 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11797 } else {
11798 lex_mode_push_list_eof(parser);
11799 }
11800
11801 LEX(PM_TOKEN_PERCENT_LOWER_W);
11802 }
11803 case 'W': {
11804 parser->current.end++;
11805
11806 if (parser->current.end < parser->end) {
11807 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11808 } else {
11809 lex_mode_push_list_eof(parser);
11810 }
11811
11812 LEX(PM_TOKEN_PERCENT_UPPER_W);
11813 }
11814 case 'x': {
11815 parser->current.end++;
11816
11817 if (parser->current.end < parser->end) {
11818 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11819 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11820 } else {
11821 lex_mode_push_string_eof(parser);
11822 }
11823
11824 LEX(PM_TOKEN_PERCENT_LOWER_X);
11825 }
11826 default:
11827 // If we get to this point, then we have a % that is completely
11828 // unparsable. In this case we'll just drop it from the parser
11829 // and skip past it and hope that the next token is something
11830 // that we can parse.
11831 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11832 goto lex_next_token;
11833 }
11834 }
11835
11836 if (ambiguous_operator_p(parser, space_seen)) {
11837 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11838 }
11839
11840 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11841 LEX(PM_TOKEN_PERCENT);
11842 }
11843
11844 // global variable
11845 case '$': {
11846 pm_token_type_t type = lex_global_variable(parser);
11847
11848 // If we're lexing an embedded variable, then we need to pop back into
11849 // the parent lex context.
11850 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11851 lex_mode_pop(parser);
11852 }
11853
11854 lex_state_set(parser, PM_LEX_STATE_END);
11855 LEX(type);
11856 }
11857
11858 // instance variable, class variable
11859 case '@':
11860 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11861 LEX(lex_at_variable(parser));
11862
11863 default: {
11864 if (*parser->current.start != '_') {
11865 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11866
11867 // If this isn't the beginning of an identifier, then
11868 // it's an invalid token as we've exhausted all of the
11869 // other options. We'll skip past it and return the next
11870 // token after adding an appropriate error message.
11871 if (!width) {
11872 if (*parser->current.start >= 0x80) {
11873 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11874 } else if (*parser->current.start == '\\') {
11875 switch (peek_at(parser, parser->current.start + 1)) {
11876 case ' ':
11877 parser->current.end++;
11878 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11879 break;
11880 case '\f':
11881 parser->current.end++;
11882 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11883 break;
11884 case '\t':
11885 parser->current.end++;
11886 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11887 break;
11888 case '\v':
11889 parser->current.end++;
11890 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11891 break;
11892 case '\r':
11893 if (peek_at(parser, parser->current.start + 2) != '\n') {
11894 parser->current.end++;
11895 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11896 break;
11897 }
11899 default:
11900 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11901 break;
11902 }
11903 } else if (char_is_ascii_printable(*parser->current.start)) {
11904 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11905 } else {
11906 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11907 }
11908
11909 goto lex_next_token;
11910 }
11911
11912 parser->current.end = parser->current.start + width;
11913 }
11914
11915 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11916
11917 // If we've hit a __END__ and it was at the start of the
11918 // line or the start of the file and it is followed by
11919 // either a \n or a \r\n, then this is the last token of the
11920 // file.
11921 if (
11922 ((parser->current.end - parser->current.start) == 7) &&
11923 current_token_starts_line(parser) &&
11924 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11925 (parser->current.end == parser->end || match_eol(parser))
11926 ) {
11927 // Since we know we're about to add an __END__ comment,
11928 // we know we need to add all of the newlines to get the
11929 // correct column information for it.
11930 const uint8_t *cursor = parser->current.end;
11931 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11932 pm_newline_list_append(&parser->newline_list, cursor++);
11933 }
11934
11935 parser->current.end = parser->end;
11936 parser->current.type = PM_TOKEN___END__;
11937 parser_lex_callback(parser);
11938
11939 parser->data_loc.start = parser->current.start;
11940 parser->data_loc.end = parser->current.end;
11941
11942 LEX(PM_TOKEN_EOF);
11943 }
11944
11945 pm_lex_state_t last_state = parser->lex_state;
11946
11947 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11948 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11949 if (previous_command_start) {
11950 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11951 } else {
11952 lex_state_set(parser, PM_LEX_STATE_ARG);
11953 }
11954 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11955 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11956 } else {
11957 lex_state_set(parser, PM_LEX_STATE_END);
11958 }
11959 }
11960
11961 if (
11962 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11963 (type == PM_TOKEN_IDENTIFIER) &&
11964 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11965 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11966 ) {
11967 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11968 }
11969
11970 LEX(type);
11971 }
11972 }
11973 }
11974 case PM_LEX_LIST: {
11975 if (parser->next_start != NULL) {
11976 parser->current.end = parser->next_start;
11977 parser->next_start = NULL;
11978 }
11979
11980 // First we'll set the beginning of the token.
11981 parser->current.start = parser->current.end;
11982
11983 // If there's any whitespace at the start of the list, then we're
11984 // going to trim it off the beginning and create a new token.
11985 size_t whitespace;
11986
11987 if (parser->heredoc_end) {
11988 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11989 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11990 whitespace += 1;
11991 }
11992 } else {
11993 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11994 }
11995
11996 if (whitespace > 0) {
11997 parser->current.end += whitespace;
11998 if (peek_offset(parser, -1) == '\n') {
11999 // mutates next_start
12000 parser_flush_heredoc_end(parser);
12001 }
12002 LEX(PM_TOKEN_WORDS_SEP);
12003 }
12004
12005 // We'll check if we're at the end of the file. If we are, then we
12006 // need to return the EOF token.
12007 if (parser->current.end >= parser->end) {
12008 LEX(PM_TOKEN_EOF);
12009 }
12010
12011 // Here we'll get a list of the places where strpbrk should break,
12012 // and then find the first one.
12013 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12014 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
12015 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12016
12017 // If we haven't found an escape yet, then this buffer will be
12018 // unallocated since we can refer directly to the source string.
12019 pm_token_buffer_t token_buffer = { 0 };
12020
12021 while (breakpoint != NULL) {
12022 // If we hit whitespace, then we must have received content by
12023 // now, so we can return an element of the list.
12024 if (pm_char_is_whitespace(*breakpoint)) {
12025 parser->current.end = breakpoint;
12026 pm_token_buffer_flush(parser, &token_buffer);
12027 LEX(PM_TOKEN_STRING_CONTENT);
12028 }
12029
12030 // If we hit the terminator, we need to check which token to
12031 // return.
12032 if (*breakpoint == lex_mode->as.list.terminator) {
12033 // If this terminator doesn't actually close the list, then
12034 // we need to continue on past it.
12035 if (lex_mode->as.list.nesting > 0) {
12036 parser->current.end = breakpoint + 1;
12037 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12038 lex_mode->as.list.nesting--;
12039 continue;
12040 }
12041
12042 // If we've hit the terminator and we've already skipped
12043 // past content, then we can return a list node.
12044 if (breakpoint > parser->current.start) {
12045 parser->current.end = breakpoint;
12046 pm_token_buffer_flush(parser, &token_buffer);
12047 LEX(PM_TOKEN_STRING_CONTENT);
12048 }
12049
12050 // Otherwise, switch back to the default state and return
12051 // the end of the list.
12052 parser->current.end = breakpoint + 1;
12053 lex_mode_pop(parser);
12054 lex_state_set(parser, PM_LEX_STATE_END);
12055 LEX(PM_TOKEN_STRING_END);
12056 }
12057
12058 // If we hit a null byte, skip directly past it.
12059 if (*breakpoint == '\0') {
12060 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
12061 continue;
12062 }
12063
12064 // If we hit escapes, then we need to treat the next token
12065 // literally. In this case we'll skip past the next character
12066 // and find the next breakpoint.
12067 if (*breakpoint == '\\') {
12068 parser->current.end = breakpoint + 1;
12069
12070 // If we've hit the end of the file, then break out of the
12071 // loop by setting the breakpoint to NULL.
12072 if (parser->current.end == parser->end) {
12073 breakpoint = NULL;
12074 continue;
12075 }
12076
12077 pm_token_buffer_escape(parser, &token_buffer);
12078 uint8_t peeked = peek(parser);
12079
12080 switch (peeked) {
12081 case ' ':
12082 case '\f':
12083 case '\t':
12084 case '\v':
12085 case '\\':
12086 pm_token_buffer_push_byte(&token_buffer, peeked);
12087 parser->current.end++;
12088 break;
12089 case '\r':
12090 parser->current.end++;
12091 if (peek(parser) != '\n') {
12092 pm_token_buffer_push_byte(&token_buffer, '\r');
12093 break;
12094 }
12096 case '\n':
12097 pm_token_buffer_push_byte(&token_buffer, '\n');
12098
12099 if (parser->heredoc_end) {
12100 // ... if we are on the same line as a heredoc,
12101 // flush the heredoc and continue parsing after
12102 // heredoc_end.
12103 parser_flush_heredoc_end(parser);
12104 pm_token_buffer_copy(parser, &token_buffer);
12105 LEX(PM_TOKEN_STRING_CONTENT);
12106 } else {
12107 // ... else track the newline.
12108 pm_newline_list_append(&parser->newline_list, parser->current.end);
12109 }
12110
12111 parser->current.end++;
12112 break;
12113 default:
12114 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12115 pm_token_buffer_push_byte(&token_buffer, peeked);
12116 parser->current.end++;
12117 } else if (lex_mode->as.list.interpolation) {
12118 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12119 } else {
12120 pm_token_buffer_push_byte(&token_buffer, '\\');
12121 pm_token_buffer_push_escaped(&token_buffer, parser);
12122 }
12123
12124 break;
12125 }
12126
12127 token_buffer.cursor = parser->current.end;
12128 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12129 continue;
12130 }
12131
12132 // If we hit a #, then we will attempt to lex interpolation.
12133 if (*breakpoint == '#') {
12134 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12135
12136 if (type == PM_TOKEN_NOT_PROVIDED) {
12137 // If we haven't returned at this point then we had something
12138 // that looked like an interpolated class or instance variable
12139 // like "#@" but wasn't actually. In this case we'll just skip
12140 // to the next breakpoint.
12141 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12142 continue;
12143 }
12144
12145 if (type == PM_TOKEN_STRING_CONTENT) {
12146 pm_token_buffer_flush(parser, &token_buffer);
12147 }
12148
12149 LEX(type);
12150 }
12151
12152 // If we've hit the incrementor, then we need to skip past it
12153 // and find the next breakpoint.
12154 assert(*breakpoint == lex_mode->as.list.incrementor);
12155 parser->current.end = breakpoint + 1;
12156 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12157 lex_mode->as.list.nesting++;
12158 continue;
12159 }
12160
12161 if (parser->current.end > parser->current.start) {
12162 pm_token_buffer_flush(parser, &token_buffer);
12163 LEX(PM_TOKEN_STRING_CONTENT);
12164 }
12165
12166 // If we were unable to find a breakpoint, then this token hits the
12167 // end of the file.
12168 parser->current.end = parser->end;
12169 pm_token_buffer_flush(parser, &token_buffer);
12170 LEX(PM_TOKEN_STRING_CONTENT);
12171 }
12172 case PM_LEX_REGEXP: {
12173 // First, we'll set the start of this token to be the current end.
12174 if (parser->next_start == NULL) {
12175 parser->current.start = parser->current.end;
12176 } else {
12177 parser->current.start = parser->next_start;
12178 parser->current.end = parser->next_start;
12179 parser->next_start = NULL;
12180 }
12181
12182 // We'll check if we're at the end of the file. If we are, then we
12183 // need to return the EOF token.
12184 if (parser->current.end >= parser->end) {
12185 LEX(PM_TOKEN_EOF);
12186 }
12187
12188 // Get a reference to the current mode.
12189 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12190
12191 // These are the places where we need to split up the content of the
12192 // regular expression. We'll use strpbrk to find the first of these
12193 // characters.
12194 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12195 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12196 pm_regexp_token_buffer_t token_buffer = { 0 };
12197
12198 while (breakpoint != NULL) {
12199 uint8_t term = lex_mode->as.regexp.terminator;
12200 bool is_terminator = (*breakpoint == term);
12201
12202 // If the terminator is a newline, we need to consider \r\n _also_ a newline.
12203 // For example: `%r\nfoo\r\n`
12204 // The regular expression should be /foo/, not /foo\r/
12205 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12206 if (term == '\n') {
12207 is_terminator = true;
12208 }
12209
12210 // If the terminator is a CR, but we see a CRLF, we need to
12211 // treat the CRLF as a newline, meaning this is _not_ the
12212 // terminator
12213 if (term == '\r') {
12214 is_terminator = false;
12215 }
12216 }
12217
12218 // If we hit the terminator, we need to determine what kind of
12219 // token to return.
12220 if (is_terminator) {
12221 if (lex_mode->as.regexp.nesting > 0) {
12222 parser->current.end = breakpoint + 1;
12223 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12224 lex_mode->as.regexp.nesting--;
12225 continue;
12226 }
12227
12228 // Here we've hit the terminator. If we have already consumed
12229 // content then we need to return that content as string content
12230 // first.
12231 if (breakpoint > parser->current.start) {
12232 parser->current.end = breakpoint;
12233 pm_regexp_token_buffer_flush(parser, &token_buffer);
12234 LEX(PM_TOKEN_STRING_CONTENT);
12235 }
12236
12237 // Check here if we need to track the newline.
12238 size_t eol_length = match_eol_at(parser, breakpoint);
12239 if (eol_length) {
12240 parser->current.end = breakpoint + eol_length;
12241 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12242 } else {
12243 parser->current.end = breakpoint + 1;
12244 }
12245
12246 // Since we've hit the terminator of the regular expression,
12247 // we now need to parse the options.
12248 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12249
12250 lex_mode_pop(parser);
12251 lex_state_set(parser, PM_LEX_STATE_END);
12252 LEX(PM_TOKEN_REGEXP_END);
12253 }
12254
12255 // If we've hit the incrementor, then we need to skip past it
12256 // and find the next breakpoint.
12257 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12258 parser->current.end = breakpoint + 1;
12259 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12260 lex_mode->as.regexp.nesting++;
12261 continue;
12262 }
12263
12264 switch (*breakpoint) {
12265 case '\0':
12266 // If we hit a null byte, skip directly past it.
12267 parser->current.end = breakpoint + 1;
12268 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12269 break;
12270 case '\r':
12271 if (peek_at(parser, breakpoint + 1) != '\n') {
12272 parser->current.end = breakpoint + 1;
12273 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12274 break;
12275 }
12276
12277 breakpoint++;
12278 parser->current.end = breakpoint;
12279 pm_regexp_token_buffer_escape(parser, &token_buffer);
12280 token_buffer.base.cursor = breakpoint;
12281
12283 case '\n':
12284 // If we've hit a newline, then we need to track that in
12285 // the list of newlines.
12286 if (parser->heredoc_end == NULL) {
12287 pm_newline_list_append(&parser->newline_list, breakpoint);
12288 parser->current.end = breakpoint + 1;
12289 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12290 break;
12291 }
12292
12293 parser->current.end = breakpoint + 1;
12294 parser_flush_heredoc_end(parser);
12295 pm_regexp_token_buffer_flush(parser, &token_buffer);
12296 LEX(PM_TOKEN_STRING_CONTENT);
12297 case '\\': {
12298 // If we hit escapes, then we need to treat the next
12299 // token literally. In this case we'll skip past the
12300 // next character and find the next breakpoint.
12301 parser->current.end = breakpoint + 1;
12302
12303 // If we've hit the end of the file, then break out of
12304 // the loop by setting the breakpoint to NULL.
12305 if (parser->current.end == parser->end) {
12306 breakpoint = NULL;
12307 break;
12308 }
12309
12310 pm_regexp_token_buffer_escape(parser, &token_buffer);
12311 uint8_t peeked = peek(parser);
12312
12313 switch (peeked) {
12314 case '\r':
12315 parser->current.end++;
12316 if (peek(parser) != '\n') {
12317 if (lex_mode->as.regexp.terminator != '\r') {
12318 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12319 }
12320 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12321 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12322 break;
12323 }
12325 case '\n':
12326 if (parser->heredoc_end) {
12327 // ... if we are on the same line as a heredoc,
12328 // flush the heredoc and continue parsing after
12329 // heredoc_end.
12330 parser_flush_heredoc_end(parser);
12331 pm_regexp_token_buffer_copy(parser, &token_buffer);
12332 LEX(PM_TOKEN_STRING_CONTENT);
12333 } else {
12334 // ... else track the newline.
12335 pm_newline_list_append(&parser->newline_list, parser->current.end);
12336 }
12337
12338 parser->current.end++;
12339 break;
12340 case 'c':
12341 case 'C':
12342 case 'M':
12343 case 'u':
12344 case 'x':
12345 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12346 break;
12347 default:
12348 if (lex_mode->as.regexp.terminator == peeked) {
12349 // Some characters when they are used as the
12350 // terminator also receive an escape. They are
12351 // enumerated here.
12352 switch (peeked) {
12353 case '$': case ')': case '*': case '+':
12354 case '.': case '>': case '?': case ']':
12355 case '^': case '|': case '}':
12356 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12357 break;
12358 default:
12359 break;
12360 }
12361
12362 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12363 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12364 parser->current.end++;
12365 break;
12366 }
12367
12368 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12369 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12370 break;
12371 }
12372
12373 token_buffer.base.cursor = parser->current.end;
12374 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12375 break;
12376 }
12377 case '#': {
12378 // If we hit a #, then we will attempt to lex
12379 // interpolation.
12380 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12381
12382 if (type == PM_TOKEN_NOT_PROVIDED) {
12383 // If we haven't returned at this point then we had
12384 // something that looked like an interpolated class or
12385 // instance variable like "#@" but wasn't actually. In
12386 // this case we'll just skip to the next breakpoint.
12387 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12388 break;
12389 }
12390
12391 if (type == PM_TOKEN_STRING_CONTENT) {
12392 pm_regexp_token_buffer_flush(parser, &token_buffer);
12393 }
12394
12395 LEX(type);
12396 }
12397 default:
12398 assert(false && "unreachable");
12399 break;
12400 }
12401 }
12402
12403 if (parser->current.end > parser->current.start) {
12404 pm_regexp_token_buffer_flush(parser, &token_buffer);
12405 LEX(PM_TOKEN_STRING_CONTENT);
12406 }
12407
12408 // If we were unable to find a breakpoint, then this token hits the
12409 // end of the file.
12410 parser->current.end = parser->end;
12411 pm_regexp_token_buffer_flush(parser, &token_buffer);
12412 LEX(PM_TOKEN_STRING_CONTENT);
12413 }
12414 case PM_LEX_STRING: {
12415 // First, we'll set the start of this token to be the current end.
12416 if (parser->next_start == NULL) {
12417 parser->current.start = parser->current.end;
12418 } else {
12419 parser->current.start = parser->next_start;
12420 parser->current.end = parser->next_start;
12421 parser->next_start = NULL;
12422 }
12423
12424 // We'll check if we're at the end of the file. If we are, then we need to
12425 // return the EOF token.
12426 if (parser->current.end >= parser->end) {
12427 LEX(PM_TOKEN_EOF);
12428 }
12429
12430 // These are the places where we need to split up the content of the
12431 // string. We'll use strpbrk to find the first of these characters.
12432 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12433 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12434 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12435
12436 // If we haven't found an escape yet, then this buffer will be
12437 // unallocated since we can refer directly to the source string.
12438 pm_token_buffer_t token_buffer = { 0 };
12439
12440 while (breakpoint != NULL) {
12441 // If we hit the incrementor, then we'll increment the nesting and
12442 // continue lexing.
12443 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12444 lex_mode->as.string.nesting++;
12445 parser->current.end = breakpoint + 1;
12446 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12447 continue;
12448 }
12449
12450 uint8_t term = lex_mode->as.string.terminator;
12451 bool is_terminator = (*breakpoint == term);
12452
12453 // If the terminator is a newline, we need to consider \r\n _also_ a newline.
12454 // For example: `%\nfoo\r\n`
12455 // The string should be "foo", not "foo\r"
12456 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12457 if (term == '\n') {
12458 is_terminator = true;
12459 }
12460
12461 // If the terminator is a CR, but we see a CRLF, we need to
12462 // treat the CRLF as a newline, meaning this is _not_ the
12463 // terminator
12464 if (term == '\r') {
12465 is_terminator = false;
12466 }
12467 }
12468
12469 // Note that we have to check the terminator here first because we could
12470 // potentially be parsing a % string that has a # character as the
12471 // terminator.
12472 if (is_terminator) {
12473 // If this terminator doesn't actually close the string, then we need
12474 // to continue on past it.
12475 if (lex_mode->as.string.nesting > 0) {
12476 parser->current.end = breakpoint + 1;
12477 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12478 lex_mode->as.string.nesting--;
12479 continue;
12480 }
12481
12482 // Here we've hit the terminator. If we have already consumed content
12483 // then we need to return that content as string content first.
12484 if (breakpoint > parser->current.start) {
12485 parser->current.end = breakpoint;
12486 pm_token_buffer_flush(parser, &token_buffer);
12487 LEX(PM_TOKEN_STRING_CONTENT);
12488 }
12489
12490 // Otherwise we need to switch back to the parent lex mode and
12491 // return the end of the string.
12492 size_t eol_length = match_eol_at(parser, breakpoint);
12493 if (eol_length) {
12494 parser->current.end = breakpoint + eol_length;
12495 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12496 } else {
12497 parser->current.end = breakpoint + 1;
12498 }
12499
12500 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12501 parser->current.end++;
12502 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12503 lex_mode_pop(parser);
12504 LEX(PM_TOKEN_LABEL_END);
12505 }
12506
12507 lex_state_set(parser, PM_LEX_STATE_END);
12508 lex_mode_pop(parser);
12509 LEX(PM_TOKEN_STRING_END);
12510 }
12511
12512 switch (*breakpoint) {
12513 case '\0':
12514 // Skip directly past the null character.
12515 parser->current.end = breakpoint + 1;
12516 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12517 break;
12518 case '\r':
12519 if (peek_at(parser, breakpoint + 1) != '\n') {
12520 parser->current.end = breakpoint + 1;
12521 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12522 break;
12523 }
12524
12525 // If we hit a \r\n sequence, then we need to treat it
12526 // as a newline.
12527 breakpoint++;
12528 parser->current.end = breakpoint;
12529 pm_token_buffer_escape(parser, &token_buffer);
12530 token_buffer.cursor = breakpoint;
12531
12533 case '\n':
12534 // When we hit a newline, we need to flush any potential
12535 // heredocs. Note that this has to happen after we check
12536 // for the terminator in case the terminator is a
12537 // newline character.
12538 if (parser->heredoc_end == NULL) {
12539 pm_newline_list_append(&parser->newline_list, breakpoint);
12540 parser->current.end = breakpoint + 1;
12541 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12542 break;
12543 }
12544
12545 parser->current.end = breakpoint + 1;
12546 parser_flush_heredoc_end(parser);
12547 pm_token_buffer_flush(parser, &token_buffer);
12548 LEX(PM_TOKEN_STRING_CONTENT);
12549 case '\\': {
12550 // Here we hit escapes.
12551 parser->current.end = breakpoint + 1;
12552
12553 // If we've hit the end of the file, then break out of
12554 // the loop by setting the breakpoint to NULL.
12555 if (parser->current.end == parser->end) {
12556 breakpoint = NULL;
12557 continue;
12558 }
12559
12560 pm_token_buffer_escape(parser, &token_buffer);
12561 uint8_t peeked = peek(parser);
12562
12563 switch (peeked) {
12564 case '\\':
12565 pm_token_buffer_push_byte(&token_buffer, '\\');
12566 parser->current.end++;
12567 break;
12568 case '\r':
12569 parser->current.end++;
12570 if (peek(parser) != '\n') {
12571 if (!lex_mode->as.string.interpolation) {
12572 pm_token_buffer_push_byte(&token_buffer, '\\');
12573 }
12574 pm_token_buffer_push_byte(&token_buffer, '\r');
12575 break;
12576 }
12578 case '\n':
12579 if (!lex_mode->as.string.interpolation) {
12580 pm_token_buffer_push_byte(&token_buffer, '\\');
12581 pm_token_buffer_push_byte(&token_buffer, '\n');
12582 }
12583
12584 if (parser->heredoc_end) {
12585 // ... if we are on the same line as a heredoc,
12586 // flush the heredoc and continue parsing after
12587 // heredoc_end.
12588 parser_flush_heredoc_end(parser);
12589 pm_token_buffer_copy(parser, &token_buffer);
12590 LEX(PM_TOKEN_STRING_CONTENT);
12591 } else {
12592 // ... else track the newline.
12593 pm_newline_list_append(&parser->newline_list, parser->current.end);
12594 }
12595
12596 parser->current.end++;
12597 break;
12598 default:
12599 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12600 pm_token_buffer_push_byte(&token_buffer, peeked);
12601 parser->current.end++;
12602 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12603 pm_token_buffer_push_byte(&token_buffer, peeked);
12604 parser->current.end++;
12605 } else if (lex_mode->as.string.interpolation) {
12606 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12607 } else {
12608 pm_token_buffer_push_byte(&token_buffer, '\\');
12609 pm_token_buffer_push_escaped(&token_buffer, parser);
12610 }
12611
12612 break;
12613 }
12614
12615 token_buffer.cursor = parser->current.end;
12616 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12617 break;
12618 }
12619 case '#': {
12620 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12621
12622 if (type == PM_TOKEN_NOT_PROVIDED) {
12623 // If we haven't returned at this point then we had something that
12624 // looked like an interpolated class or instance variable like "#@"
12625 // but wasn't actually. In this case we'll just skip to the next
12626 // breakpoint.
12627 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12628 break;
12629 }
12630
12631 if (type == PM_TOKEN_STRING_CONTENT) {
12632 pm_token_buffer_flush(parser, &token_buffer);
12633 }
12634
12635 LEX(type);
12636 }
12637 default:
12638 assert(false && "unreachable");
12639 }
12640 }
12641
12642 if (parser->current.end > parser->current.start) {
12643 pm_token_buffer_flush(parser, &token_buffer);
12644 LEX(PM_TOKEN_STRING_CONTENT);
12645 }
12646
12647 // If we've hit the end of the string, then this is an unterminated
12648 // string. In that case we'll return a string content token.
12649 parser->current.end = parser->end;
12650 pm_token_buffer_flush(parser, &token_buffer);
12651 LEX(PM_TOKEN_STRING_CONTENT);
12652 }
12653 case PM_LEX_HEREDOC: {
12654 // First, we'll set the start of this token.
12655 if (parser->next_start == NULL) {
12656 parser->current.start = parser->current.end;
12657 } else {
12658 parser->current.start = parser->next_start;
12659 parser->current.end = parser->next_start;
12660 parser->heredoc_end = NULL;
12661 parser->next_start = NULL;
12662 }
12663
12664 // Now let's grab the information about the identifier off of the
12665 // current lex mode.
12666 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12667 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12668
12669 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12670 lex_mode->as.heredoc.line_continuation = false;
12671
12672 // We'll check if we're at the end of the file. If we are, then we
12673 // will add an error (because we weren't able to find the
12674 // terminator) but still continue parsing so that content after the
12675 // declaration of the heredoc can be parsed.
12676 if (parser->current.end >= parser->end) {
12677 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12678 parser->next_start = lex_mode->as.heredoc.next_start;
12679 parser->heredoc_end = parser->current.end;
12680 lex_state_set(parser, PM_LEX_STATE_END);
12681 lex_mode_pop(parser);
12682 LEX(PM_TOKEN_HEREDOC_END);
12683 }
12684
12685 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12686 size_t ident_length = heredoc_lex_mode->ident_length;
12687
12688 // If we are immediately following a newline and we have hit the
12689 // terminator, then we need to return the ending of the heredoc.
12690 if (current_token_starts_line(parser)) {
12691 const uint8_t *start = parser->current.start;
12692
12693 if (!line_continuation && (start + ident_length <= parser->end)) {
12694 const uint8_t *newline = next_newline(start, parser->end - start);
12695 const uint8_t *ident_end = newline;
12696 const uint8_t *terminator_end = newline;
12697
12698 if (newline == NULL) {
12699 terminator_end = parser->end;
12700 ident_end = parser->end;
12701 } else {
12702 terminator_end++;
12703 if (newline[-1] == '\r') {
12704 ident_end--; // Remove \r
12705 }
12706 }
12707
12708 const uint8_t *terminator_start = ident_end - ident_length;
12709 const uint8_t *cursor = start;
12710
12711 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12712 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12713 cursor++;
12714 }
12715 }
12716
12717 if (
12718 (cursor == terminator_start) &&
12719 (memcmp(terminator_start, ident_start, ident_length) == 0)
12720 ) {
12721 if (newline != NULL) {
12722 pm_newline_list_append(&parser->newline_list, newline);
12723 }
12724
12725 parser->current.end = terminator_end;
12726 if (*lex_mode->as.heredoc.next_start == '\\') {
12727 parser->next_start = NULL;
12728 } else {
12729 parser->next_start = lex_mode->as.heredoc.next_start;
12730 parser->heredoc_end = parser->current.end;
12731 }
12732
12733 lex_state_set(parser, PM_LEX_STATE_END);
12734 lex_mode_pop(parser);
12735 LEX(PM_TOKEN_HEREDOC_END);
12736 }
12737 }
12738
12739 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12740 if (
12741 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12742 lex_mode->as.heredoc.common_whitespace != NULL &&
12743 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12744 peek_at(parser, start) != '\n'
12745 ) {
12746 *lex_mode->as.heredoc.common_whitespace = whitespace;
12747 }
12748 }
12749
12750 // Otherwise we'll be parsing string content. These are the places
12751 // where we need to split up the content of the heredoc. We'll use
12752 // strpbrk to find the first of these characters.
12753 uint8_t breakpoints[] = "\r\n\\#";
12754
12755 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12756 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12757 breakpoints[3] = '\0';
12758 }
12759
12760 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12761 pm_token_buffer_t token_buffer = { 0 };
12762 bool was_line_continuation = false;
12763
12764 while (breakpoint != NULL) {
12765 switch (*breakpoint) {
12766 case '\0':
12767 // Skip directly past the null character.
12768 parser->current.end = breakpoint + 1;
12769 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12770 break;
12771 case '\r':
12772 parser->current.end = breakpoint + 1;
12773
12774 if (peek_at(parser, breakpoint + 1) != '\n') {
12775 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12776 break;
12777 }
12778
12779 // If we hit a \r\n sequence, then we want to replace it
12780 // with a single \n character in the final string.
12781 breakpoint++;
12782 pm_token_buffer_escape(parser, &token_buffer);
12783 token_buffer.cursor = breakpoint;
12784
12786 case '\n': {
12787 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12788 parser_flush_heredoc_end(parser);
12789 parser->current.end = breakpoint + 1;
12790 pm_token_buffer_flush(parser, &token_buffer);
12791 LEX(PM_TOKEN_STRING_CONTENT);
12792 }
12793
12794 pm_newline_list_append(&parser->newline_list, breakpoint);
12795
12796 // If we have a - or ~ heredoc, then we can match after
12797 // some leading whitespace.
12798 const uint8_t *start = breakpoint + 1;
12799
12800 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12801 // We want to match the terminator starting from the end of the line in case
12802 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12803 const uint8_t *newline = next_newline(start, parser->end - start);
12804
12805 if (newline == NULL) {
12806 newline = parser->end;
12807 } else if (newline[-1] == '\r') {
12808 newline--; // Remove \r
12809 }
12810
12811 // Start of a possible terminator.
12812 const uint8_t *terminator_start = newline - ident_length;
12813
12814 // Cursor to check for the leading whitespace. We skip the
12815 // leading whitespace if we have a - or ~ heredoc.
12816 const uint8_t *cursor = start;
12817
12818 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12819 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12820 cursor++;
12821 }
12822 }
12823
12824 if (
12825 cursor == terminator_start &&
12826 (memcmp(terminator_start, ident_start, ident_length) == 0)
12827 ) {
12828 parser->current.end = breakpoint + 1;
12829 pm_token_buffer_flush(parser, &token_buffer);
12830 LEX(PM_TOKEN_STRING_CONTENT);
12831 }
12832 }
12833
12834 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12835
12836 // If we have hit a newline that is followed by a valid
12837 // terminator, then we need to return the content of the
12838 // heredoc here as string content. Then, the next time a
12839 // token is lexed, it will match again and return the
12840 // end of the heredoc.
12841 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12842 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12843 *lex_mode->as.heredoc.common_whitespace = whitespace;
12844 }
12845
12846 parser->current.end = breakpoint + 1;
12847 pm_token_buffer_flush(parser, &token_buffer);
12848 LEX(PM_TOKEN_STRING_CONTENT);
12849 }
12850
12851 // Otherwise we hit a newline and it wasn't followed by
12852 // a terminator, so we can continue parsing.
12853 parser->current.end = breakpoint + 1;
12854 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12855 break;
12856 }
12857 case '\\': {
12858 // If we hit an escape, then we need to skip past
12859 // however many characters the escape takes up. However
12860 // it's important that if \n or \r\n are escaped, we
12861 // stop looping before the newline and not after the
12862 // newline so that we can still potentially find the
12863 // terminator of the heredoc.
12864 parser->current.end = breakpoint + 1;
12865
12866 // If we've hit the end of the file, then break out of
12867 // the loop by setting the breakpoint to NULL.
12868 if (parser->current.end == parser->end) {
12869 breakpoint = NULL;
12870 continue;
12871 }
12872
12873 pm_token_buffer_escape(parser, &token_buffer);
12874 uint8_t peeked = peek(parser);
12875
12876 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12877 switch (peeked) {
12878 case '\r':
12879 parser->current.end++;
12880 if (peek(parser) != '\n') {
12881 pm_token_buffer_push_byte(&token_buffer, '\\');
12882 pm_token_buffer_push_byte(&token_buffer, '\r');
12883 break;
12884 }
12886 case '\n':
12887 pm_token_buffer_push_byte(&token_buffer, '\\');
12888 pm_token_buffer_push_byte(&token_buffer, '\n');
12889 token_buffer.cursor = parser->current.end + 1;
12890 breakpoint = parser->current.end;
12891 continue;
12892 default:
12893 pm_token_buffer_push_byte(&token_buffer, '\\');
12894 pm_token_buffer_push_escaped(&token_buffer, parser);
12895 break;
12896 }
12897 } else {
12898 switch (peeked) {
12899 case '\r':
12900 parser->current.end++;
12901 if (peek(parser) != '\n') {
12902 pm_token_buffer_push_byte(&token_buffer, '\r');
12903 break;
12904 }
12906 case '\n':
12907 // If we are in a tilde here, we should
12908 // break out of the loop and return the
12909 // string content.
12910 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12911 const uint8_t *end = parser->current.end;
12912 pm_newline_list_append(&parser->newline_list, end);
12913
12914 // Here we want the buffer to only
12915 // include up to the backslash.
12916 parser->current.end = breakpoint;
12917 pm_token_buffer_flush(parser, &token_buffer);
12918
12919 // Now we can advance the end of the
12920 // token past the newline.
12921 parser->current.end = end + 1;
12922 lex_mode->as.heredoc.line_continuation = true;
12923 LEX(PM_TOKEN_STRING_CONTENT);
12924 }
12925
12926 was_line_continuation = true;
12927 token_buffer.cursor = parser->current.end + 1;
12928 breakpoint = parser->current.end;
12929 continue;
12930 default:
12931 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12932 break;
12933 }
12934 }
12935
12936 token_buffer.cursor = parser->current.end;
12937 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12938 break;
12939 }
12940 case '#': {
12941 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12942
12943 if (type == PM_TOKEN_NOT_PROVIDED) {
12944 // If we haven't returned at this point then we had
12945 // something that looked like an interpolated class
12946 // or instance variable like "#@" but wasn't
12947 // actually. In this case we'll just skip to the
12948 // next breakpoint.
12949 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12950 break;
12951 }
12952
12953 if (type == PM_TOKEN_STRING_CONTENT) {
12954 pm_token_buffer_flush(parser, &token_buffer);
12955 }
12956
12957 LEX(type);
12958 }
12959 default:
12960 assert(false && "unreachable");
12961 }
12962
12963 was_line_continuation = false;
12964 }
12965
12966 if (parser->current.end > parser->current.start) {
12967 parser->current.end = parser->end;
12968 pm_token_buffer_flush(parser, &token_buffer);
12969 LEX(PM_TOKEN_STRING_CONTENT);
12970 }
12971
12972 // If we've hit the end of the string, then this is an unterminated
12973 // heredoc. In that case we'll return a string content token.
12974 parser->current.end = parser->end;
12975 pm_token_buffer_flush(parser, &token_buffer);
12976 LEX(PM_TOKEN_STRING_CONTENT);
12977 }
12978 }
12979
12980 assert(false && "unreachable");
12981}
12982
12983#undef LEX
12984
12985/******************************************************************************/
12986/* Parse functions */
12987/******************************************************************************/
12988
/**
 * Binding powers (operator precedence levels) used by the expression parser.
 * The members are ordered from the loosest-binding constructs (statements and
 * modifiers) up to the tightest-binding ones (indexing and calls).
 *
 * Consecutive levels are two apart. The operator table initializers
 * LEFT_ASSOCIATIVE and NON_ASSOCIATIVE use `level + 1` as the right binding
 * power, so the gap keeps that value from colliding with the next level.
 */
typedef enum {
    PM_BINDING_POWER_UNSET = 0,             // used to indicate this token cannot be used as an infix operator
    PM_BINDING_POWER_STATEMENT = 2,
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,   // rescue
    PM_BINDING_POWER_MODIFIER = 6,          // if unless until while
    PM_BINDING_POWER_COMPOSITION = 8,       // and or
    PM_BINDING_POWER_NOT = 10,              // not
    PM_BINDING_POWER_MATCH = 12,            // => in
    PM_BINDING_POWER_DEFINED = 14,          // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
    PM_BINDING_POWER_ASSIGNMENT = 18,       // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY = 20,          // ?:
    PM_BINDING_POWER_RANGE = 22,            // .. ...
    PM_BINDING_POWER_LOGICAL_OR = 24,       // ||
    PM_BINDING_POWER_LOGICAL_AND = 26,      // &&
    PM_BINDING_POWER_EQUALITY = 28,         // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON = 30,       // > >= < <=
    PM_BINDING_POWER_BITWISE_OR = 32,       // | ^
    PM_BINDING_POWER_BITWISE_AND = 34,      // &
    PM_BINDING_POWER_SHIFT = 36,            // << >>
    PM_BINDING_POWER_TERM = 38,             // + -
    PM_BINDING_POWER_FACTOR = 40,           // * / %
    PM_BINDING_POWER_UMINUS = 42,           // -@
    PM_BINDING_POWER_EXPONENT = 44,         // **
    PM_BINDING_POWER_UNARY = 46,            // ! ~ +@
    PM_BINDING_POWER_INDEX = 48,            // [] []=
    PM_BINDING_POWER_CALL = 50,             // :: .
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
13026
13031typedef struct {
13033 pm_binding_power_t left;
13034
13036 pm_binding_power_t right;
13037
13040
13047
// Brace initializers for entries of the pm_binding_powers table. Each expands
// to four positional values: the left binding power, the right binding power,
// and two boolean flags. The first flag is false only for the unary form and
// the second flag is true only for the non-associative form.
//
// The `precedence` argument is parenthesized at every use so that an argument
// which is itself an expression (e.g. a conditional or a shift) cannot
// re-associate with the `+ 1` that follows it.

// Assignment operators: left power is UNARY, right power is ASSIGNMENT.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }

// Left-associative: the right power is one higher than the left power.
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }

// Right-associative: the right power equals the left power.
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }

// Non-associative: same powers as left-associative, with the last flag set.
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }

// Right-associative unary: equal powers, with the binary-operator flag cleared.
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
13053
13054pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
13055 // rescue
13056 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
13057
13058 // if unless until while
13059 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13060 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13061 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13062 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
13063
13064 // and or
13065 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
13066 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
13067
13068 // => in
13069 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13070 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13071
13072 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
13073 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13074 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13075 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
13076 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
13077 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
13078 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13079 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13080 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
13081 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13082 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13083 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13084 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
13085 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13086 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13087
13088 // ?:
13089 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13090
13091 // .. ...
13092 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13093 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13094 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13095 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13096
13097 // ||
13098 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13099
13100 // &&
13101 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13102
13103 // != !~ == === =~ <=>
13104 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13105 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13106 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13107 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13108 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13109 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13110
13111 // > >= < <=
13112 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13113 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13114 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13115 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13116
13117 // ^ |
13118 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13119 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13120
13121 // &
13122 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13123
13124 // >> <<
13125 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13126 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13127
13128 // - +
13129 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13130 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13131
13132 // % / *
13133 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13134 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13135 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13136 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13137
13138 // -@
13139 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13140 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13141
13142 // **
13143 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13144 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13145
13146 // ! ~ +@
13147 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13148 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13149 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13150
13151 // [
13152 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13153
13154 // :: . &.
13155 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13156 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13157 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13158};
13159
13160#undef BINDING_POWER_ASSIGNMENT
13161#undef LEFT_ASSOCIATIVE
13162#undef RIGHT_ASSOCIATIVE
13163#undef RIGHT_ASSOCIATIVE_UNARY
13164
13168static inline bool
13169match1(const pm_parser_t *parser, pm_token_type_t type) {
13170 return parser->current.type == type;
13171}
13172
13176static inline bool
13177match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13178 return match1(parser, type1) || match1(parser, type2);
13179}
13180
13184static inline bool
13185match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13186 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13187}
13188
13192static inline bool
13193match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13194 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13195}
13196
13200static inline bool
13201match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13202 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13203}
13204
13208static inline bool
13209match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13210 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13211}
13212
13219static bool
13220accept1(pm_parser_t *parser, pm_token_type_t type) {
13221 if (match1(parser, type)) {
13222 parser_lex(parser);
13223 return true;
13224 }
13225 return false;
13226}
13227
13232static inline bool
13233accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13234 if (match2(parser, type1, type2)) {
13235 parser_lex(parser);
13236 return true;
13237 }
13238 return false;
13239}
13240
13252static void
13253expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13254 if (accept1(parser, type)) return;
13255
13256 const uint8_t *location = parser->previous.end;
13257 pm_parser_err(parser, location, location, diag_id);
13258
13259 parser->previous.start = location;
13260 parser->previous.type = PM_TOKEN_MISSING;
13261}
13262
13267static void
13268expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13269 if (accept2(parser, type1, type2)) return;
13270
13271 const uint8_t *location = parser->previous.end;
13272 pm_parser_err(parser, location, location, diag_id);
13273
13274 parser->previous.start = location;
13275 parser->previous.type = PM_TOKEN_MISSING;
13276}
13277
13282static void
13283expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13284 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13285 parser_lex(parser);
13286 } else {
13287 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13288 parser->previous.start = parser->previous.end;
13289 parser->previous.type = PM_TOKEN_MISSING;
13290 }
13291}
13292
13293static pm_node_t *
13294parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13295
13300static pm_node_t *
13301parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13302 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13303 pm_assert_value_expression(parser, node);
13304 return node;
13305}
13306
13325static inline bool
13326token_begins_expression_p(pm_token_type_t type) {
13327 switch (type) {
13328 case PM_TOKEN_EQUAL_GREATER:
13329 case PM_TOKEN_KEYWORD_IN:
13330 // We need to special case this because it is a binary operator that
13331 // should not be marked as beginning an expression.
13332 return false;
13333 case PM_TOKEN_BRACE_RIGHT:
13334 case PM_TOKEN_BRACKET_RIGHT:
13335 case PM_TOKEN_COLON:
13336 case PM_TOKEN_COMMA:
13337 case PM_TOKEN_EMBEXPR_END:
13338 case PM_TOKEN_EOF:
13339 case PM_TOKEN_LAMBDA_BEGIN:
13340 case PM_TOKEN_KEYWORD_DO:
13341 case PM_TOKEN_KEYWORD_DO_LOOP:
13342 case PM_TOKEN_KEYWORD_END:
13343 case PM_TOKEN_KEYWORD_ELSE:
13344 case PM_TOKEN_KEYWORD_ELSIF:
13345 case PM_TOKEN_KEYWORD_ENSURE:
13346 case PM_TOKEN_KEYWORD_THEN:
13347 case PM_TOKEN_KEYWORD_RESCUE:
13348 case PM_TOKEN_KEYWORD_WHEN:
13349 case PM_TOKEN_NEWLINE:
13350 case PM_TOKEN_PARENTHESIS_RIGHT:
13351 case PM_TOKEN_SEMICOLON:
13352 // The reason we need this short-circuit is because we're using the
13353 // binding powers table to tell us if the subsequent token could
13354 // potentially be the start of an expression. If there _is_ a binding
13355 // power for one of these tokens, then we should remove it from this list
13356 // and let it be handled by the default case below.
13357 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13358 return false;
13359 case PM_TOKEN_UAMPERSAND:
13360 // This is a special case because this unary operator cannot appear
13361 // as a general operator, it only appears in certain circumstances.
13362 return false;
13363 case PM_TOKEN_UCOLON_COLON:
13364 case PM_TOKEN_UMINUS:
13365 case PM_TOKEN_UMINUS_NUM:
13366 case PM_TOKEN_UPLUS:
13367 case PM_TOKEN_BANG:
13368 case PM_TOKEN_TILDE:
13369 case PM_TOKEN_UDOT_DOT:
13370 case PM_TOKEN_UDOT_DOT_DOT:
13371 // These unary tokens actually do have binding power associated with them
13372 // so that we can correctly place them into the precedence order. But we
13373 // want them to be marked as beginning an expression, so we need to
13374 // special case them here.
13375 return true;
13376 default:
13377 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13378 }
13379}
13380
13385static pm_node_t *
13386parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13387 if (accept1(parser, PM_TOKEN_USTAR)) {
13388 pm_token_t operator = parser->previous;
13389 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13390 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13391 }
13392
13393 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13394}
13395
13400static void
13401parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13402 // The method name needs to change. If we previously had
13403 // foo, we now need foo=. In this case we'll allocate a new
13404 // owned string, copy the previous method name in, and
13405 // append an =.
13406 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13407 size_t length = constant->length;
13408 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13409 if (name == NULL) return;
13410
13411 memcpy(name, constant->start, length);
13412 name[length] = '=';
13413
13414 // Now switch the name to the new string.
13415 // This silences clang analyzer warning about leak of memory pointed by `name`.
13416 // NOLINTNEXTLINE(clang-analyzer-*)
13417 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13418}
13419
13426static pm_node_t *
13427parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13428 switch (PM_NODE_TYPE(target)) {
13429 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13430 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13431 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13432 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13433 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13434 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13435 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13436 default: break;
13437 }
13438
13439 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13440 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13441
13442 pm_node_destroy(parser, target);
13443 return (pm_node_t *) result;
13444}
13445
13451static void
13452parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13453 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13454
13455 for (size_t index = 0; index < implicit_parameters->size; index++) {
13456 if (implicit_parameters->nodes[index] == node) {
13457 // If the node is not the last one in the list, we need to shift the
13458 // remaining nodes down to fill the gap. This is extremely unlikely
13459 // to happen.
13460 if (index != implicit_parameters->size - 1) {
13461 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13462 }
13463
13464 implicit_parameters->size--;
13465 break;
13466 }
13467 }
13468}
13469
13478static pm_node_t *
13479parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13480 switch (PM_NODE_TYPE(target)) {
13481 case PM_MISSING_NODE:
13482 return target;
13483 case PM_SOURCE_ENCODING_NODE:
13484 case PM_FALSE_NODE:
13485 case PM_SOURCE_FILE_NODE:
13486 case PM_SOURCE_LINE_NODE:
13487 case PM_NIL_NODE:
13488 case PM_SELF_NODE:
13489 case PM_TRUE_NODE: {
13490 // In these special cases, we have specific error messages and we
13491 // will replace them with local variable writes.
13492 return parse_unwriteable_target(parser, target);
13493 }
13494 case PM_CLASS_VARIABLE_READ_NODE:
13496 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
13497 return target;
13498 case PM_CONSTANT_PATH_NODE:
13499 if (context_def_p(parser)) {
13500 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13501 }
13502
13504 target->type = PM_CONSTANT_PATH_TARGET_NODE;
13505
13506 return target;
13507 case PM_CONSTANT_READ_NODE:
13508 if (context_def_p(parser)) {
13509 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13510 }
13511
13512 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13513 target->type = PM_CONSTANT_TARGET_NODE;
13514
13515 return target;
13516 case PM_BACK_REFERENCE_READ_NODE:
13517 case PM_NUMBERED_REFERENCE_READ_NODE:
13518 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13519 return target;
13520 case PM_GLOBAL_VARIABLE_READ_NODE:
13522 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13523 return target;
13524 case PM_LOCAL_VARIABLE_READ_NODE: {
13525 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13526 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13527 parse_target_implicit_parameter(parser, target);
13528 }
13529
13530 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13531 uint32_t name = cast->name;
13532 uint32_t depth = cast->depth;
13533 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13534
13536 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
13537
13538 return target;
13539 }
13540 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13541 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13542 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13543
13544 parse_target_implicit_parameter(parser, target);
13545 pm_node_destroy(parser, target);
13546
13547 return node;
13548 }
13549 case PM_INSTANCE_VARIABLE_READ_NODE:
13551 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13552 return target;
13553 case PM_MULTI_TARGET_NODE:
13554 if (splat_parent) {
13555 // Multi target is not accepted in all positions. If this is one
13556 // of them, then we need to add an error.
13557 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13558 }
13559
13560 return target;
13561 case PM_SPLAT_NODE: {
13562 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13563
13564 if (splat->expression != NULL) {
13565 splat->expression = parse_target(parser, splat->expression, multiple, true);
13566 }
13567
13568 return (pm_node_t *) splat;
13569 }
13570 case PM_CALL_NODE: {
13571 pm_call_node_t *call = (pm_call_node_t *) target;
13572
13573 // If we have no arguments to the call node and we need this to be a
13574 // target then this is either a method call or a local variable
13575 // write.
13576 if (
13577 (call->message_loc.start != NULL) &&
13578 (call->message_loc.end[-1] != '!') &&
13579 (call->message_loc.end[-1] != '?') &&
13580 (call->opening_loc.start == NULL) &&
13581 (call->arguments == NULL) &&
13582 (call->block == NULL)
13583 ) {
13584 if (call->receiver == NULL) {
13585 // When we get here, we have a local variable write, because it
13586 // was previously marked as a method call but now we have an =.
13587 // This looks like:
13588 //
13589 // foo = 1
13590 //
13591 // When it was parsed in the prefix position, foo was seen as a
13592 // method call with no receiver and no arguments. Now we have an
13593 // =, so we know it's a local variable write.
13594 const pm_location_t message_loc = call->message_loc;
13595
13596 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13597 pm_node_destroy(parser, target);
13598
13599 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13600 }
13601
13602 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13603 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13604 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13605 }
13606
13607 parse_write_name(parser, &call->name);
13608 return (pm_node_t *) pm_call_target_node_create(parser, call);
13609 }
13610 }
13611
13612 // If there is no call operator and the message is "[]" then this is
13613 // an aref expression, and we can transform it into an aset
13614 // expression.
13615 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13616 return (pm_node_t *) pm_index_target_node_create(parser, call);
13617 }
13618 }
13620 default:
13621 // In this case we have a node that we don't know how to convert
13622 // into a target. We need to treat it as an error. For now, we'll
13623 // mark it as an error and just skip right past it.
13624 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13625 return target;
13626 }
13627}
13628
13633static pm_node_t *
13634parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13635 pm_node_t *result = parse_target(parser, target, multiple, false);
13636
13637 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13638 // parens after the targets.
13639 if (
13640 !match1(parser, PM_TOKEN_EQUAL) &&
13641 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13642 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13643 ) {
13644 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13645 }
13646
13647 return result;
13648}
13649
13654static pm_node_t *
13655parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13656 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13657
13658 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13659 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13660 }
13661
13662 return write;
13663}
13664
13668static pm_node_t *
13669parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13670 switch (PM_NODE_TYPE(target)) {
13671 case PM_MISSING_NODE:
13672 pm_node_destroy(parser, value);
13673 return target;
13674 case PM_CLASS_VARIABLE_READ_NODE: {
13675 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13676 pm_node_destroy(parser, target);
13677 return (pm_node_t *) node;
13678 }
13679 case PM_CONSTANT_PATH_NODE: {
13680 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13681
13682 if (context_def_p(parser)) {
13683 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13684 }
13685
13686 return parse_shareable_constant_write(parser, node);
13687 }
13688 case PM_CONSTANT_READ_NODE: {
13689 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13690
13691 if (context_def_p(parser)) {
13692 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13693 }
13694
13695 pm_node_destroy(parser, target);
13696 return parse_shareable_constant_write(parser, node);
13697 }
13698 case PM_BACK_REFERENCE_READ_NODE:
13699 case PM_NUMBERED_REFERENCE_READ_NODE:
13700 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13702 case PM_GLOBAL_VARIABLE_READ_NODE: {
13703 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13704 pm_node_destroy(parser, target);
13705 return (pm_node_t *) node;
13706 }
13707 case PM_LOCAL_VARIABLE_READ_NODE: {
13709
13710 pm_constant_id_t name = local_read->name;
13711 pm_location_t name_loc = target->location;
13712
13713 uint32_t depth = local_read->depth;
13714 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13715
13716 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13717 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13718 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13719 parse_target_implicit_parameter(parser, target);
13720 }
13721
13722 pm_locals_unread(&scope->locals, name);
13723 pm_node_destroy(parser, target);
13724
13725 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13726 }
13727 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13728 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13729 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13730
13731 parse_target_implicit_parameter(parser, target);
13732 pm_node_destroy(parser, target);
13733
13734 return node;
13735 }
13736 case PM_INSTANCE_VARIABLE_READ_NODE: {
13737 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13738 pm_node_destroy(parser, target);
13739 return write_node;
13740 }
13741 case PM_MULTI_TARGET_NODE:
13742 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13743 case PM_SPLAT_NODE: {
13744 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13745
13746 if (splat->expression != NULL) {
13747 splat->expression = parse_write(parser, splat->expression, operator, value);
13748 }
13749
13750 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13751 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13752
13753 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13754 }
13755 case PM_CALL_NODE: {
13756 pm_call_node_t *call = (pm_call_node_t *) target;
13757
13758 // If we have no arguments to the call node and we need this to be a
13759 // target then this is either a method call or a local variable
13760 // write.
13761 if (
13762 (call->message_loc.start != NULL) &&
13763 (call->message_loc.end[-1] != '!') &&
13764 (call->message_loc.end[-1] != '?') &&
13765 (call->opening_loc.start == NULL) &&
13766 (call->arguments == NULL) &&
13767 (call->block == NULL)
13768 ) {
13769 if (call->receiver == NULL) {
13770 // When we get here, we have a local variable write, because it
13771 // was previously marked as a method call but now we have an =.
13772 // This looks like:
13773 //
13774 // foo = 1
13775 //
13776 // When it was parsed in the prefix position, foo was seen as a
13777 // method call with no receiver and no arguments. Now we have an
13778 // =, so we know it's a local variable write.
13779 const pm_location_t message = call->message_loc;
13780
13781 pm_parser_local_add_location(parser, message.start, message.end, 0);
13782 pm_node_destroy(parser, target);
13783
13784 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13785 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13786
13787 pm_refute_numbered_parameter(parser, message.start, message.end);
13788 return target;
13789 }
13790
13791 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13792 // When we get here, we have a method call, because it was
13793 // previously marked as a method call but now we have an =. This
13794 // looks like:
13795 //
13796 // foo.bar = 1
13797 //
13798 // When it was parsed in the prefix position, foo.bar was seen as a
13799 // method call with no arguments. Now we have an =, so we know it's
13800 // a method call with an argument. In this case we will create the
13801 // arguments node, parse the argument, and add it to the list.
13802 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13803 call->arguments = arguments;
13804
13805 pm_arguments_node_arguments_append(arguments, value);
13806 call->base.location.end = arguments->base.location.end;
13807
13808 parse_write_name(parser, &call->name);
13809 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13810
13811 return (pm_node_t *) call;
13812 }
13813 }
13814
13815 // If there is no call operator and the message is "[]" then this is
13816 // an aref expression, and we can transform it into an aset
13817 // expression.
13818 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13819 if (call->arguments == NULL) {
13820 call->arguments = pm_arguments_node_create(parser);
13821 }
13822
13823 pm_arguments_node_arguments_append(call->arguments, value);
13824 target->location.end = value->location.end;
13825
13826 // Replace the name with "[]=".
13827 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13828
13829 // Ensure that the arguments for []= don't contain keywords
13830 pm_index_arguments_check(parser, call->arguments, call->block);
13831 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13832
13833 return target;
13834 }
13835
13836 // If there are arguments on the call node, then it can't be a method
13837 // call ending with = or a local variable write, so it must be a
13838 // syntax error. In this case we'll fall through to our default
13839 // handling. We need to free the value that we parsed because there
13840 // is no way for us to attach it to the tree at this point.
13841 pm_node_destroy(parser, value);
13842 }
13844 default:
13845 // In this case we have a node that we don't know how to convert into a
13846 // target. We need to treat it as an error. For now, we'll mark it as an
13847 // error and just skip right past it.
13848 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13849 return target;
13850 }
13851}
13852
13859static pm_node_t *
13860parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13861 switch (PM_NODE_TYPE(target)) {
13862 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13863 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13864 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13865 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13866 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13867 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13868 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13869 default: break;
13870 }
13871
13872 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13873 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13874
13875 pm_node_destroy(parser, target);
13876 return (pm_node_t *) result;
13877}
13878
13889static pm_node_t *
13890parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13891 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13892
13893 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13894 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13895
13896 while (accept1(parser, PM_TOKEN_COMMA)) {
13897 if (accept1(parser, PM_TOKEN_USTAR)) {
13898 // Here we have a splat operator. It can have a name or be
13899 // anonymous. It can be the final target or be in the middle if
13900 // there haven't been any others yet.
13901 if (has_rest) {
13902 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13903 }
13904
13905 pm_token_t star_operator = parser->previous;
13906 pm_node_t *name = NULL;
13907
13908 if (token_begins_expression_p(parser->current.type)) {
13909 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13910 name = parse_target(parser, name, true, true);
13911 }
13912
13913 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13914 pm_multi_target_node_targets_append(parser, result, splat);
13915 has_rest = true;
13916 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13917 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13918 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13919 target = parse_target(parser, target, true, false);
13920
13921 pm_multi_target_node_targets_append(parser, result, target);
13922 context_pop(parser);
13923 } else if (token_begins_expression_p(parser->current.type)) {
13924 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13925 target = parse_target(parser, target, true, false);
13926
13927 pm_multi_target_node_targets_append(parser, result, target);
13928 } else if (!match1(parser, PM_TOKEN_EOF)) {
13929 // If we get here, then we have a trailing , in a multi target node.
13930 // We'll add an implicit rest node to represent this.
13931 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13932 pm_multi_target_node_targets_append(parser, result, rest);
13933 break;
13934 }
13935 }
13936
13937 return (pm_node_t *) result;
13938}
13939
13944static pm_node_t *
13945parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13946 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13947 accept1(parser, PM_TOKEN_NEWLINE);
13948
13949 // Ensure that we have either an = or a ) after the targets.
13950 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13951 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13952 }
13953
13954 return result;
13955}
13956
13960static pm_statements_node_t *
13961parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13962 // First, skip past any optional terminators that might be at the beginning
13963 // of the statements.
13964 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13965
13966 // If we have a terminator, then we can just return NULL.
13967 if (context_terminator(context, &parser->current)) return NULL;
13968
13969 pm_statements_node_t *statements = pm_statements_node_create(parser);
13970
13971 // At this point we know we have at least one statement, and that it
13972 // immediately follows the current token.
13973 context_push(parser, context);
13974
13975 while (true) {
13976 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13977 pm_statements_node_body_append(parser, statements, node, true);
13978
13979 // If we're recovering from a syntax error, then we need to stop parsing
13980 // the statements now.
13981 if (parser->recovering) {
13982 // If this is the level of context where the recovery has happened,
13983 // then we can mark the parser as done recovering.
13984 if (context_terminator(context, &parser->current)) parser->recovering = false;
13985 break;
13986 }
13987
13988 // If we have a terminator, then we will parse all consecutive
13989 // terminators and then continue parsing the statements list.
13990 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13991 // If we have a terminator, then we will continue parsing the
13992 // statements list.
13993 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13994 if (context_terminator(context, &parser->current)) break;
13995
13996 // Now we can continue parsing the list of statements.
13997 continue;
13998 }
13999
14000 // At this point we have a list of statements that are not terminated by
14001 // a newline or semicolon. At this point we need to check if we're at
14002 // the end of the statements list. If we are, then we should break out
14003 // of the loop.
14004 if (context_terminator(context, &parser->current)) break;
14005
14006 // At this point, we have a syntax error, because the statement was not
14007 // terminated by a newline or semicolon, and we're not at the end of the
14008 // statements list. Ideally we should scan forward to determine if we
14009 // should insert a missing terminator or break out of parsing the
14010 // statements list at this point.
14011 //
14012 // We don't have that yet, so instead we'll do a more naive approach. If
14013 // we were unable to parse an expression, then we will skip past this
14014 // token and continue parsing the statements list. Otherwise we'll add
14015 // an error and continue parsing the statements list.
14016 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
14017 parser_lex(parser);
14018
14019 // If we are at the end of the file, then we need to stop parsing
14020 // the statements entirely at this point. Mark the parser as
14021 // recovering, as we know that EOF closes the top-level context, and
14022 // then break out of the loop.
14023 if (match1(parser, PM_TOKEN_EOF)) {
14024 parser->recovering = true;
14025 break;
14026 }
14027
14028 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14029 if (context_terminator(context, &parser->current)) break;
14030 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
14031 // This is an inlined version of accept1 because the error that we
14032 // want to add has varargs. If this happens again, we should
14033 // probably extract a helper function.
14034 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
14035 parser->previous.start = parser->previous.end;
14036 parser->previous.type = PM_TOKEN_MISSING;
14037 }
14038 }
14039
14040 context_pop(parser);
14041 bool last_value = true;
14042 switch (context) {
14045 last_value = false;
14046 break;
14047 default:
14048 break;
14049 }
14050 pm_void_statements_check(parser, statements, last_value);
14051
14052 return statements;
14053}
14054
14059static void
14060pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14061 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
14062
14063 if (duplicated != NULL) {
14064 pm_buffer_t buffer = { 0 };
14065 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
14066
14067 pm_diagnostic_list_append_format(
14068 &parser->warning_list,
14069 duplicated->location.start,
14070 duplicated->location.end,
14071 PM_WARN_DUPLICATED_HASH_KEY,
14072 (int) pm_buffer_length(&buffer),
14073 pm_buffer_value(&buffer),
14074 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
14075 );
14076
14077 pm_buffer_free(&buffer);
14078 }
14079}
14080
14085static void
14086pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14087 pm_node_t *previous;
14088
14089 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14090 pm_diagnostic_list_append_format(
14091 &parser->warning_list,
14092 node->location.start,
14093 node->location.end,
14094 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14095 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14096 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14097 );
14098 }
14099}
14100
14104static bool
14105parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14106 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
14107 bool contains_keyword_splat = false;
14108
14109 while (true) {
14110 pm_node_t *element;
14111
14112 switch (parser->current.type) {
14113 case PM_TOKEN_USTAR_STAR: {
14114 parser_lex(parser);
14115 pm_token_t operator = parser->previous;
14116 pm_node_t *value = NULL;
14117
14118 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14119 // If we're about to parse a nested hash that is being
14120 // pushed into this hash directly with **, then we want the
14121 // inner hash to share the static literals with the outer
14122 // hash.
14123 parser->current_hash_keys = literals;
14124 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14125 } else if (token_begins_expression_p(parser->current.type)) {
14126 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14127 } else {
14128 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14129 }
14130
14131 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14132 contains_keyword_splat = true;
14133 break;
14134 }
14135 case PM_TOKEN_LABEL: {
14136 pm_token_t label = parser->current;
14137 parser_lex(parser);
14138
14139 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14140 pm_hash_key_static_literals_add(parser, literals, key);
14141
14142 pm_token_t operator = not_provided(parser);
14143 pm_node_t *value = NULL;
14144
14145 if (token_begins_expression_p(parser->current.type)) {
14146 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14147 } else {
14148 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14149 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14150 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14151 } else {
14152 int depth = -1;
14153 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14154
14155 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14156 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14157 } else {
14158 depth = pm_parser_local_depth(parser, &identifier);
14159 }
14160
14161 if (depth == -1) {
14162 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14163 } else {
14164 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14165 }
14166 }
14167
14168 value->location.end++;
14169 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14170 }
14171
14172 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14173 break;
14174 }
14175 default: {
14176 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14177
14178 // Hash keys that are strings are automatically frozen. We will
14179 // mark that here.
14180 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14181 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14182 }
14183
14184 pm_hash_key_static_literals_add(parser, literals, key);
14185
14186 pm_token_t operator;
14187 if (pm_symbol_node_label_p(key)) {
14188 operator = not_provided(parser);
14189 } else {
14190 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14191 operator = parser->previous;
14192 }
14193
14194 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14195 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14196 break;
14197 }
14198 }
14199
14200 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14201 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14202 } else {
14203 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14204 }
14205
14206 // If there's no comma after the element, then we're done.
14207 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14208
14209 // If the next element starts with a label or a **, then we know we have
14210 // another element in the hash, so we'll continue parsing.
14211 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14212
14213 // Otherwise we need to check if the subsequent token begins an expression.
14214 // If it does, then we'll continue parsing.
14215 if (token_begins_expression_p(parser->current.type)) continue;
14216
14217 // Otherwise by default we will exit out of this loop.
14218 break;
14219 }
14220
14221 return contains_keyword_splat;
14222}
14223
14227static inline void
14228parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14229 if (arguments->arguments == NULL) {
14230 arguments->arguments = pm_arguments_node_create(parser);
14231 }
14232
14233 pm_arguments_node_arguments_append(arguments->arguments, argument);
14234}
14235
/**
 * Parse a comma-separated list of call arguments into `arguments`.
 *
 * Nothing is parsed when the current token is the terminator or EOF, when it
 * has a known left binding power lower than PM_BINDING_POWER_RANGE, or when it
 * terminates the current context.
 *
 * Positional arguments, splats, bare keyword hashes, and forwarded `...`
 * arguments are appended to `arguments->arguments`. The first block argument
 * (`&expr`) is stored in `arguments->block`; any further block argument is
 * appended to the argument list instead.
 *
 * @param parser The parser being advanced.
 * @param arguments The structure that receives the parsed arguments.
 * @param accepts_forwarding Whether a leading `...` may be treated as
 *     argument forwarding rather than as the start of a range.
 * @param terminator The token type that closes the argument list. When it is
 *     PM_TOKEN_EOF, newlines between arguments are not skipped.
 * @param depth The current recursion depth; nested parses use depth + 1.
 */
14239static void
14240parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14241 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14242
14243 // First we need to check if the next token is one that could be the start
14244 // of an argument. If it's not, then we can just return.
14245 if (
14246 match2(parser, terminator, PM_TOKEN_EOF) ||
14247 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14248 context_terminator(parser->current_context->context, &parser->current)
14249 ) {
14250 return;
14251 }
14252
// State carried across loop iterations, recording which kinds of arguments
// have already been parsed so that later arguments can be validated.
14253 bool parsed_first_argument = false;
14254 bool parsed_bare_hash = false;
14255 bool parsed_block_argument = false;
14256 bool parsed_forwarding_arguments = false;
14257
14258 while (!match1(parser, PM_TOKEN_EOF)) {
// Any argument that follows a forwarded `...` is reported as an error, but
// parsing continues.
14259 if (parsed_forwarding_arguments) {
14260 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14261 }
14262
14263 pm_node_t *argument = NULL;
14264
14265 switch (parser->current.type) {
// `**` or a label starts a bare keyword hash. The whole hash is parsed by
// parse_assocs and appended as a single argument.
14266 case PM_TOKEN_USTAR_STAR:
14267 case PM_TOKEN_LABEL: {
14268 if (parsed_bare_hash) {
14269 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14270 }
14271
14272 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14273 argument = (pm_node_t *) hash;
14274
14275 pm_static_literals_t hash_keys = { 0 };
14276 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14277
14278 parse_arguments_append(parser, arguments, argument);
14279
14280 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14281 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14282 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14283
14284 pm_static_literals_free(&hash_keys);
14285 parsed_bare_hash = true;
14286
14287 break;
14288 }
// `&` introduces a block argument. Its expression is optional; when it is
// omitted the block forwarding check for the scope is run instead.
14289 case PM_TOKEN_UAMPERSAND: {
14290 parser_lex(parser);
14291 pm_token_t operator = parser->previous;
14292 pm_node_t *expression = NULL;
14293
14294 if (token_begins_expression_p(parser->current.type)) {
14295 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14296 } else {
14297 pm_parser_scope_forwarding_block_check(parser, &operator);
14298 }
14299
14300 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
// Only the first block argument occupies the block slot; any later one is
// kept in the regular argument list.
14301 if (parsed_block_argument) {
14302 parse_arguments_append(parser, arguments, argument);
14303 } else {
14304 arguments->block = argument;
14305 }
14306
14307 if (match1(parser, PM_TOKEN_COMMA)) {
14308 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14309 }
14310
14311 parsed_block_argument = true;
14312 break;
14313 }
// `*` introduces a splat. When it is immediately followed by `)`, `,`, `;`,
// or `]` it is anonymous and the positional forwarding check is run.
14314 case PM_TOKEN_USTAR: {
14315 parser_lex(parser);
14316 pm_token_t operator = parser->previous;
14317
14318 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14319 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14320 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14321 if (parsed_bare_hash) {
14322 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14323 }
14324 } else {
14325 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14326
14327 if (parsed_bare_hash) {
14328 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14329 }
14330
14331 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14332 }
14333
14334 parse_arguments_append(parser, arguments, argument);
14335 break;
14336 }
// `...` is either argument forwarding or the start of a beginless range.
// Only the forwarding path breaks out of the switch; every other path
// continues into the default case below.
14337 case PM_TOKEN_UDOT_DOT_DOT: {
14338 if (accepts_forwarding) {
14339 parser_lex(parser);
14340
14341 if (token_begins_expression_p(parser->current.type)) {
14342 // If the token begins an expression then this ... was
14343 // not actually argument forwarding but was instead a
14344 // range.
14345 pm_token_t operator = parser->previous;
14346 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14347
14348 // If we parse a range, we need to validate that we
14349 // didn't accidentally violate the nonassoc rules of the
14350 // ... operator.
14351 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14352 pm_range_node_t *range = (pm_range_node_t *) right;
14353 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14354 }
14355
14356 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14357 } else {
14358 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14359 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14360 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14361 }
14362
14363 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14364 parse_arguments_append(parser, arguments, argument);
14365 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14366 arguments->has_forwarding = true;
14367 parsed_forwarding_arguments = true;
14368 break;
14369 }
14370 }
14371 }
// Intentional fall through: when `...` was parsed as a range, `argument` is
// already set; when forwarding is not accepted, `argument` is still NULL and
// the token is parsed as an ordinary expression below.
14373 default: {
14374 if (argument == NULL) {
14375 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14376 }
14377
14378 bool contains_keywords = false;
14379 bool contains_keyword_splat = false;
14380
// A label-style symbol, or an expression followed by `=>`, is the first key
// of a bare keyword hash. The expression just parsed becomes that key.
14381 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14382 if (parsed_bare_hash) {
14383 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14384 }
14385
14386 pm_token_t operator;
14387 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14388 operator = parser->previous;
14389 } else {
14390 operator = not_provided(parser);
14391 }
14392
14393 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14394 contains_keywords = true;
14395
14396 // Create the set of static literals for this hash.
14397 pm_static_literals_t hash_keys = { 0 };
14398 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14399
14400 // Finish parsing the one we are part way through.
14401 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14402 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14403
14404 pm_keyword_hash_node_elements_append(bare_hash, argument);
14405 argument = (pm_node_t *) bare_hash;
14406
14407 // Then parse more if we have a comma
14408 if (accept1(parser, PM_TOKEN_COMMA) && (
14409 token_begins_expression_p(parser->current.type) ||
14410 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14411 )) {
14412 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14413 }
14414
14415 pm_static_literals_free(&hash_keys);
14416 parsed_bare_hash = true;
14417 }
14418
14419 parse_arguments_append(parser, arguments, argument);
14420
14421 pm_node_flags_t flags = 0;
14422 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14423 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14424 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14425
14426 break;
14427 }
14428 }
14429
14430 parsed_first_argument = true;
14431
14432 // If parsing the argument failed, we need to stop parsing arguments.
14433 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14434
14435 // If the terminator of these arguments is not EOF, then we have a
14436 // specific token we're looking for. In that case we can accept a
14437 // newline here because it is not functioning as a statement terminator.
14438 bool accepted_newline = false;
14439 if (terminator != PM_TOKEN_EOF) {
14440 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14441 }
14442
14443 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14444 // If we previously were on a comma and we just parsed a bare hash,
14445 // then we want to continue parsing arguments. This is because the
14446 // comma was grabbed up by the hash parser.
14447 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14448 // If there was a comma, then we need to check if we also accepted a
14449 // newline. If we did, then this is a syntax error.
14450 if (accepted_newline) {
14451 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14452 }
14453
14454 // If this is a command call and an argument takes a block,
14455 // there can be no further arguments. For example,
14456 // `foo(bar 1 do end, 2)` should be rejected.
14457 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
14458 pm_call_node_t *call = (pm_call_node_t *) argument;
14459 if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
14460 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14461 break;
14462 }
14463 }
14464 } else {
14465 // If there is no comma at the end of the argument list then we're
14466 // done parsing arguments and can break out of this loop.
14467 break;
14468 }
14469
14470 // If we hit the terminator, then that means we have a trailing comma so
14471 // we can accept that output as well.
14472 if (match1(parser, terminator)) break;
14473 }
14474}
14475
14487parse_required_destructured_parameter(pm_parser_t *parser) {
14488 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14489
14490 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14491 pm_multi_target_node_opening_set(node, &parser->previous);
14492
14493 do {
14494 pm_node_t *param;
14495
14496 // If we get here then we have a trailing comma, which isn't allowed in
14497 // the grammar. In other places, multi targets _do_ allow trailing
14498 // commas, so here we'll assume this is a mistake of the user not
14499 // knowing it's not allowed here.
14500 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14501 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14502 pm_multi_target_node_targets_append(parser, node, param);
14503 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14504 break;
14505 }
14506
14507 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14508 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14509 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14510 pm_token_t star = parser->previous;
14511 pm_node_t *value = NULL;
14512
14513 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14514 pm_token_t name = parser->previous;
14515 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14516 if (pm_parser_parameter_name_check(parser, &name)) {
14517 pm_node_flag_set_repeated_parameter(value);
14518 }
14519 pm_parser_local_add_token(parser, &name, 1);
14520 }
14521
14522 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14523 } else {
14524 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14525 pm_token_t name = parser->previous;
14526
14527 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14528 if (pm_parser_parameter_name_check(parser, &name)) {
14529 pm_node_flag_set_repeated_parameter(param);
14530 }
14531 pm_parser_local_add_token(parser, &name, 1);
14532 }
14533
14534 pm_multi_target_node_targets_append(parser, node, param);
14535 } while (accept1(parser, PM_TOKEN_COMMA));
14536
14537 accept1(parser, PM_TOKEN_NEWLINE);
14538 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14539 pm_multi_target_node_closing_set(node, &parser->previous);
14540
14541 return node;
14542}
14543
/**
 * The states used to validate the order in which parameters appear in a
 * parameter list.
 *
 * The numeric values matter: update_parameter_state compares them, and a
 * parameter whose state is greater than the current state is out of order.
 * Parsing therefore starts at the largest value (PM_PARAMETERS_ORDER_NONE)
 * and the state only ever moves towards smaller values.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** No further parameters may follow. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** A keyword rest parameter has been seen. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** A keyword parameter has been seen. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** A rest parameter has been seen. */
    PM_PARAMETERS_ORDER_REST = 4,

    /** A named parameter or a splat has been seen after an optional one. */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** An optional parameter has been seen. */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** A named (required) parameter has been seen. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** Nothing has been seen yet. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14559
14563static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14564 [0] = PM_PARAMETERS_NO_CHANGE,
14565 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14566 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14567 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14568 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14569 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14570 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14571 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14572 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14573 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14574 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14575 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14576};
14577
14585static bool
14586update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14587 pm_parameters_order_t state = parameters_ordering[token->type];
14588 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14589
14590 // If we see another ordered argument after a optional argument
14591 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14592 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14593 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14594 return true;
14595 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14596 return true;
14597 }
14598
14599 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14600 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14601 return false;
14602 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14603 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14604 return false;
14605 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14606 // We know what transition we failed on, so we can provide a better error here.
14607 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14608 return false;
14609 }
14610
14611 if (state < *current) *current = state;
14612 return true;
14613}
14614
14620static inline void
14621refute_optional_parameter(pm_parser_t *parser) {
14622 if (match1(parser, PM_TOKEN_EQUAL)) {
14623 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
14624 }
14625}
14626
14630static pm_parameters_node_t *
14631parse_parameters(
14632 pm_parser_t *parser,
14633 pm_binding_power_t binding_power,
14634 bool uses_parentheses,
14635 bool allows_trailing_comma,
14636 bool allows_forwarding_parameters,
14637 bool accepts_blocks_in_defaults,
14638 bool in_block,
14639 uint16_t depth
14640) {
14641 pm_do_loop_stack_push(parser, false);
14642
14643 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14644 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14645
14646 while (true) {
14647 bool parsing = true;
14648
14649 switch (parser->current.type) {
14650 case PM_TOKEN_PARENTHESIS_LEFT: {
14651 update_parameter_state(parser, &parser->current, &order);
14652 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14653
14654 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14655 pm_parameters_node_requireds_append(params, param);
14656 } else {
14657 pm_parameters_node_posts_append(params, param);
14658 }
14659 break;
14660 }
14661 case PM_TOKEN_UAMPERSAND:
14662 case PM_TOKEN_AMPERSAND: {
14663 update_parameter_state(parser, &parser->current, &order);
14664 parser_lex(parser);
14665
14666 pm_token_t operator = parser->previous;
14667 pm_token_t name;
14668
14669 bool repeated = false;
14670 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14671 name = parser->previous;
14672 repeated = pm_parser_parameter_name_check(parser, &name);
14673 pm_parser_local_add_token(parser, &name, 1);
14674 } else {
14675 name = not_provided(parser);
14676 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14677 }
14678
14679 if (!uses_parentheses) {
14680 refute_optional_parameter(parser);
14681 }
14682
14683 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14684 if (repeated) {
14685 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14686 }
14687 if (params->block == NULL) {
14688 pm_parameters_node_block_set(params, param);
14689 } else {
14690 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14691 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14692 }
14693
14694 break;
14695 }
14696 case PM_TOKEN_UDOT_DOT_DOT: {
14697 if (!allows_forwarding_parameters) {
14698 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14699 }
14700
14701 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14702 parser_lex(parser);
14703
14704 if (!uses_parentheses) {
14705 refute_optional_parameter(parser);
14706 }
14707
14708 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14709 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14710
14711 if (params->keyword_rest != NULL) {
14712 // If we already have a keyword rest parameter, then we replace it with the
14713 // forwarding parameter and move the keyword rest parameter to the posts list.
14714 pm_node_t *keyword_rest = params->keyword_rest;
14715 pm_parameters_node_posts_append(params, keyword_rest);
14716 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14717 params->keyword_rest = NULL;
14718 }
14719
14720 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14721 break;
14722 }
14723 case PM_TOKEN_CLASS_VARIABLE:
14724 case PM_TOKEN_IDENTIFIER:
14725 case PM_TOKEN_CONSTANT:
14726 case PM_TOKEN_INSTANCE_VARIABLE:
14727 case PM_TOKEN_GLOBAL_VARIABLE:
14728 case PM_TOKEN_METHOD_NAME: {
14729 parser_lex(parser);
14730 switch (parser->previous.type) {
14731 case PM_TOKEN_CONSTANT:
14732 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14733 break;
14734 case PM_TOKEN_INSTANCE_VARIABLE:
14735 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14736 break;
14737 case PM_TOKEN_GLOBAL_VARIABLE:
14738 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14739 break;
14740 case PM_TOKEN_CLASS_VARIABLE:
14741 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14742 break;
14743 case PM_TOKEN_METHOD_NAME:
14744 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14745 break;
14746 default: break;
14747 }
14748
14749 if (parser->current.type == PM_TOKEN_EQUAL) {
14750 update_parameter_state(parser, &parser->current, &order);
14751 } else {
14752 update_parameter_state(parser, &parser->previous, &order);
14753 }
14754
14755 pm_token_t name = parser->previous;
14756 bool repeated = pm_parser_parameter_name_check(parser, &name);
14757 pm_parser_local_add_token(parser, &name, 1);
14758
14759 if (match1(parser, PM_TOKEN_EQUAL)) {
14760 pm_token_t operator = parser->current;
14761 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14762 parser_lex(parser);
14763
14764 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14765 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14766
14767 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14768 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14769 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14770
14771 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14772
14773 if (repeated) {
14774 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14775 }
14776 pm_parameters_node_optionals_append(params, param);
14777
14778 // If the value of the parameter increased the number of
14779 // reads of that parameter, then we need to warn that we
14780 // have a circular definition.
14781 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14782 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14783 }
14784
14785 context_pop(parser);
14786
14787 // If parsing the value of the parameter resulted in error recovery,
14788 // then we can put a missing node in its place and stop parsing the
14789 // parameters entirely now.
14790 if (parser->recovering) {
14791 parsing = false;
14792 break;
14793 }
14794 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14795 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14796 if (repeated) {
14797 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14798 }
14799 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14800 } else {
14801 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14802 if (repeated) {
14803 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14804 }
14805 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14806 }
14807
14808 break;
14809 }
14810 case PM_TOKEN_LABEL: {
14811 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14812 update_parameter_state(parser, &parser->current, &order);
14813
14814 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14815 parser_lex(parser);
14816
14817 pm_token_t name = parser->previous;
14818 pm_token_t local = name;
14819 local.end -= 1;
14820
14821 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14822 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14823 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14824 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14825 }
14826
14827 bool repeated = pm_parser_parameter_name_check(parser, &local);
14828 pm_parser_local_add_token(parser, &local, 1);
14829
14830 switch (parser->current.type) {
14831 case PM_TOKEN_COMMA:
14832 case PM_TOKEN_PARENTHESIS_RIGHT:
14833 case PM_TOKEN_PIPE: {
14834 context_pop(parser);
14835
14836 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14837 if (repeated) {
14838 pm_node_flag_set_repeated_parameter(param);
14839 }
14840
14841 pm_parameters_node_keywords_append(params, param);
14842 break;
14843 }
14844 case PM_TOKEN_SEMICOLON:
14845 case PM_TOKEN_NEWLINE: {
14846 context_pop(parser);
14847
14848 if (uses_parentheses) {
14849 parsing = false;
14850 break;
14851 }
14852
14853 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14854 if (repeated) {
14855 pm_node_flag_set_repeated_parameter(param);
14856 }
14857
14858 pm_parameters_node_keywords_append(params, param);
14859 break;
14860 }
14861 default: {
14862 pm_node_t *param;
14863
14864 if (token_begins_expression_p(parser->current.type)) {
14865 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14866 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14867
14868 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14869 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14870 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14871
14872 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14873 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14874 }
14875
14876 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14877 }
14878 else {
14879 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14880 }
14881
14882 if (repeated) {
14883 pm_node_flag_set_repeated_parameter(param);
14884 }
14885
14886 context_pop(parser);
14887 pm_parameters_node_keywords_append(params, param);
14888
14889 if (!uses_parentheses) {
14890 refute_optional_parameter(parser);
14891 }
14892
14893 // If parsing the value of the parameter resulted in error recovery,
14894 // then we can put a missing node in its place and stop parsing the
14895 // parameters entirely now.
14896 if (parser->recovering) {
14897 parsing = false;
14898 break;
14899 }
14900 }
14901 }
14902
14903 parser->in_keyword_arg = false;
14904 break;
14905 }
14906 case PM_TOKEN_USTAR:
14907 case PM_TOKEN_STAR: {
14908 update_parameter_state(parser, &parser->current, &order);
14909 parser_lex(parser);
14910
14911 pm_token_t operator = parser->previous;
14912 pm_token_t name;
14913 bool repeated = false;
14914
14915 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14916 name = parser->previous;
14917 repeated = pm_parser_parameter_name_check(parser, &name);
14918 pm_parser_local_add_token(parser, &name, 1);
14919 } else {
14920 name = not_provided(parser);
14921 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14922 }
14923
14924 if (!uses_parentheses) {
14925 refute_optional_parameter(parser);
14926 }
14927
14928 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14929 if (repeated) {
14930 pm_node_flag_set_repeated_parameter(param);
14931 }
14932
14933 if (params->rest == NULL) {
14934 pm_parameters_node_rest_set(params, param);
14935 } else {
14936 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14937 pm_parameters_node_posts_append(params, param);
14938 }
14939
14940 break;
14941 }
14942 case PM_TOKEN_STAR_STAR:
14943 case PM_TOKEN_USTAR_STAR: {
14944 pm_parameters_order_t previous_order = order;
14945 update_parameter_state(parser, &parser->current, &order);
14946 parser_lex(parser);
14947
14948 pm_token_t operator = parser->previous;
14949 pm_node_t *param;
14950
14951 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14952 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14953 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14954 }
14955
14956 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14957 } else {
14958 pm_token_t name;
14959
14960 bool repeated = false;
14961 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14962 name = parser->previous;
14963 repeated = pm_parser_parameter_name_check(parser, &name);
14964 pm_parser_local_add_token(parser, &name, 1);
14965 } else {
14966 name = not_provided(parser);
14967 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14968 }
14969
14970 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14971 if (repeated) {
14972 pm_node_flag_set_repeated_parameter(param);
14973 }
14974 }
14975
14976 if (!uses_parentheses) {
14977 refute_optional_parameter(parser);
14978 }
14979
14980 if (params->keyword_rest == NULL) {
14981 pm_parameters_node_keyword_rest_set(params, param);
14982 } else {
14983 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14984 pm_parameters_node_posts_append(params, param);
14985 }
14986
14987 break;
14988 }
14989 default:
14990 if (parser->previous.type == PM_TOKEN_COMMA) {
14991 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14992 // If we get here, then we have a trailing comma in a
14993 // block parameter list.
14994 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14995
14996 if (params->rest == NULL) {
14997 pm_parameters_node_rest_set(params, param);
14998 } else {
14999 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
15000 pm_parameters_node_posts_append(params, (pm_node_t *) param);
15001 }
15002 } else {
15003 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
15004 }
15005 }
15006
15007 parsing = false;
15008 break;
15009 }
15010
15011 // If we hit some kind of issue while parsing the parameter, this would
15012 // have been set to false. In that case, we need to break out of the
15013 // loop.
15014 if (!parsing) break;
15015
15016 bool accepted_newline = false;
15017 if (uses_parentheses) {
15018 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
15019 }
15020
15021 if (accept1(parser, PM_TOKEN_COMMA)) {
15022 // If there was a comma, but we also accepted a newline, then this
15023 // is a syntax error.
15024 if (accepted_newline) {
15025 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
15026 }
15027 } else {
15028 // If there was no comma, then we're done parsing parameters.
15029 break;
15030 }
15031 }
15032
15033 pm_do_loop_stack_pop(parser);
15034
15035 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
15036 if (params->base.location.start == params->base.location.end) {
15037 pm_node_destroy(parser, (pm_node_t *) params);
15038 return NULL;
15039 }
15040
15041 return params;
15042}
15043
15048static size_t
15049token_newline_index(const pm_parser_t *parser) {
15050 if (parser->heredoc_end == NULL) {
15051 // This is the common case. In this case we can look at the previously
15052 // recorded newline in the newline list and subtract from the current
15053 // offset.
15054 return parser->newline_list.size - 1;
15055 } else {
15056 // This is unlikely. This is the case that we have already parsed the
15057 // start of a heredoc, so we cannot rely on looking at the previous
15058 // offset of the newline list, and instead must go through the whole
15059 // process of a binary search for the line number.
15060 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
15061 }
15062}
15063
15068static int64_t
15069token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
15070 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
15071 const uint8_t *end = token->start;
15072
15073 // Skip over the BOM if it is present.
15074 if (
15075 newline_index == 0 &&
15076 parser->start[0] == 0xef &&
15077 parser->start[1] == 0xbb &&
15078 parser->start[2] == 0xbf
15079 ) cursor += 3;
15080
15081 int64_t column = 0;
15082 for (; cursor < end; cursor++) {
15083 switch (*cursor) {
15084 case '\t':
15085 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
15086 break;
15087 case ' ':
15088 column++;
15089 break;
15090 default:
15091 column++;
15092 if (break_on_non_space) return -1;
15093 break;
15094 }
15095 }
15096
15097 return column;
15098}
15099
15104static void
15105parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
15106 // If these warnings are disabled (unlikely), then we can just return.
15107 if (!parser->warn_mismatched_indentation) return;
15108
15109 // If the tokens are on the same line, we do not warn.
15110 size_t closing_newline_index = token_newline_index(parser);
15111 if (opening_newline_index == closing_newline_index) return;
15112
15113 // If the opening token has anything other than spaces or tabs before it,
15114 // then we do not warn. This is unless we are matching up an `if`/`end` pair
15115 // and the `if` immediately follows an `else` keyword.
15116 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
15117 if (!if_after_else && (opening_column == -1)) return;
15118
15119 // Get a reference to the closing token off the current parser. This assumes
15120 // that the caller has placed this in the correct position.
15121 pm_token_t *closing_token = &parser->current;
15122
15123 // If the tokens are at the same indentation, we do not warn.
15124 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
15125 if ((closing_column == -1) || (opening_column == closing_column)) return;
15126
15127 // If the closing column is greater than the opening column and we are
15128 // allowing indentation, then we do not warn.
15129 if (allow_indent && (closing_column > opening_column)) return;
15130
15131 // Otherwise, add a warning.
15132 PM_PARSER_WARN_FORMAT(
15133 parser,
15134 closing_token->start,
15135 closing_token->end,
15136 PM_WARN_INDENTATION_MISMATCH,
15137 (int) (closing_token->end - closing_token->start),
15138 (const char *) closing_token->start,
15139 (int) (opening_token->end - opening_token->start),
15140 (const char *) opening_token->start,
15141 ((int32_t) opening_newline_index) + parser->start_line
15142 );
15143}
15144
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * Values start at 1 and increase by one in declaration order.
 */
typedef enum {
    PM_RESCUES_BEGIN  = 1,
    PM_RESCUES_BLOCK  = 2,
    PM_RESCUES_CLASS  = 3,
    PM_RESCUES_DEF    = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
15154
15159static inline void
15160parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15161 pm_rescue_node_t *current = NULL;
15162
15163 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15164 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15165 parser_lex(parser);
15166
15167 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15168
15169 switch (parser->current.type) {
15170 case PM_TOKEN_EQUAL_GREATER: {
15171 // Here we have an immediate => after the rescue keyword, in which case
15172 // we're going to have an empty list of exceptions to rescue (which
15173 // implies StandardError).
15174 parser_lex(parser);
15175 pm_rescue_node_operator_set(rescue, &parser->previous);
15176
15177 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15178 reference = parse_target(parser, reference, false, false);
15179
15180 pm_rescue_node_reference_set(rescue, reference);
15181 break;
15182 }
15183 case PM_TOKEN_NEWLINE:
15184 case PM_TOKEN_SEMICOLON:
15185 case PM_TOKEN_KEYWORD_THEN:
15186 // Here we have a terminator for the rescue keyword, in which
15187 // case we're going to just continue on.
15188 break;
15189 default: {
15190 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15191 // Here we have something that could be an exception expression, so
15192 // we'll attempt to parse it here and any others delimited by commas.
15193
15194 do {
15195 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15196 pm_rescue_node_exceptions_append(rescue, expression);
15197
15198 // If we hit a newline, then this is the end of the rescue expression. We
15199 // can continue on to parse the statements.
15200 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15201
15202 // If we hit a `=>` then we're going to parse the exception variable. Once
15203 // we've done that, we'll break out of the loop and parse the statements.
15204 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15205 pm_rescue_node_operator_set(rescue, &parser->previous);
15206
15207 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15208 reference = parse_target(parser, reference, false, false);
15209
15210 pm_rescue_node_reference_set(rescue, reference);
15211 break;
15212 }
15213 } while (accept1(parser, PM_TOKEN_COMMA));
15214 }
15215 }
15216 }
15217
15218 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15219 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15220 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15221 }
15222 } else {
15223 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15224 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15225 }
15226
15227 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
15228 pm_accepts_block_stack_push(parser, true);
15229 pm_context_t context;
15230
15231 switch (type) {
15232 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15233 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15234 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15235 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15236 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15237 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15238 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15239 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15240 }
15241
15242 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15243 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15244
15245 pm_accepts_block_stack_pop(parser);
15246 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15247 }
15248
15249 if (current == NULL) {
15250 pm_begin_node_rescue_clause_set(parent_node, rescue);
15251 } else {
15252 pm_rescue_node_subsequent_set(current, rescue);
15253 }
15254
15255 current = rescue;
15256 }
15257
15258 // The end node locations on rescue nodes will not be set correctly
15259 // since we won't know the end until we've found all subsequent
15260 // clauses. This sets the end location on all rescues once we know it.
15261 if (current != NULL) {
15262 const uint8_t *end_to_set = current->base.location.end;
15263 pm_rescue_node_t *clause = parent_node->rescue_clause;
15264
15265 while (clause != NULL) {
15266 clause->base.location.end = end_to_set;
15267 clause = clause->subsequent;
15268 }
15269 }
15270
15271 pm_token_t else_keyword;
15272 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15273 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15274 opening_newline_index = token_newline_index(parser);
15275
15276 else_keyword = parser->current;
15277 opening = &else_keyword;
15278
15279 parser_lex(parser);
15280 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15281
15282 pm_statements_node_t *else_statements = NULL;
15283 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15284 pm_accepts_block_stack_push(parser, true);
15285 pm_context_t context;
15286
15287 switch (type) {
15288 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15289 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15290 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15291 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15292 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15293 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15294 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15295 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15296 }
15297
15298 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15299 pm_accepts_block_stack_pop(parser);
15300
15301 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15302 }
15303
15304 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15305 pm_begin_node_else_clause_set(parent_node, else_clause);
15306
15307 // If we don't have a `current` rescue node, then this is a dangling
15308 // else, and it's an error.
15309 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15310 }
15311
15312 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15313 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15314 pm_token_t ensure_keyword = parser->current;
15315
15316 parser_lex(parser);
15317 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15318
15319 pm_statements_node_t *ensure_statements = NULL;
15320 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15321 pm_accepts_block_stack_push(parser, true);
15322 pm_context_t context;
15323
15324 switch (type) {
15325 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15326 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15327 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15328 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15329 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15330 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15331 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15332 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15333 }
15334
15335 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15336 pm_accepts_block_stack_pop(parser);
15337
15338 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15339 }
15340
15341 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15342 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15343 }
15344
15345 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15346 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15347 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15348 } else {
15349 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15350 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15351 }
15352}
15353
15358static pm_begin_node_t *
15359parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15360 pm_token_t begin_keyword = not_provided(parser);
15361 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15362
15363 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15364 node->base.location.start = start;
15365
15366 return node;
15367}
15368
15373parse_block_parameters(
15374 pm_parser_t *parser,
15375 bool allows_trailing_comma,
15376 const pm_token_t *opening,
15377 bool is_lambda_literal,
15378 bool accepts_blocks_in_defaults,
15379 uint16_t depth
15380) {
15381 pm_parameters_node_t *parameters = NULL;
15382 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15383 parameters = parse_parameters(
15384 parser,
15385 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15386 false,
15387 allows_trailing_comma,
15388 false,
15389 accepts_blocks_in_defaults,
15390 true,
15391 (uint16_t) (depth + 1)
15392 );
15393 }
15394
15395 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15396 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15397 accept1(parser, PM_TOKEN_NEWLINE);
15398
15399 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15400 do {
15401 switch (parser->current.type) {
15402 case PM_TOKEN_CONSTANT:
15403 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15404 parser_lex(parser);
15405 break;
15406 case PM_TOKEN_INSTANCE_VARIABLE:
15407 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15408 parser_lex(parser);
15409 break;
15410 case PM_TOKEN_GLOBAL_VARIABLE:
15411 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15412 parser_lex(parser);
15413 break;
15414 case PM_TOKEN_CLASS_VARIABLE:
15415 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15416 parser_lex(parser);
15417 break;
15418 default:
15419 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15420 break;
15421 }
15422
15423 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15424 pm_parser_local_add_token(parser, &parser->previous, 1);
15425
15426 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15427 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15428
15429 pm_block_parameters_node_append_local(block_parameters, local);
15430 } while (accept1(parser, PM_TOKEN_COMMA));
15431 }
15432 }
15433
15434 return block_parameters;
15435}
15436
15441static bool
15442outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15443 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15444 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15445 }
15446
15447 return false;
15448}
15449
/**
 * The names of the numbered parameters `_1` through `_9`, stored so that the
 * entry at index N holds the name of parameter number N + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
15458
15464static pm_node_t *
15465parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15466 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15467
15468 // If we have ordinary parameters, then we will return them as the set of
15469 // parameters.
15470 if (parameters != NULL) {
15471 // If we also have implicit parameters, then this is an error.
15472 if (implicit_parameters->size > 0) {
15473 pm_node_t *node = implicit_parameters->nodes[0];
15474
15475 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15476 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15477 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15478 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15479 } else {
15480 assert(false && "unreachable");
15481 }
15482 }
15483
15484 return parameters;
15485 }
15486
15487 // If we don't have any implicit parameters, then the set of parameters is
15488 // NULL.
15489 if (implicit_parameters->size == 0) {
15490 return NULL;
15491 }
15492
15493 // If we don't have ordinary parameters, then we now must validate our set
15494 // of implicit parameters. We can only have numbered parameters or it, but
15495 // they cannot be mixed.
15496 uint8_t numbered_parameter = 0;
15497 bool it_parameter = false;
15498
15499 for (size_t index = 0; index < implicit_parameters->size; index++) {
15500 pm_node_t *node = implicit_parameters->nodes[index];
15501
15502 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15503 if (it_parameter) {
15504 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15505 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15506 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15507 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15508 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15509 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15510 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15511 } else {
15512 assert(false && "unreachable");
15513 }
15514 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15515 if (numbered_parameter > 0) {
15516 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15517 } else {
15518 it_parameter = true;
15519 }
15520 }
15521 }
15522
15523 if (numbered_parameter > 0) {
15524 // Go through the parent scopes and mark them as being disallowed from
15525 // using numbered parameters because this inner scope is using them.
15526 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15527 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15528 }
15529
15530 const pm_location_t location = { .start = opening->start, .end = closing->end };
15531 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15532 }
15533
15534 if (it_parameter) {
15535 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15536 }
15537
15538 return NULL;
15539}
15540
/**
 * Parse a block whose opening token has already been consumed; that token is
 * read back from parser->previous. When the opening token is
 * PM_TOKEN_BRACE_LEFT the body must be closed by PM_TOKEN_BRACE_RIGHT,
 * otherwise it must be closed by PM_TOKEN_KEYWORD_END.
 *
 * The block is parsed inside its own scope and its own accepts-block stack
 * entry, both of which are popped before the node is created. `depth` is
 * forwarded as depth + 1 to every nested parse call.
 *
 * Returns the node produced by pm_block_node_create.
 */
15544static pm_block_node_t *
15545parse_block(pm_parser_t *parser, uint16_t depth) {
15546 pm_token_t opening = parser->previous;
15547 accept1(parser, PM_TOKEN_NEWLINE);
15548
15549 pm_accepts_block_stack_push(parser, true);
15550 pm_parser_scope_push(parser, false);
15551
15552 pm_block_parameters_node_t *block_parameters = NULL;
15553
// Optional |...| parameter list. An immediately following pipe means the list
// is empty, so a parameters node with no parameters is created.
15554 if (accept1(parser, PM_TOKEN_PIPE)) {
15555 pm_token_t block_parameters_opening = parser->previous;
15556 if (match1(parser, PM_TOKEN_PIPE)) {
15557 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15558 parser->command_start = true;
15559 parser_lex(parser);
15560 } else {
15561 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15562 accept1(parser, PM_TOKEN_NEWLINE);
15563 parser->command_start = true;
15564 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15565 }
15566
// In both branches parser->previous is now the closing pipe (or whatever
// expect1 left there on error).
15567 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15568 }
15569
15570 accept1(parser, PM_TOKEN_NEWLINE);
15571 pm_node_t *statements = NULL;
15572
15573 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15574 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15575 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15576 }
15577
15578 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15579 } else {
15580 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
// Statements are skipped when the body starts directly with rescue/else/ensure.
15581 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
15582 pm_accepts_block_stack_push(parser, true);
15583 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15584 pm_accepts_block_stack_pop(parser);
15585 }
15586
// A rescue or ensure clause replaces `statements` with the node returned by
// parse_rescues_implicit_begin, which receives the statements parsed so far.
15587 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15588 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15589 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15590 }
15591 }
15592
15593 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15594 }
15595
// The locals and the parameters node are computed while the block's scope is
// still the current scope; the scope is popped only afterwards.
15596 pm_constant_id_list_t locals;
15597 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15598 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15599
15600 pm_parser_scope_pop(parser);
15601 pm_accepts_block_stack_pop(parser);
15602
15603 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15604}
15605
/**
 * Parse the arguments, and optionally a trailing block, that follow a call.
 * Results are written into `arguments`. Three forms are handled:
 *
 * - a parenthesized list opened by PM_TOKEN_PARENTHESIS_LEFT;
 * - when `accepts_command_call` is true, an unparenthesized list, entered only
 *   if the current token begins an expression (or is PM_TOKEN_USTAR,
 *   PM_TOKEN_USTAR_STAR or PM_TOKEN_UAMPERSAND) and is not PM_TOKEN_BRACE_LEFT;
 * - when `accepts_block` is true, a trailing block opened by
 *   PM_TOKEN_BRACE_LEFT, or by PM_TOKEN_KEYWORD_DO if
 *   pm_accepts_block_stack_p(parser) allows it.
 *
 * Returns true if any of these forms was consumed, false otherwise.
 */
15611static bool
15612parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15613 bool found = false;
15614
15615 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15616 found |= true;
15617 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15618
15619 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15620 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15621 } else {
15622 pm_accepts_block_stack_push(parser, true);
15623 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15624
// On a missing ")", report the error and rewrite parser->previous into an
// empty PM_TOKEN_MISSING token so that closing_loc below is zero-width.
15625 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15626 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15627 parser->previous.start = parser->previous.end;
15628 parser->previous.type = PM_TOKEN_MISSING;
15629 }
15630
15631 pm_accepts_block_stack_pop(parser);
15632 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15633 }
15634 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15635 found |= true;
// Unparenthesized arguments are parsed with the accepts-block flag pushed as
// false for their duration.
15636 pm_accepts_block_stack_push(parser, false);
15637
15638 // If we get here, then the subsequent token cannot be used as an infix
15639 // operator. In this case we assume the subsequent token is part of an
15640 // argument to this method call.
15641 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15642
15643 // If we have done with the arguments and still not consumed the comma,
15644 // then we have a trailing comma where we need to check whether it is
15645 // allowed or not.
15646 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15647 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15648 }
15649
15650 pm_accepts_block_stack_pop(parser);
15651 }
15652
15653 // If we're at the end of the arguments, we can now check if there is a block
15654 // node that starts with a {. If there is, then we can parse it and add it to
15655 // the arguments.
15656 if (accepts_block) {
15657 pm_block_node_t *block = NULL;
15658
// Note that pm_arguments_validate_block is only called for the brace form.
15659 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15660 found |= true;
15661 block = parse_block(parser, (uint16_t) (depth + 1));
15662 pm_arguments_validate_block(parser, arguments, block);
15663 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15664 found |= true;
15665 block = parse_block(parser, (uint16_t) (depth + 1));
15666 }
15667
15668 if (block != NULL) {
15669 if (arguments->block == NULL && !arguments->has_forwarding) {
15670 arguments->block = (pm_node_t *) block;
15671 } else {
// A block was already supplied (or the arguments have forwarding): report
// the error, move any existing block into the positional arguments so it is
// not lost, and keep the newly parsed block as the block.
15672 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15673
15674 if (arguments->block != NULL) {
15675 if (arguments->arguments == NULL) {
15676 arguments->arguments = pm_arguments_node_create(parser);
15677 }
15678 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15679 }
15680 arguments->block = (pm_node_t *) block;
15681 }
15682 }
15683 }
15684
15685 return found;
15686}
15687
15692static void
15693parse_return(pm_parser_t *parser, pm_node_t *node) {
15694 bool in_sclass = false;
15695 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15696 switch (context_node->context) {
15700 case PM_CONTEXT_BEGIN:
15701 case PM_CONTEXT_CASE_IN:
15704 case PM_CONTEXT_DEFINED:
15705 case PM_CONTEXT_ELSE:
15706 case PM_CONTEXT_ELSIF:
15707 case PM_CONTEXT_EMBEXPR:
15709 case PM_CONTEXT_FOR:
15710 case PM_CONTEXT_IF:
15712 case PM_CONTEXT_MAIN:
15714 case PM_CONTEXT_PARENS:
15715 case PM_CONTEXT_POSTEXE:
15717 case PM_CONTEXT_PREEXE:
15719 case PM_CONTEXT_TERNARY:
15720 case PM_CONTEXT_UNLESS:
15721 case PM_CONTEXT_UNTIL:
15722 case PM_CONTEXT_WHILE:
15723 // Keep iterating up the lists of contexts, because returns can
15724 // see through these.
15725 continue;
15729 case PM_CONTEXT_SCLASS:
15730 in_sclass = true;
15731 continue;
15735 case PM_CONTEXT_CLASS:
15739 case PM_CONTEXT_MODULE:
15740 // These contexts are invalid for a return.
15741 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15742 return;
15752 case PM_CONTEXT_DEF:
15758 // These contexts are valid for a return, and we should not
15759 // continue to loop.
15760 return;
15761 case PM_CONTEXT_NONE:
15762 // This case should never happen.
15763 assert(false && "unreachable");
15764 break;
15765 }
15766 }
15767 if (in_sclass) {
15768 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15769 }
15770}
15771
15776static void
15777parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15778 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15779 switch (context_node->context) {
15785 case PM_CONTEXT_DEFINED:
15786 case PM_CONTEXT_FOR:
15793 case PM_CONTEXT_POSTEXE:
15794 case PM_CONTEXT_UNTIL:
15795 case PM_CONTEXT_WHILE:
15796 // These are the good cases. We're allowed to have a block exit
15797 // in these contexts.
15798 return;
15799 case PM_CONTEXT_DEF:
15804 case PM_CONTEXT_MAIN:
15805 case PM_CONTEXT_PREEXE:
15806 case PM_CONTEXT_SCLASS:
15810 // These are the bad cases. We're not allowed to have a block
15811 // exit in these contexts.
15812 //
15813 // If we get here, then we're about to mark this block exit
15814 // as invalid. However, it could later _become_ valid if we
15815 // find a trailing while/until on the expression. In this
15816 // case instead of adding the error here, we'll add the
15817 // block exit to the list of exits for the expression, and
15818 // the node parsing will handle validating it instead.
15819 assert(parser->current_block_exits != NULL);
15820 pm_node_list_append(parser->current_block_exits, node);
15821 return;
15825 case PM_CONTEXT_BEGIN:
15826 case PM_CONTEXT_CASE_IN:
15831 case PM_CONTEXT_CLASS:
15833 case PM_CONTEXT_ELSE:
15834 case PM_CONTEXT_ELSIF:
15835 case PM_CONTEXT_EMBEXPR:
15837 case PM_CONTEXT_IF:
15841 case PM_CONTEXT_MODULE:
15843 case PM_CONTEXT_PARENS:
15846 case PM_CONTEXT_TERNARY:
15847 case PM_CONTEXT_UNLESS:
15848 // In these contexts we should continue walking up the list of
15849 // contexts.
15850 break;
15851 case PM_CONTEXT_NONE:
15852 // This case should never happen.
15853 assert(false && "unreachable");
15854 break;
15855 }
15856 }
15857}
15858
15863static pm_node_list_t *
15864push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15865 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15866 parser->current_block_exits = current_block_exits;
15867 return previous_block_exits;
15868}
15869
15875static void
15876flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15877 pm_node_t *block_exit;
15878 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15879 const char *type;
15880
15881 switch (PM_NODE_TYPE(block_exit)) {
15882 case PM_BREAK_NODE: type = "break"; break;
15883 case PM_NEXT_NODE: type = "next"; break;
15884 case PM_REDO_NODE: type = "redo"; break;
15885 default: assert(false && "unreachable"); type = ""; break;
15886 }
15887
15888 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15889 }
15890
15891 parser->current_block_exits = previous_block_exits;
15892}
15893
15898static void
15899pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15900 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15901 // If we matched a trailing while/until, then all of the block exits in
15902 // the contained list are valid. In this case we do not need to do
15903 // anything.
15904 parser->current_block_exits = previous_block_exits;
15905 } else if (previous_block_exits != NULL) {
15906 // If we did not matching a trailing while/until, then all of the block
15907 // exits contained in the list are invalid for this specific context.
15908 // However, they could still become valid in a higher level context if
15909 // there is another list above this one. In this case we'll push all of
15910 // the block exits up to the previous list.
15911 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15912 parser->current_block_exits = previous_block_exits;
15913 } else {
15914 // If we did not match a trailing while/until and this was the last
15915 // chance to do so, then all of the block exits in the list are invalid
15916 // and we need to add an error for each of them.
15917 flush_block_exits(parser, previous_block_exits);
15918 }
15919}
15920
15921static inline pm_node_t *
15922parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15923 context_push(parser, PM_CONTEXT_PREDICATE);
15924 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15925 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15926
15927 // Predicates are closed by a term, a "then", or a term and then a "then".
15928 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15929
15930 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15931 predicate_closed = true;
15932 *then_keyword = parser->previous;
15933 }
15934
15935 if (!predicate_closed) {
15936 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15937 }
15938
15939 context_pop(parser);
15940 return predicate;
15941}
15942
15943static inline pm_node_t *
15944parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15945 pm_node_list_t current_block_exits = { 0 };
15946 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15947
15948 pm_token_t keyword = parser->previous;
15949 pm_token_t then_keyword = not_provided(parser);
15950
15951 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15952 pm_statements_node_t *statements = NULL;
15953
15954 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15955 pm_accepts_block_stack_push(parser, true);
15956 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15957 pm_accepts_block_stack_pop(parser);
15958 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15959 }
15960
15961 pm_token_t end_keyword = not_provided(parser);
15962 pm_node_t *parent = NULL;
15963
15964 switch (context) {
15965 case PM_CONTEXT_IF:
15966 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15967 break;
15968 case PM_CONTEXT_UNLESS:
15969 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15970 break;
15971 default:
15972 assert(false && "unreachable");
15973 break;
15974 }
15975
15976 pm_node_t *current = parent;
15977
15978 // Parse any number of elsif clauses. This will form a linked list of if
15979 // nodes pointing to each other from the top.
15980 if (context == PM_CONTEXT_IF) {
15981 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15982 if (parser_end_of_line_p(parser)) {
15983 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15984 }
15985
15986 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15987 pm_token_t elsif_keyword = parser->current;
15988 parser_lex(parser);
15989
15990 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15991 pm_accepts_block_stack_push(parser, true);
15992
15993 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15994 pm_accepts_block_stack_pop(parser);
15995 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15996
15997 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15998 ((pm_if_node_t *) current)->subsequent = elsif;
15999 current = elsif;
16000 }
16001 }
16002
16003 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
16004 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
16005 opening_newline_index = token_newline_index(parser);
16006
16007 parser_lex(parser);
16008 pm_token_t else_keyword = parser->previous;
16009
16010 pm_accepts_block_stack_push(parser, true);
16011 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
16012 pm_accepts_block_stack_pop(parser);
16013
16014 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
16015 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
16016 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
16017
16018 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
16019
16020 switch (context) {
16021 case PM_CONTEXT_IF:
16022 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
16023 break;
16024 case PM_CONTEXT_UNLESS:
16025 ((pm_unless_node_t *) parent)->else_clause = else_node;
16026 break;
16027 default:
16028 assert(false && "unreachable");
16029 break;
16030 }
16031 } else {
16032 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
16033 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
16034 }
16035
16036 // Set the appropriate end location for all of the nodes in the subtree.
16037 switch (context) {
16038 case PM_CONTEXT_IF: {
16039 pm_node_t *current = parent;
16040 bool recursing = true;
16041
16042 while (recursing) {
16043 switch (PM_NODE_TYPE(current)) {
16044 case PM_IF_NODE:
16045 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
16046 current = ((pm_if_node_t *) current)->subsequent;
16047 recursing = current != NULL;
16048 break;
16049 case PM_ELSE_NODE:
16050 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
16051 recursing = false;
16052 break;
16053 default: {
16054 recursing = false;
16055 break;
16056 }
16057 }
16058 }
16059 break;
16060 }
16061 case PM_CONTEXT_UNLESS:
16062 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
16063 break;
16064 default:
16065 assert(false && "unreachable");
16066 break;
16067 }
16068
16069 pop_block_exits(parser, previous_block_exits);
16070 pm_node_list_free(&current_block_exits);
16071
16072 return parent;
16073}
16074
// Case-label list covering the keyword token types. The macro omits the
// leading `case` and the trailing `:`, so it is used as
// `case PM_CASE_KEYWORD:` inside a switch over a token type.
16079#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
16080 case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
16081 case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
16082 case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
16083 case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
16084 case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
16085 case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
16086 case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
16087 case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
16088 case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
16089 case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
16090
// Case-label list covering the operator token types; used as
// `case PM_CASE_OPERATOR:`. It shares PM_TOKEN_BACKTICK and
// PM_TOKEN_UMINUS_NUM with PM_CASE_PRIMITIVE, and several tokens with
// PM_CASE_PARAMETER, so those lists cannot be combined in one switch.
16095#define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
16096 case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
16097 case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
16098 case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
16099 case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
16100 case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
16101 case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
16102 case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
16103
// Case-label list covering token types of numeric literals, the opening
// tokens of string-like literals (symbols, regexps, %-literals, strings,
// heredocs), a handful of keywords (nil, self, true, false, __FILE__,
// __LINE__, __ENCODING__), PM_TOKEN_MINUS_GREATER, PM_TOKEN_UMINUS_NUM and
// PM_TOKEN_CHARACTER_LITERAL; used as `case PM_CASE_PRIMITIVE:`.
16109#define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
16110 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
16111 case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
16112 case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
16113 case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
16114 case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
16115 case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
16116 case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
16117 case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
16118
// Case-label list of token types grouped under the name "parameter"
// (ampersands, stars, `...`, identifiers, labels, constants and variable
// tokens); used as `case PM_CASE_PARAMETER:`.
16123#define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
16124 case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
16125 case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
16126 case PM_TOKEN_CLASS_VARIABLE
16127
// Case-label list of node types (not token types) grouped under the name
// "writable": the variable/constant read nodes, constant paths and multi
// targets. Used as `case PM_CASE_WRITABLE:` in a switch over a node type.
16132#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
16133 case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
16134 case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
16135 case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
16136
// Compile-time check that PM_STRING_FLAGS_FORCED_UTF8_ENCODING and
// PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING have the same integer value.
16137// Assert here that the flags are the same so that we can safely switch the type
16138// of the node without having to move the flags.
16139PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16140
16145static inline pm_node_flags_t
16146parse_unescaped_encoding(const pm_parser_t *parser) {
16147 if (parser->explicit_encoding != NULL) {
16149 // If the there's an explicit encoding and it's using a UTF-8 escape
16150 // sequence, then mark the string as UTF-8.
16151 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
16152 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16153 // If there's a non-UTF-8 escape sequence being used, then the
16154 // string uses the source encoding, unless the source is marked as
16155 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16156 // order to keep the string valid.
16157 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
16158 }
16159 }
16160 return 0;
16161}
16162
/**
 * Parse a single part of a string-like literal that may contain
 * interpolation, dispatching on parser->current.type:
 *
 * - PM_TOKEN_STRING_CONTENT: returns a string node with no opening or closing.
 * - PM_TOKEN_EMBEXPR_BEGIN: parses the embedded statements up to
 *   PM_TOKEN_EMBEXPR_END and returns an embedded statements node.
 * - PM_TOKEN_EMBVAR: parses the interpolated variable and returns an embedded
 *   variable node.
 * - anything else: consumes the token, reports
 *   PM_ERR_CANNOT_PARSE_STRING_PART and returns NULL.
 *
 * `depth` is forwarded as depth + 1 to parse_statements.
 */
16167static pm_node_t *
16168parse_string_part(pm_parser_t *parser, uint16_t depth) {
16169 switch (parser->current.type) {
16170 // Here the lexer has returned to us plain string content. In this case
16171 // we'll create a string node that has no opening or closing and return that
16172 // as the part. These kinds of parts look like:
16173 //
16174 // "aaa #{bbb} #@ccc ddd"
16175 // ^^^^ ^ ^^^^
16176 case PM_TOKEN_STRING_CONTENT: {
16177 pm_token_t opening = not_provided(parser);
16178 pm_token_t closing = not_provided(parser);
16179
// The node is built from the current token before it is consumed below.
16180 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16181 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16182
16183 parser_lex(parser);
16184 return node;
16185 }
16186 // Here the lexer has returned the beginning of an embedded expression. In
16187 // that case we'll parse the inner statements and return that as the part.
16188 // These kinds of parts look like:
16189 //
16190 // "aaa #{bbb} #@ccc ddd"
16191 // ^^^^^^
16192 case PM_TOKEN_EMBEXPR_BEGIN: {
16193 // Ruby disallows seeing encoding around interpolation in strings,
16194 // even though it is known at parse time.
16195 parser->explicit_encoding = NULL;
16196
// Save the lex state and brace nesting so they can be restored once the
// embedded statements have been parsed; inside, nesting restarts at 0.
16197 pm_lex_state_t state = parser->lex_state;
16198 int brace_nesting = parser->brace_nesting;
16199
16200 parser->brace_nesting = 0;
16201 lex_state_set(parser, PM_LEX_STATE_BEG);
16202 parser_lex(parser);
16203
16204 pm_token_t opening = parser->previous;
16205 pm_statements_node_t *statements = NULL;
16206
16207 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16208 pm_accepts_block_stack_push(parser, true);
16209 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16210 pm_accepts_block_stack_pop(parser);
16211 }
16212
// The saved state is restored before the closing token is consumed.
16213 parser->brace_nesting = brace_nesting;
16214 lex_state_set(parser, state);
16215
16216 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16217 pm_token_t closing = parser->previous;
16218
16219 // If this set of embedded statements only contains a single
16220 // statement, then Ruby does not consider it as a possible statement
16221 // that could emit a line event.
16222 if (statements != NULL && statements->body.size == 1) {
16223 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16224 }
16225
16226 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16227 }
16228
16229 // Here the lexer has returned the beginning of an embedded variable.
16230 // In that case we'll parse the variable and create an appropriate node
16231 // for it and then return that node. These kinds of parts look like:
16232 //
16233 // "aaa #{bbb} #@ccc ddd"
16234 // ^^^^^
16235 case PM_TOKEN_EMBVAR: {
16236 // Ruby disallows seeing encoding around interpolation in strings,
16237 // even though it is known at parse time.
16238 parser->explicit_encoding = NULL;
16239
16240 lex_state_set(parser, PM_LEX_STATE_BEG);
16241 parser_lex(parser);
16242
16243 pm_token_t operator = parser->previous;
16244 pm_node_t *variable;
16245
16246 switch (parser->current.type) {
16247 // In this case a back reference is being interpolated. We'll
16248 // create a back reference read node.
16249 case PM_TOKEN_BACK_REFERENCE:
16250 parser_lex(parser);
16251 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16252 break;
16253 // In this case an nth reference is being interpolated. We'll
16254 // create a numbered reference read node.
16255 case PM_TOKEN_NUMBERED_REFERENCE:
16256 parser_lex(parser);
16257 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16258 break;
16259 // In this case a global variable is being interpolated. We'll
16260 // create a global variable read node.
16261 case PM_TOKEN_GLOBAL_VARIABLE:
16262 parser_lex(parser);
16263 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16264 break;
16265 // In this case an instance variable is being interpolated.
16266 // We'll create an instance variable read node.
16267 case PM_TOKEN_INSTANCE_VARIABLE:
16268 parser_lex(parser);
16269 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16270 break;
16271 // In this case a class variable is being interpolated. We'll
16272 // create a class variable read node.
16273 case PM_TOKEN_CLASS_VARIABLE:
16274 parser_lex(parser);
16275 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16276 break;
16277 // We can hit here if we got an invalid token. In that case
16278 // we'll not attempt to lex this token and instead just return a
16279 // missing node.
16280 default:
16281 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16282 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16283 break;
16284 }
16285
16286 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16287 }
// Any other token cannot be a string part: consume it, report the error on
// the consumed token, and return no node.
16288 default:
16289 parser_lex(parser);
16290 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16291 return NULL;
16292 }
16293}
16294
16300static const uint8_t *
16301parse_operator_symbol_name(const pm_token_t *name) {
16302 switch (name->type) {
16303 case PM_TOKEN_TILDE:
16304 case PM_TOKEN_BANG:
16305 if (name->end[-1] == '@') return name->end - 1;
16307 default:
16308 return name->end;
16309 }
16310}
16311
16312static pm_node_t *
16313parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16314 pm_token_t closing = not_provided(parser);
16315 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16316
16317 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16318
16319 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16320 parser_lex(parser);
16321
16322 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16323 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16324
16325 return (pm_node_t *) symbol;
16326}
16327
/**
 * Parse a symbol whose opening token has already been consumed (it is read
 * back from parser->previous).
 *
 * - When lex_mode->mode is not PM_LEX_STRING, the symbol's name is a single
 *   token (operator, identifier, constant, variable, keyword, ...).
 * - Otherwise the content is lexed as string content. When
 *   lex_mode->as.string.interpolation is set the content is parsed as string
 *   parts and may yield an interpolated symbol node; when it is not set the
 *   content is plain, possibly split in two by a heredoc body.
 *
 * If `next_state` is not PM_LEX_STATE_NONE it is installed with lex_state_set
 * on each path before the end of the symbol is consumed. `depth` is forwarded
 * as depth + 1 to parse_string_part.
 *
 * Returns a symbol node or an interpolated symbol node.
 */
16333static pm_node_t *
16334parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16335 const pm_token_t opening = parser->previous;
16336
// Path 1: the symbol name is a single token.
16337 if (lex_mode->mode != PM_LEX_STRING) {
16338 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16339
16340 switch (parser->current.type) {
16341 case PM_CASE_OPERATOR:
16342 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16343 case PM_TOKEN_IDENTIFIER:
16344 case PM_TOKEN_CONSTANT:
16345 case PM_TOKEN_INSTANCE_VARIABLE:
16346 case PM_TOKEN_METHOD_NAME:
16347 case PM_TOKEN_CLASS_VARIABLE:
16348 case PM_TOKEN_GLOBAL_VARIABLE:
16349 case PM_TOKEN_NUMBERED_REFERENCE:
16350 case PM_TOKEN_BACK_REFERENCE:
16351 case PM_CASE_KEYWORD:
16352 parser_lex(parser);
16353 break;
16354 default:
16355 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16356 break;
16357 }
16358
// parser->previous is now the name token; it supplies both the node's value
// location and its unescaped bytes.
16359 pm_token_t closing = not_provided(parser);
16360 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16361
16362 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16363 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16364
16365 return (pm_node_t *) symbol;
16366 }
16367
// Path 2: string-like content with interpolation enabled.
16368 if (lex_mode->as.string.interpolation) {
16369 // If we have the end of the symbol, then we can return an empty symbol.
16370 if (match1(parser, PM_TOKEN_STRING_END)) {
16371 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16372 parser_lex(parser);
16373
16374 pm_token_t content = not_provided(parser);
16375 pm_token_t closing = parser->previous;
16376 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16377 }
16378
16379 // Now we can parse the first part of the symbol.
16380 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16381
16382 // If we got a string part, then it's possible that we could transform
16383 // what looks like an interpolated symbol into a regular symbol.
16384 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16385 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16386 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16387
16388 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16389 }
16390
// The node is created with `opening` standing in for the closing token; the
// real closing location is set below once the end of the symbol is reached.
16391 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16392 if (part) pm_interpolated_symbol_node_append(symbol, part);
16393
16394 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16395 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16396 pm_interpolated_symbol_node_append(symbol, part);
16397 }
16398 }
16399
16400 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
// At end of file the unterminated-symbol error is reported on the opening
// token rather than through expect1.
16401 if (match1(parser, PM_TOKEN_EOF)) {
16402 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16403 } else {
16404 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16405 }
16406
16407 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16408 return (pm_node_t *) symbol;
16409 }
16410
// Path 3: string-like content without interpolation.
16411 pm_token_t content;
16412 pm_string_t unescaped;
16413
16414 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16415 content = parser->current;
16416 unescaped = parser->current_string;
16417 parser_lex(parser);
16418
16419 // If we have two string contents in a row, then the content of this
16420 // symbol is split because of heredoc contents. This looks like:
16421 //
16422 // <<A; :'a
16423 // A
16424 // b'
16425 //
16426 // In this case, the best way we have to represent this is as an
16427 // interpolated string node, so that's what we'll do here.
16428 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16429 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16430 pm_token_t bounds = not_provided(parser);
16431
16432 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16433 pm_interpolated_symbol_node_append(symbol, part);
16434
16435 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16436 pm_interpolated_symbol_node_append(symbol, part);
16437
16438 if (next_state != PM_LEX_STATE_NONE) {
16439 lex_state_set(parser, next_state);
16440 }
16441
16442 parser_lex(parser);
16443 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16444
16445 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16446 return (pm_node_t *) symbol;
16447 }
16448 } else {
// No content token: synthesize an empty one located at the end of the
// previous token.
16449 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16450 pm_string_shared_init(&unescaped, content.start, content.end);
16451 }
16452
16453 if (next_state != PM_LEX_STATE_NONE) {
16454 lex_state_set(parser, next_state);
16455 }
16456
16457 if (match1(parser, PM_TOKEN_EOF)) {
16458 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16459 } else {
16460 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16461 }
16462
16463 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16464}
16465
16470static inline pm_node_t *
16471parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16472 switch (parser->current.type) {
16473 case PM_CASE_OPERATOR: {
16474 const pm_token_t opening = not_provided(parser);
16475 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16476 }
16477 case PM_CASE_KEYWORD:
16478 case PM_TOKEN_CONSTANT:
16479 case PM_TOKEN_IDENTIFIER:
16480 case PM_TOKEN_METHOD_NAME: {
16481 parser_lex(parser);
16482
16483 pm_token_t opening = not_provided(parser);
16484 pm_token_t closing = not_provided(parser);
16485 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16486
16487 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16488 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16489
16490 return (pm_node_t *) symbol;
16491 }
16492 case PM_TOKEN_SYMBOL_BEGIN: {
16493 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16494 parser_lex(parser);
16495
16496 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16497 }
16498 default:
16499 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16500 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16501 }
16502}
16503
16510static inline pm_node_t *
16511parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16512 switch (parser->current.type) {
16513 case PM_CASE_OPERATOR: {
16514 const pm_token_t opening = not_provided(parser);
16515 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16516 }
16517 case PM_CASE_KEYWORD:
16518 case PM_TOKEN_CONSTANT:
16519 case PM_TOKEN_IDENTIFIER:
16520 case PM_TOKEN_METHOD_NAME: {
16521 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16522 parser_lex(parser);
16523
16524 pm_token_t opening = not_provided(parser);
16525 pm_token_t closing = not_provided(parser);
16526 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16527
16528 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16529 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16530
16531 return (pm_node_t *) symbol;
16532 }
16533 case PM_TOKEN_SYMBOL_BEGIN: {
16534 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16535 parser_lex(parser);
16536
16537 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16538 }
16539 case PM_TOKEN_BACK_REFERENCE:
16540 parser_lex(parser);
16541 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16542 case PM_TOKEN_NUMBERED_REFERENCE:
16543 parser_lex(parser);
16544 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16545 case PM_TOKEN_GLOBAL_VARIABLE:
16546 parser_lex(parser);
16547 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16548 default:
16549 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16550 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16551 }
16552}
16553
16558static pm_node_t *
16559parse_variable(pm_parser_t *parser) {
16560 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16561 int depth;
16562 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16563
16564 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16565 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16566 }
16567
16568 pm_scope_t *current_scope = parser->current_scope;
16569 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16570 if (is_numbered_param) {
16571 // When you use a numbered parameter, it implies the existence of
16572 // all of the locals that exist before it. For example, referencing
16573 // _2 means that _1 must exist. Therefore here we loop through all
16574 // of the possibilities and add them into the constant pool.
16575 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16576 for (uint8_t number = 1; number <= maximum; number++) {
16577 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16578 }
16579
16580 if (!match1(parser, PM_TOKEN_EQUAL)) {
16581 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16582 }
16583
16584 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16585 pm_node_list_append(&current_scope->implicit_parameters, node);
16586
16587 return node;
16588 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16589 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16590 pm_node_list_append(&current_scope->implicit_parameters, node);
16591
16592 return node;
16593 }
16594 }
16595
16596 return NULL;
16597}
16598
16602static pm_node_t *
16603parse_variable_call(pm_parser_t *parser) {
16604 pm_node_flags_t flags = 0;
16605
16606 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16607 pm_node_t *node = parse_variable(parser);
16608 if (node != NULL) return node;
16609 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
16610 }
16611
16612 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16613 pm_node_flag_set((pm_node_t *)node, flags);
16614
16615 return (pm_node_t *) node;
16616}
16617
16623static inline pm_token_t
16624parse_method_definition_name(pm_parser_t *parser) {
16625 switch (parser->current.type) {
16626 case PM_CASE_KEYWORD:
16627 case PM_TOKEN_CONSTANT:
16628 case PM_TOKEN_METHOD_NAME:
16629 parser_lex(parser);
16630 return parser->previous;
16631 case PM_TOKEN_IDENTIFIER:
16632 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16633 parser_lex(parser);
16634 return parser->previous;
16635 case PM_CASE_OPERATOR:
16636 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16637 parser_lex(parser);
16638 return parser->previous;
16639 default:
16640 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16641 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16642 }
16643}
16644
16645static void
16646parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16647 // Get a reference to the string struct that is being held by the string
16648 // node. This is the value we're going to actually manipulate.
16649 pm_string_ensure_owned(string);
16650
16651 // Now get the bounds of the existing string. We'll use this as a
16652 // destination to move bytes into. We'll also use it for bounds checking
16653 // since we don't require that these strings be null terminated.
16654 size_t dest_length = pm_string_length(string);
16655 const uint8_t *source_cursor = (uint8_t *) string->source;
16656 const uint8_t *source_end = source_cursor + dest_length;
16657
16658 // We're going to move bytes backward in the string when we get leading
16659 // whitespace, so we'll maintain a pointer to the current position in the
16660 // string that we're writing to.
16661 size_t trimmed_whitespace = 0;
16662
16663 // While we haven't reached the amount of common whitespace that we need to
16664 // trim and we haven't reached the end of the string, we'll keep trimming
16665 // whitespace. Trimming in this context means skipping over these bytes such
16666 // that they aren't copied into the new string.
16667 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16668 if (*source_cursor == '\t') {
16669 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16670 if (trimmed_whitespace > common_whitespace) break;
16671 } else {
16672 trimmed_whitespace++;
16673 }
16674
16675 source_cursor++;
16676 dest_length--;
16677 }
16678
16679 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16680 string->length = dest_length;
16681}
16682
16686static void
16687parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16688 // The next node should be dedented if it's the first node in the list or if
16689 // it follows a string node.
16690 bool dedent_next = true;
16691
16692 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16693 // keep around two indices: a read and a write. If we end up trimming all of
16694 // the whitespace from a node, then we'll drop it from the list entirely.
16695 size_t write_index = 0;
16696
16697 pm_node_t *node;
16698 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16699 // We're not manipulating child nodes that aren't strings. In this case
16700 // we'll skip past it and indicate that the subsequent node should not
16701 // be dedented.
16702 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16703 nodes->nodes[write_index++] = node;
16704 dedent_next = false;
16705 continue;
16706 }
16707
16708 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16709 if (dedent_next) {
16710 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16711 }
16712
16713 if (string_node->unescaped.length == 0) {
16714 pm_node_destroy(parser, node);
16715 } else {
16716 nodes->nodes[write_index++] = node;
16717 }
16718
16719 // We always dedent the next node if it follows a string node.
16720 dedent_next = true;
16721 }
16722
16723 nodes->size = write_index;
16724}
16725
16729static pm_token_t
16730parse_strings_empty_content(const uint8_t *location) {
16731 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16732}
16733
/**
 * Parse one or more adjacent string literals, starting at the current
 * PM_TOKEN_STRING_BEGIN token (asserted on entry). Each iteration of the outer
 * loop consumes one literal.
 *
 * A literal becomes a string node, an interpolated string node, or, when it is
 * terminated by PM_TOKEN_LABEL_END, a symbol or interpolated symbol node. A
 * label is only permitted when both the lex mode's label_allowed field and
 * `accepts_label` are true; otherwise PM_ERR_UNEXPECTED_LABEL is recorded on
 * the node.
 *
 * `current` is an already-parsed node to concatenate onto, or NULL. When
 * `current` is NULL and the first literal is a symbol, that symbol is returned
 * immediately. When more than one node has to be combined, they are collected
 * into an interpolated string node container, and PM_ERR_STRING_CONCATENATION
 * is recorded for any participant that is not a string or interpolated string
 * node.
 *
 * `depth` is forwarded, incremented by one, to parse_string_part.
 */
16737static inline pm_node_t *
16738parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16739 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
// Becomes true once `current` has been wrapped in a container node.
16740 bool concating = false;
16741
16742 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16743 pm_node_t *node = NULL;
16744
16745 // Here we have found a string literal. We'll parse it and add it to
16746 // the list of strings.
16747 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16748 assert(lex_mode->mode == PM_LEX_STRING);
16749 bool lex_interpolation = lex_mode->as.string.interpolation;
16750 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16751
16752 pm_token_t opening = parser->current;
16753 parser_lex(parser);
16754
16755 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16756 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16757 // If we get here, then we have an end immediately after a
16758 // start. In that case we'll create an empty content token and
16759 // return an uninterpolated string.
16760 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16761 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16762
16763 pm_string_shared_init(&string->unescaped, content.start, content.end);
16764 node = (pm_node_t *) string;
16765 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16766 // If we get here, then we have an end of a label immediately
16767 // after a start. In that case we'll create an empty symbol
16768 // node.
16769 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16770 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16771
16772 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16773 node = (pm_node_t *) symbol;
16774
16775 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16776 } else if (!lex_interpolation) {
16777 // If we don't accept interpolation then we expect the string to
16778 // start with a single string content node.
16779 pm_string_t unescaped;
16780 pm_token_t content;
16781
16782 if (match1(parser, PM_TOKEN_EOF)) {
16783 unescaped = PM_STRING_EMPTY;
16784 content = not_provided(parser);
16785 } else {
16786 unescaped = parser->current_string;
16787 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16788 content = parser->previous;
16789 }
16790
16791 // It is unfortunately possible to have multiple string content
16792 // nodes in a row in the case that there's heredoc content in
16793 // the middle of the string, like this cursed example:
16794 //
16795 // <<-END+'b
16796 // a
16797 // END
16798 // c'+'d'
16799 //
16800 // In that case we need to switch to an interpolated string to
16801 // be able to contain all of the parts.
16802 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16803 pm_node_list_t parts = { 0 };
16804
16805 pm_token_t delimiters = not_provided(parser);
16806 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16807 pm_node_list_append(&parts, part);
16808
16809 do {
16810 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16811 pm_node_list_append(&parts, part);
16812 parser_lex(parser);
16813 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16814
16815 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16816 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16817
16818 pm_node_list_free(&parts);
16819 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16820 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16821 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16822 } else if (match1(parser, PM_TOKEN_EOF)) {
16823 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16824 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16825 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16826 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16827 } else {
16828 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16829 parser->previous.start = parser->previous.end;
16830 parser->previous.type = PM_TOKEN_MISSING;
16831 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16832 }
16833 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16834 // In this case we've hit string content so we know the string
16835 // at least has something in it. We'll need to check if the
16836 // following token is the end (in which case we can return a
16837 // plain string) or if it's not then it has interpolation.
16838 pm_token_t content = parser->current;
16839 pm_string_t unescaped = parser->current_string;
16840 parser_lex(parser);
16841
16842 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16843 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16844 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16845
16846 // Kind of odd behavior, but basically if we have an
16847 // unterminated string and it ends in a newline, we back up one
16848 // character so that the error message is on the last line of
16849 // content in the string.
16850 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16851 const uint8_t *location = parser->previous.end;
16852 if (location > parser->start && location[-1] == '\n') location--;
16853 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16854
16855 parser->previous.start = parser->previous.end;
16856 parser->previous.type = PM_TOKEN_MISSING;
16857 }
16858 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16859 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16860 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16861 } else {
16862 // If we get here, then we have interpolation so we'll need
16863 // to create a string or symbol node with interpolation.
16864 pm_node_list_t parts = { 0 };
16865 pm_token_t string_opening = not_provided(parser);
16866 pm_token_t string_closing = not_provided(parser);
16867
16868 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16869 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16870 pm_node_list_append(&parts, part);
16871
16872 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16873 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16874 pm_node_list_append(&parts, part);
16875 }
16876 }
16877
16878 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16879 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16880 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16881 } else if (match1(parser, PM_TOKEN_EOF)) {
16882 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16883 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16884 } else {
16885 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16886 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16887 }
16888
16889 pm_node_list_free(&parts);
16890 }
16891 } else {
16892 // If we get here, then the first part of the string is not plain
16893 // string content, in which case we need to parse the string as an
16894 // interpolated string.
16895 pm_node_list_t parts = { 0 };
16896 pm_node_t *part;
16897
16898 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16899 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16900 pm_node_list_append(&parts, part);
16901 }
16902 }
16903
16904 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16905 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16906 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16907 } else if (match1(parser, PM_TOKEN_EOF)) {
16908 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16909 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16910 } else {
16911 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16912 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16913 }
16914
16915 pm_node_list_free(&parts);
16916 }
16917
16918 if (current == NULL) {
16919 // If the node we just parsed is a symbol node, then we can't
16920 // concatenate it with anything else, so we can now return that
16921 // node.
16922 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16923 return node;
16924 }
16925
16926 // If we don't already have a node, then it's fine and we can just
16927 // set the result to be the node we just parsed.
16928 current = node;
16929 } else {
16930 // Otherwise we need to check the type of the node we just parsed.
16931 // If it cannot be concatenated with the previous node, then we'll
16932 // need to add a syntax error.
16933 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16934 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16935 }
16936
16937 // If we haven't already created our container for concatenation,
16938 // we'll do that now.
16939 if (!concating) {
16940 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16941 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16942 }
16943
16944 concating = true;
16945 pm_token_t bounds = not_provided(parser);
16946
16947 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16948 pm_interpolated_string_node_append(container, current);
16949 current = (pm_node_t *) container;
16950 }
16951
16952 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16953 }
16954 }
16955
16956 return current;
16957}
16958
// Values for the `flags` argument of parse_pattern. Callers either pass
// PM_PARSE_PATTERN_SINGLE on its own or combine PM_PARSE_PATTERN_TOP and
// PM_PARSE_PATTERN_MULTI with a bitwise OR.
// NOTE(review): presumably TOP marks a top-level pattern and MULTI permits
// several comma-separated patterns -- confirm against parse_pattern itself.
16959#define PM_PARSE_PATTERN_SINGLE 0
16960#define PM_PARSE_PATTERN_TOP 1
16961#define PM_PARSE_PATTERN_MULTI 2
16962
// Forward declaration of parse_pattern, so that the pattern helpers that
// follow can call it before its definition.
16963static pm_node_t *
16964parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16965
16971static void
16972parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16973 // Skip this capture if it starts with an underscore.
16974 if (*location->start == '_') return;
16975
16976 if (pm_constant_id_list_includes(captures, capture)) {
16977 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16978 } else {
16979 pm_constant_id_list_append(captures, capture);
16980 }
16981}
16982
16986static pm_node_t *
16987parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16988 // Now, if there are any :: operators that follow, parse them as constant
16989 // path nodes.
16990 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16991 pm_token_t delimiter = parser->previous;
16992 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16993 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16994 }
16995
16996 // If there is a [ or ( that follows, then this is part of a larger pattern
16997 // expression. We'll parse the inner pattern here, then modify the returned
16998 // inner pattern with our constant path attached.
16999 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
17000 return node;
17001 }
17002
17003 pm_token_t opening;
17004 pm_token_t closing;
17005 pm_node_t *inner = NULL;
17006
17007 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
17008 opening = parser->previous;
17009 accept1(parser, PM_TOKEN_NEWLINE);
17010
17011 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17012 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17013 accept1(parser, PM_TOKEN_NEWLINE);
17014 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17015 }
17016
17017 closing = parser->previous;
17018 } else {
17019 parser_lex(parser);
17020 opening = parser->previous;
17021 accept1(parser, PM_TOKEN_NEWLINE);
17022
17023 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17024 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17025 accept1(parser, PM_TOKEN_NEWLINE);
17026 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17027 }
17028
17029 closing = parser->previous;
17030 }
17031
17032 if (!inner) {
17033 // If there was no inner pattern, then we have something like Foo() or
17034 // Foo[]. In that case we'll create an array pattern with no requireds.
17035 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
17036 }
17037
17038 // Now that we have the inner pattern, check to see if it's an array, find,
17039 // or hash pattern. If it is, then we'll attach our constant path to it if
17040 // it doesn't already have a constant. If it's not one of those node types
17041 // or it does have a constant, then we'll create an array pattern.
17042 switch (PM_NODE_TYPE(inner)) {
17043 case PM_ARRAY_PATTERN_NODE: {
17044 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17045
17046 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
17047 pattern_node->base.location.start = node->location.start;
17048 pattern_node->base.location.end = closing.end;
17049
17050 pattern_node->constant = node;
17051 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17052 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17053
17054 return (pm_node_t *) pattern_node;
17055 }
17056
17057 break;
17058 }
17059 case PM_FIND_PATTERN_NODE: {
17060 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17061
17062 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
17063 pattern_node->base.location.start = node->location.start;
17064 pattern_node->base.location.end = closing.end;
17065
17066 pattern_node->constant = node;
17067 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17068 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17069
17070 return (pm_node_t *) pattern_node;
17071 }
17072
17073 break;
17074 }
17075 case PM_HASH_PATTERN_NODE: {
17076 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
17077
17078 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
17079 pattern_node->base.location.start = node->location.start;
17080 pattern_node->base.location.end = closing.end;
17081
17082 pattern_node->constant = node;
17083 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17084 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17085
17086 return (pm_node_t *) pattern_node;
17087 }
17088
17089 break;
17090 }
17091 default:
17092 break;
17093 }
17094
17095 // If we got here, then we didn't return one of the inner patterns by
17096 // attaching its constant. In this case we'll create an array pattern and
17097 // attach our constant to it.
17098 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
17099 pm_array_pattern_node_requireds_append(pattern_node, inner);
17100 return (pm_node_t *) pattern_node;
17101}
17102
17106static pm_splat_node_t *
17107parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17108 assert(parser->previous.type == PM_TOKEN_USTAR);
17109 pm_token_t operator = parser->previous;
17110 pm_node_t *name = NULL;
17111
17112 // Rest patterns don't necessarily have a name associated with them. So we
17113 // will check for that here. If they do, then we'll add it to the local
17114 // table since this pattern will cause it to become a local variable.
17115 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17116 pm_token_t identifier = parser->previous;
17117 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
17118
17119 int depth;
17120 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17121 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
17122 }
17123
17124 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
17125 name = (pm_node_t *) pm_local_variable_target_node_create(
17126 parser,
17127 &PM_LOCATION_TOKEN_VALUE(&identifier),
17128 constant_id,
17129 (uint32_t) (depth == -1 ? 0 : depth)
17130 );
17131 }
17132
17133 // Finally we can return the created node.
17134 return pm_splat_node_create(parser, &operator, name);
17135}
17136
17140static pm_node_t *
17141parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17142 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17143 parser_lex(parser);
17144
17145 pm_token_t operator = parser->previous;
17146 pm_node_t *value = NULL;
17147
17148 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17149 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17150 }
17151
17152 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17153 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17154
17155 int depth;
17156 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17157 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17158 }
17159
17160 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17161 value = (pm_node_t *) pm_local_variable_target_node_create(
17162 parser,
17163 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17164 constant_id,
17165 (uint32_t) (depth == -1 ? 0 : depth)
17166 );
17167 }
17168
17169 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17170}
17171
17176static bool
17177pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17178 ptrdiff_t length = end - start;
17179 if (length == 0) return false;
17180
17181 // First ensure that it starts with a valid identifier starting character.
17182 size_t width = char_is_identifier_start(parser, start, end - start);
17183 if (width == 0) return false;
17184
17185 // Next, ensure that it's not an uppercase character.
17186 if (parser->encoding_changed) {
17187 if (parser->encoding->isupper_char(start, length)) return false;
17188 } else {
17189 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17190 }
17191
17192 // Next, iterate through all of the bytes of the string to ensure that they
17193 // are all valid identifier characters.
17194 const uint8_t *cursor = start + width;
17195 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
17196 return cursor == end;
17197}
17198
17203static pm_node_t *
17204parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17205 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17206
17207 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17208 int depth = -1;
17209
17210 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17211 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17212 } else {
17213 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17214
17215 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17216 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17217 }
17218 }
17219
17220 if (depth == -1) {
17221 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17222 }
17223
17224 parse_pattern_capture(parser, captures, constant_id, value_loc);
17225 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17226 parser,
17227 value_loc,
17228 constant_id,
17229 (uint32_t) (depth == -1 ? 0 : depth)
17230 );
17231
17232 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17233}
17234
17239static void
17240parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17241 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17242 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17243 }
17244}
17245
17250parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17251 pm_node_list_t assocs = { 0 };
17252 pm_static_literals_t keys = { 0 };
17253 pm_node_t *rest = NULL;
17254
17255 switch (PM_NODE_TYPE(first_node)) {
17256 case PM_ASSOC_SPLAT_NODE:
17257 case PM_NO_KEYWORDS_PARAMETER_NODE:
17258 rest = first_node;
17259 break;
17260 case PM_SYMBOL_NODE: {
17261 if (pm_symbol_node_label_p(first_node)) {
17262 parse_pattern_hash_key(parser, &keys, first_node);
17263 pm_node_t *value;
17264
17265 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17266 // Otherwise, we will create an implicit local variable
17267 // target for the value.
17268 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17269 } else {
17270 // Here we have a value for the first assoc in the list, so
17271 // we will parse it now.
17272 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17273 }
17274
17275 pm_token_t operator = not_provided(parser);
17276 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17277
17278 pm_node_list_append(&assocs, assoc);
17279 break;
17280 }
17281 }
17283 default: {
17284 // If we get anything else, then this is an error. For this we'll
17285 // create a missing node for the value and create an assoc node for
17286 // the first node in the list.
17287 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17288 pm_parser_err_node(parser, first_node, diag_id);
17289
17290 pm_token_t operator = not_provided(parser);
17291 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17292 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17293
17294 pm_node_list_append(&assocs, assoc);
17295 break;
17296 }
17297 }
17298
17299 // If there are any other assocs, then we'll parse them now.
17300 while (accept1(parser, PM_TOKEN_COMMA)) {
17301 // Here we need to break to support trailing commas.
17302 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17303 // Trailing commas are not allowed to follow a rest pattern.
17304 if (rest != NULL) {
17305 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17306 }
17307
17308 break;
17309 }
17310
17311 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17312 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17313
17314 if (rest == NULL) {
17315 rest = assoc;
17316 } else {
17317 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17318 pm_node_list_append(&assocs, assoc);
17319 }
17320 } else {
17321 pm_node_t *key;
17322
17323 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17324 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17325
17326 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
17327 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17328 } else if (!pm_symbol_node_label_p(key)) {
17329 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17330 }
17331 } else {
17332 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17333 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17334 }
17335
17336 parse_pattern_hash_key(parser, &keys, key);
17337 pm_node_t *value = NULL;
17338
17339 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17340 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17341 } else {
17342 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17343 }
17344
17345 pm_token_t operator = not_provided(parser);
17346 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17347
17348 if (rest != NULL) {
17349 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17350 }
17351
17352 pm_node_list_append(&assocs, assoc);
17353 }
17354 }
17355
17356 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17357 xfree(assocs.nodes);
17358
17359 pm_static_literals_free(&keys);
17360 return node;
17361}
17362
17366static pm_node_t *
17367parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17368 switch (parser->current.type) {
17369 case PM_TOKEN_IDENTIFIER:
17370 case PM_TOKEN_METHOD_NAME: {
17371 parser_lex(parser);
17372 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17373
17374 int depth;
17375 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17376 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17377 }
17378
17379 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17380 return (pm_node_t *) pm_local_variable_target_node_create(
17381 parser,
17382 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17383 constant_id,
17384 (uint32_t) (depth == -1 ? 0 : depth)
17385 );
17386 }
17387 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17388 pm_token_t opening = parser->current;
17389 parser_lex(parser);
17390
17391 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17392 // If we have an empty array pattern, then we'll just return a new
17393 // array pattern node.
17394 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17395 }
17396
17397 // Otherwise, we'll parse the inner pattern, then deal with it depending
17398 // on the type it returns.
17399 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17400
17401 accept1(parser, PM_TOKEN_NEWLINE);
17402 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17403 pm_token_t closing = parser->previous;
17404
17405 switch (PM_NODE_TYPE(inner)) {
17406 case PM_ARRAY_PATTERN_NODE: {
17407 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17408 if (pattern_node->opening_loc.start == NULL) {
17409 pattern_node->base.location.start = opening.start;
17410 pattern_node->base.location.end = closing.end;
17411
17412 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17413 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17414
17415 return (pm_node_t *) pattern_node;
17416 }
17417
17418 break;
17419 }
17420 case PM_FIND_PATTERN_NODE: {
17421 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17422 if (pattern_node->opening_loc.start == NULL) {
17423 pattern_node->base.location.start = opening.start;
17424 pattern_node->base.location.end = closing.end;
17425
17426 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17427 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17428
17429 return (pm_node_t *) pattern_node;
17430 }
17431
17432 break;
17433 }
17434 default:
17435 break;
17436 }
17437
17438 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17439 pm_array_pattern_node_requireds_append(node, inner);
17440 return (pm_node_t *) node;
17441 }
17442 case PM_TOKEN_BRACE_LEFT: {
17443 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17444 parser->pattern_matching_newlines = false;
17445
17447 pm_token_t opening = parser->current;
17448 parser_lex(parser);
17449
17450 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17451 // If we have an empty hash pattern, then we'll just return a new hash
17452 // pattern node.
17453 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17454 } else {
17455 pm_node_t *first_node;
17456
17457 switch (parser->current.type) {
17458 case PM_TOKEN_LABEL:
17459 parser_lex(parser);
17460 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17461 break;
17462 case PM_TOKEN_USTAR_STAR:
17463 first_node = parse_pattern_keyword_rest(parser, captures);
17464 break;
17465 case PM_TOKEN_STRING_BEGIN:
17466 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17467 break;
17468 default: {
17469 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17470 parser_lex(parser);
17471
17472 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17473 break;
17474 }
17475 }
17476
17477 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17478
17479 accept1(parser, PM_TOKEN_NEWLINE);
17480 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17481 pm_token_t closing = parser->previous;
17482
17483 node->base.location.start = opening.start;
17484 node->base.location.end = closing.end;
17485
17486 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17487 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17488 }
17489
17490 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17491 return (pm_node_t *) node;
17492 }
17493 case PM_TOKEN_UDOT_DOT:
17494 case PM_TOKEN_UDOT_DOT_DOT: {
17495 pm_token_t operator = parser->current;
17496 parser_lex(parser);
17497
17498 // Since we have a unary range operator, we need to parse the subsequent
17499 // expression as the right side of the range.
17500 switch (parser->current.type) {
17501 case PM_CASE_PRIMITIVE: {
17502 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17503 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17504 }
17505 default: {
17506 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17507 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17508 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17509 }
17510 }
17511 }
17512 case PM_CASE_PRIMITIVE: {
17513 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17514
17515 // If we found a label, we need to immediately return to the caller.
17516 if (pm_symbol_node_label_p(node)) return node;
17517
17518 // Call nodes (arithmetic operations) are not allowed in patterns
17519 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17520 pm_parser_err_node(parser, node, diag_id);
17521 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
17522 pm_node_destroy(parser, node);
17523 return (pm_node_t *) missing_node;
17524 }
17525
17526 // Now that we have a primitive, we need to check if it's part of a range.
17527 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17528 pm_token_t operator = parser->previous;
17529
17530 // Now that we have the operator, we need to check if this is followed
17531 // by another expression. If it is, then we will create a full range
17532 // node. Otherwise, we'll create an endless range.
17533 switch (parser->current.type) {
17534 case PM_CASE_PRIMITIVE: {
17535 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17536 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17537 }
17538 default:
17539 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17540 }
17541 }
17542
17543 return node;
17544 }
17545 case PM_TOKEN_CARET: {
17546 parser_lex(parser);
17547 pm_token_t operator = parser->previous;
17548
17549 // At this point we have a pin operator. We need to check the subsequent
17550 // expression to determine if it's a variable or an expression.
17551 switch (parser->current.type) {
17552 case PM_TOKEN_IDENTIFIER: {
17553 parser_lex(parser);
17554 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17555
17556 if (variable == NULL) {
17557 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17558 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17559 }
17560
17561 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17562 }
17563 case PM_TOKEN_INSTANCE_VARIABLE: {
17564 parser_lex(parser);
17565 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17566
17567 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17568 }
17569 case PM_TOKEN_CLASS_VARIABLE: {
17570 parser_lex(parser);
17571 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17572
17573 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17574 }
17575 case PM_TOKEN_GLOBAL_VARIABLE: {
17576 parser_lex(parser);
17577 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17578
17579 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17580 }
17581 case PM_TOKEN_NUMBERED_REFERENCE: {
17582 parser_lex(parser);
17583 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17584
17585 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17586 }
17587 case PM_TOKEN_BACK_REFERENCE: {
17588 parser_lex(parser);
17589 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17590
17591 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17592 }
17593 case PM_TOKEN_PARENTHESIS_LEFT: {
17594 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17595 parser->pattern_matching_newlines = false;
17596
17597 pm_token_t lparen = parser->current;
17598 parser_lex(parser);
17599
17600 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17601 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17602
17603 accept1(parser, PM_TOKEN_NEWLINE);
17604 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17605 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17606 }
17607 default: {
17608 // If we get here, then we have a pin operator followed by something
17609 // not understood. We'll create a missing node and return that.
17610 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17611 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17612 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17613 }
17614 }
17615 }
17616 case PM_TOKEN_UCOLON_COLON: {
17617 pm_token_t delimiter = parser->current;
17618 parser_lex(parser);
17619
17620 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17621 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17622
17623 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17624 }
17625 case PM_TOKEN_CONSTANT: {
17626 pm_token_t constant = parser->current;
17627 parser_lex(parser);
17628
17629 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17630 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17631 }
17632 default:
17633 pm_parser_err_current(parser, diag_id);
17634 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17635 }
17636}
17637
17642static pm_node_t *
17643parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17644 pm_node_t *node = first_node;
17645
17646 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
17647 pm_token_t operator = parser->previous;
17648
17649 switch (parser->current.type) {
17650 case PM_TOKEN_IDENTIFIER:
17651 case PM_TOKEN_BRACKET_LEFT_ARRAY:
17652 case PM_TOKEN_BRACE_LEFT:
17653 case PM_TOKEN_CARET:
17654 case PM_TOKEN_CONSTANT:
17655 case PM_TOKEN_UCOLON_COLON:
17656 case PM_TOKEN_UDOT_DOT:
17657 case PM_TOKEN_UDOT_DOT_DOT:
17658 case PM_CASE_PRIMITIVE: {
17659 if (node == NULL) {
17660 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17661 } else {
17662 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17663 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17664 }
17665
17666 break;
17667 }
17668 case PM_TOKEN_PARENTHESIS_LEFT:
17669 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17670 pm_token_t opening = parser->current;
17671 parser_lex(parser);
17672
17673 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17674 accept1(parser, PM_TOKEN_NEWLINE);
17675 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17676 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
17677
17678 if (node == NULL) {
17679 node = right;
17680 } else {
17681 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17682 }
17683
17684 break;
17685 }
17686 default: {
17687 pm_parser_err_current(parser, diag_id);
17688 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17689
17690 if (node == NULL) {
17691 node = right;
17692 } else {
17693 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17694 }
17695
17696 break;
17697 }
17698 }
17699 }
17700
17701 // If we have an =>, then we are assigning this pattern to a variable.
17702 // In this case we should create an assignment node.
17703 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17704 pm_token_t operator = parser->previous;
17705 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17706
17707 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17708 int depth;
17709
17710 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17711 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17712 }
17713
17714 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17715 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17716 parser,
17717 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17718 constant_id,
17719 (uint32_t) (depth == -1 ? 0 : depth)
17720 );
17721
17722 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17723 }
17724
17725 return node;
17726}
17727
17731static pm_node_t *
17732parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17733 pm_node_t *node = NULL;
17734
17735 bool leading_rest = false;
17736 bool trailing_rest = false;
17737
17738 switch (parser->current.type) {
17739 case PM_TOKEN_LABEL: {
17740 parser_lex(parser);
17741 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17742 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17743
17744 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17745 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17746 }
17747
17748 return node;
17749 }
17750 case PM_TOKEN_USTAR_STAR: {
17751 node = parse_pattern_keyword_rest(parser, captures);
17752 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17753
17754 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17755 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17756 }
17757
17758 return node;
17759 }
17760 case PM_TOKEN_STRING_BEGIN: {
17761 // We need special handling for string beginnings because they could
17762 // be dynamic symbols leading to hash patterns.
17763 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17764
17765 if (pm_symbol_node_label_p(node)) {
17766 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17767
17768 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17769 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17770 }
17771
17772 return node;
17773 }
17774
17775 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17776 break;
17777 }
17778 case PM_TOKEN_USTAR: {
17779 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17780 parser_lex(parser);
17781 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17782 leading_rest = true;
17783 break;
17784 }
17785 }
17787 default:
17788 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17789 break;
17790 }
17791
17792 // If we got a dynamic label symbol, then we need to treat it like the
17793 // beginning of a hash pattern.
17794 if (pm_symbol_node_label_p(node)) {
17795 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17796 }
17797
17798 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17799 // If we have a comma, then we are now parsing either an array pattern
17800 // or a find pattern. We need to parse all of the patterns, put them
17801 // into a big list, and then determine which type of node we have.
17802 pm_node_list_t nodes = { 0 };
17803 pm_node_list_append(&nodes, node);
17804
17805 // Gather up all of the patterns into the list.
17806 while (accept1(parser, PM_TOKEN_COMMA)) {
17807 // Break early here in case we have a trailing comma.
17808 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17809 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17810 pm_node_list_append(&nodes, node);
17811 trailing_rest = true;
17812 break;
17813 }
17814
17815 if (accept1(parser, PM_TOKEN_USTAR)) {
17816 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17817
17818 // If we have already parsed a splat pattern, then this is an
17819 // error. We will continue to parse the rest of the patterns,
17820 // but we will indicate it as an error.
17821 if (trailing_rest) {
17822 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17823 }
17824
17825 trailing_rest = true;
17826 } else {
17827 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17828 }
17829
17830 pm_node_list_append(&nodes, node);
17831 }
17832
17833 // If the first pattern and the last pattern are rest patterns, then we
17834 // will call this a find pattern, regardless of how many rest patterns
17835 // are in between because we know we already added the appropriate
17836 // errors. Otherwise we will create an array pattern.
17837 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17838 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17839
17840 if (nodes.size == 2) {
17841 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17842 }
17843 } else {
17844 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17845
17846 if (leading_rest && trailing_rest) {
17847 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17848 }
17849 }
17850
17851 xfree(nodes.nodes);
17852 } else if (leading_rest) {
17853 // Otherwise, if we parsed a single splat pattern, then we know we have
17854 // an array pattern, so we can go ahead and create that node.
17855 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17856 }
17857
17858 return node;
17859}
17860
17866static inline void
17867parse_negative_numeric(pm_node_t *node) {
17868 switch (PM_NODE_TYPE(node)) {
17869 case PM_INTEGER_NODE: {
17870 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17871 cast->base.location.start--;
17872 cast->value.negative = true;
17873 break;
17874 }
17875 case PM_FLOAT_NODE: {
17876 pm_float_node_t *cast = (pm_float_node_t *) node;
17877 cast->base.location.start--;
17878 cast->value = -cast->value;
17879 break;
17880 }
17881 case PM_RATIONAL_NODE: {
17882 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17883 cast->base.location.start--;
17884 cast->numerator.negative = true;
17885 break;
17886 }
17887 case PM_IMAGINARY_NODE:
17888 node->location.start--;
17889 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17890 break;
17891 default:
17892 assert(false && "unreachable");
17893 break;
17894 }
17895}
17896
17902static void
17903pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17904 switch (diag_id) {
17905 case PM_ERR_HASH_KEY: {
17906 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17907 break;
17908 }
17909 case PM_ERR_HASH_VALUE:
17910 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17911 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17912 break;
17913 }
17914 case PM_ERR_UNARY_RECEIVER: {
17915 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17916 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17917 break;
17918 }
17919 case PM_ERR_UNARY_DISALLOWED:
17920 case PM_ERR_EXPECT_ARGUMENT: {
17921 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17922 break;
17923 }
17924 default:
17925 pm_parser_err_previous(parser, diag_id);
17926 break;
17927 }
17928}
17929
/**
 * Walk up the context stack, starting at the parser's current context, to
 * decide whether a retry at the given node is permitted. The walk stops
 * silently at a context where retry is allowed, and stops with an error on
 * the node at a context where it is not. Which error is reported depends on
 * whether the walk passed through an else or an ensure on the way up.
 *
 * NOTE(review): several case labels of the switch below are not present in
 * this view of the file (the embedded numbering has gaps), so the statements
 * that set CONTEXT_THROUGH_ELSE and CONTEXT_THROUGH_ENSURE appear without the
 * labels that select them. They are reproduced exactly as found.
 */
17933static void
17934parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17935#define CONTEXT_NONE 0
17936#define CONTEXT_THROUGH_ENSURE 1
17937#define CONTEXT_THROUGH_ELSE 2
17938
17939 pm_context_node_t *context_node = parser->current_context;
17940 int context = CONTEXT_NONE;
17941
17942 while (context_node != NULL) {
17943 switch (context_node->context) {
17951 case PM_CONTEXT_DEFINED:
17953 // These are the good cases. We're allowed to have a retry here.
17954 return;
17955 case PM_CONTEXT_CLASS:
17956 case PM_CONTEXT_DEF:
17958 case PM_CONTEXT_MAIN:
17959 case PM_CONTEXT_MODULE:
17960 case PM_CONTEXT_PREEXE:
17961 case PM_CONTEXT_SCLASS:
17962 // These are the bad cases. We're not allowed to have a retry in
17963 // these contexts.
17964 if (context == CONTEXT_NONE) {
17965 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17966 } else if (context == CONTEXT_THROUGH_ENSURE) {
17967 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17968 } else if (context == CONTEXT_THROUGH_ELSE) {
17969 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17970 }
17971 return;
17979 // These are also bad cases, but with a more specific error
17980 // message indicating the else.
17981 context = CONTEXT_THROUGH_ELSE;
17982 break;
17990 // These are also bad cases, but with a more specific error
17991 // message indicating the ensure.
17992 context = CONTEXT_THROUGH_ENSURE;
17993 break;
17994 case PM_CONTEXT_NONE:
17995 // This case should never happen.
17996 assert(false && "unreachable");
17997 break;
17998 case PM_CONTEXT_BEGIN:
18001 case PM_CONTEXT_CASE_IN:
18004 case PM_CONTEXT_ELSE:
18005 case PM_CONTEXT_ELSIF:
18006 case PM_CONTEXT_EMBEXPR:
18008 case PM_CONTEXT_FOR:
18009 case PM_CONTEXT_IF:
18014 case PM_CONTEXT_PARENS:
18015 case PM_CONTEXT_POSTEXE:
18017 case PM_CONTEXT_TERNARY:
18018 case PM_CONTEXT_UNLESS:
18019 case PM_CONTEXT_UNTIL:
18020 case PM_CONTEXT_WHILE:
18021 // In these contexts we should continue walking up the list of
18022 // contexts.
18023 break;
18024 }
18025
18026 context_node = context_node->prev;
18027 }
18028
// The names removed here have to match the three macros defined at the top of
// the function, otherwise they stay defined for the rest of the file.
18029#undef CONTEXT_NONE
18030#undef CONTEXT_THROUGH_ENSURE
18031#undef CONTEXT_THROUGH_ELSE
18032}
18033
/**
 * Walk up the context stack, starting at the parser's current context, to
 * decide whether a yield at the given node is permitted. The walk stops
 * silently at a context where yield is allowed, and stops with
 * PM_ERR_INVALID_YIELD on the node at a context where it is not. If the stack
 * is exhausted without reaching either kind of context, nothing is reported.
 *
 * NOTE(review): the embedded numbering has gaps inside the switch below, so
 * some case labels may be absent from this view of the file.
 */
18037static void
18038parse_yield(pm_parser_t *parser, const pm_node_t *node) {
18039 pm_context_node_t *context_node = parser->current_context;
18040
18041 while (context_node != NULL) {
18042 switch (context_node->context) {
18043 case PM_CONTEXT_DEF:
18045 case PM_CONTEXT_DEFINED:
18049 // These are the good cases. We're allowed to have a yield in
18050 // these contexts.
18051 return;
18052 case PM_CONTEXT_CLASS:
18056 case PM_CONTEXT_MAIN:
18057 case PM_CONTEXT_MODULE:
18061 case PM_CONTEXT_SCLASS:
18065 // These are the bad cases. We're not allowed to have a yield in
18066 // these contexts.
18067 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
18068 return;
18069 case PM_CONTEXT_NONE:
18070 // This case should never happen.
18071 assert(false && "unreachable");
18072 break;
18073 case PM_CONTEXT_BEGIN:
18082 case PM_CONTEXT_CASE_IN:
18085 case PM_CONTEXT_ELSE:
18086 case PM_CONTEXT_ELSIF:
18087 case PM_CONTEXT_EMBEXPR:
18089 case PM_CONTEXT_FOR:
18090 case PM_CONTEXT_IF:
18098 case PM_CONTEXT_PARENS:
18099 case PM_CONTEXT_POSTEXE:
18101 case PM_CONTEXT_PREEXE:
18103 case PM_CONTEXT_TERNARY:
18104 case PM_CONTEXT_UNLESS:
18105 case PM_CONTEXT_UNTIL:
18106 case PM_CONTEXT_WHILE:
18107 // In these contexts we should continue walking up the list of
18108 // contexts.
18109 break;
18110 }
18111
// Neither allowed nor disallowed here, so look at the enclosing context.
18112 context_node = context_node->prev;
18113 }
18114}
18115
/**
 * NOTE(review): only part of this struct is visible in this view of the file.
 * The embedded numbering has gaps, and neither the remaining members nor the
 * closing of the typedef appear here. It is presumably the data handed to the
 * regular expression error callback defined below it; confirm against the
 * full file.
 */
18120typedef struct {
18123
// Presumably the start of a source span; confirm against the full file.
18125 const uint8_t *start;
18126
// Presumably the end of that source span; confirm against the full file.
18128 const uint8_t *end;
18129
18138
/**
 * Error callback passed to pm_regexp_parse. Reports message as a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic on the parser held by the callback
 * data.
 *
 * When the callback data is flagged as shared, the diagnostic is placed on
 * the [start, end) span passed to this callback. Otherwise it is placed on
 * the start and end stored in the callback data.
 *
 * NOTE(review): the declaration of callback_data is not present in this view
 * of the file; it is presumably obtained from the data argument.
 */
18143static void
18144parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
18146 pm_location_t location;
18147
18148 if (callback_data->shared) {
18149 location = (pm_location_t) { .start = start, .end = end };
18150 } else {
18151 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18152 }
18153
18154 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18155}
18156
/**
 * Run the regular expression parser over the unescaped source of the given
 * regular expression node so that any errors it finds are reported through
 * parse_regular_expression_error.
 *
 * The error data carries the parser, the start and end of the node's
 * location, and whether the unescaped string is of type PM_STRING_SHARED.
 *
 * NOTE(review): the line that opens the error_data initializer is not present
 * in this view of the file.
 */
18160static void
18161parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18162 const pm_string_t *unescaped = &node->unescaped;
18164 .parser = parser,
18165 .start = node->base.location.start,
18166 .end = node->base.location.end,
18167 .shared = unescaped->type == PM_STRING_SHARED
18168 };
18169
// The two NULL arguments are passed through to pm_regexp_parse unchanged;
// only the error callback and its data are supplied here.
18170 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18171}
18172
18176static inline pm_node_t *
18177parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18178 switch (parser->current.type) {
18179 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
18180 parser_lex(parser);
18181
18182 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18183 pm_accepts_block_stack_push(parser, true);
18184 bool parsed_bare_hash = false;
18185
18186 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18187 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18188
18189 // Handle the case where we don't have a comma and we have a
18190 // newline followed by a right bracket.
18191 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18192 break;
18193 }
18194
18195 // Ensure that we have a comma between elements in the array.
18196 if (array->elements.size > 0) {
18197 if (accept1(parser, PM_TOKEN_COMMA)) {
18198 // If there was a comma but we also accepts a newline,
18199 // then this is a syntax error.
18200 if (accepted_newline) {
18201 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18202 }
18203 } else {
18204 // If there was no comma, then we need to add a syntax
18205 // error.
18206 const uint8_t *location = parser->previous.end;
18207 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18208
18209 parser->previous.start = location;
18210 parser->previous.type = PM_TOKEN_MISSING;
18211 }
18212 }
18213
18214 // If we have a right bracket immediately following a comma,
18215 // this is allowed since it's a trailing comma. In this case we
18216 // can break out of the loop.
18217 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18218
18219 pm_node_t *element;
18220
18221 if (accept1(parser, PM_TOKEN_USTAR)) {
18222 pm_token_t operator = parser->previous;
18223 pm_node_t *expression = NULL;
18224
18225 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18226 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18227 } else {
18228 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18229 }
18230
18231 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18232 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18233 if (parsed_bare_hash) {
18234 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18235 }
18236
18237 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18238 pm_static_literals_t hash_keys = { 0 };
18239
18240 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
18241 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18242 }
18243
18244 pm_static_literals_free(&hash_keys);
18245 parsed_bare_hash = true;
18246 } else {
18247 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18248
18249 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18250 if (parsed_bare_hash) {
18251 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18252 }
18253
18254 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18255 pm_static_literals_t hash_keys = { 0 };
18256 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18257
18258 pm_token_t operator;
18259 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18260 operator = parser->previous;
18261 } else {
18262 operator = not_provided(parser);
18263 }
18264
18265 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18266 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18267 pm_keyword_hash_node_elements_append(hash, assoc);
18268
18269 element = (pm_node_t *) hash;
18270 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18271 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18272 }
18273
18274 pm_static_literals_free(&hash_keys);
18275 parsed_bare_hash = true;
18276 }
18277 }
18278
18279 pm_array_node_elements_append(array, element);
18280 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18281 }
18282
18283 accept1(parser, PM_TOKEN_NEWLINE);
18284
18285 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18286 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18287 parser->previous.start = parser->previous.end;
18288 parser->previous.type = PM_TOKEN_MISSING;
18289 }
18290
18291 pm_array_node_close_set(array, &parser->previous);
18292 pm_accepts_block_stack_pop(parser);
18293
18294 return (pm_node_t *) array;
18295 }
18296 case PM_TOKEN_PARENTHESIS_LEFT:
18297 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
18298 pm_token_t opening = parser->current;
18299 pm_node_flags_t flags = 0;
18300
18301 pm_node_list_t current_block_exits = { 0 };
18302 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18303
18304 parser_lex(parser);
18305 while (true) {
18306 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18307 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18308 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18309 break;
18310 }
18311 }
18312
18313 // If this is the end of the file or we match a right parenthesis, then
18314 // we have an empty parentheses node, and we can immediately return.
18315 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18316 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18317
18318 pop_block_exits(parser, previous_block_exits);
18319 pm_node_list_free(&current_block_exits);
18320
18321 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
18322 }
18323
18324 // Otherwise, we're going to parse the first statement in the list
18325 // of statements within the parentheses.
18326 pm_accepts_block_stack_push(parser, true);
18327 context_push(parser, PM_CONTEXT_PARENS);
18328 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18329 context_pop(parser);
18330
18331 // Determine if this statement is followed by a terminator. In the
18332 // case of a single statement, this is fine. But in the case of
18333 // multiple statements it's required.
18334 bool terminator_found = false;
18335
18336 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18337 terminator_found = true;
18338 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18339 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18340 terminator_found = true;
18341 }
18342
18343 if (terminator_found) {
18344 while (true) {
18345 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18346 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18347 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18348 break;
18349 }
18350 }
18351 }
18352
18353 // If we hit a right parenthesis, then we're done parsing the
18354 // parentheses node, and we can check which kind of node we should
18355 // return.
18356 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18357 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
18358 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18359 }
18360
18361 parser_lex(parser);
18362 pm_accepts_block_stack_pop(parser);
18363
18364 pop_block_exits(parser, previous_block_exits);
18365 pm_node_list_free(&current_block_exits);
18366
18367 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18368 // If we have a single statement and are ending on a right
18369 // parenthesis, then we need to check if this is possibly a
18370 // multiple target node.
18371 pm_multi_target_node_t *multi_target;
18372
18373 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18374 multi_target = (pm_multi_target_node_t *) statement;
18375 } else {
18376 multi_target = pm_multi_target_node_create(parser);
18377 pm_multi_target_node_targets_append(parser, multi_target, statement);
18378 }
18379
18380 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18381 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18382
18383 multi_target->lparen_loc = lparen_loc;
18384 multi_target->rparen_loc = rparen_loc;
18385 multi_target->base.location.start = lparen_loc.start;
18386 multi_target->base.location.end = rparen_loc.end;
18387
18388 pm_node_t *result;
18389 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18390 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18391 accept1(parser, PM_TOKEN_NEWLINE);
18392 } else {
18393 result = (pm_node_t *) multi_target;
18394 }
18395
18396 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18397 // All set, this is explicitly allowed by the parent
18398 // context.
18399 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18400 // All set, we're inside a for loop and we're parsing
18401 // multiple targets.
18402 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18403 // Multi targets are not allowed when it's not a
18404 // statement level.
18405 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18406 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18407 // Multi targets must be followed by an equal sign in
18408 // order to be valid (or a right parenthesis if they are
18409 // nested).
18410 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18411 }
18412
18413 return result;
18414 }
18415
18416 // If we have a single statement and are ending on a right parenthesis
18417 // and we didn't return a multiple assignment node, then we can return a
18418 // regular parentheses node now.
18419 pm_statements_node_t *statements = pm_statements_node_create(parser);
18420 pm_statements_node_body_append(parser, statements, statement, true);
18421
18422 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18423 }
18424
18425 // If we have more than one statement in the set of parentheses,
18426 // then we are going to parse all of them as a list of statements.
18427 // We'll do that here.
18428 context_push(parser, PM_CONTEXT_PARENS);
18429 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18430
18431 pm_statements_node_t *statements = pm_statements_node_create(parser);
18432 pm_statements_node_body_append(parser, statements, statement, true);
18433
18434 // If we didn't find a terminator and we didn't find a right
18435 // parenthesis, then this is a syntax error.
18436 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18437 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18438 }
18439
18440 // Parse each statement within the parentheses.
18441 while (true) {
18442 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18443 pm_statements_node_body_append(parser, statements, node, true);
18444
18445 // If we're recovering from a syntax error, then we need to stop
18446 // parsing the statements now.
18447 if (parser->recovering) {
18448 // If this is the level of context where the recovery has
18449 // happened, then we can mark the parser as done recovering.
18450 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18451 break;
18452 }
18453
18454 // If we couldn't parse an expression at all, then we need to
18455 // bail out of the loop.
18456 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18457
18458 // If we successfully parsed a statement, then we are going to
18459 // need terminator to delimit them.
18460 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18461 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18462 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18463 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18464 break;
18465 } else if (!match1(parser, PM_TOKEN_EOF)) {
18466 // If we're at the end of the file, then we're going to add
18467 // an error after this for the ) anyway.
18468 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18469 }
18470 }
18471
18472 context_pop(parser);
18473 pm_accepts_block_stack_pop(parser);
18474 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18475
18476 // When we're parsing multi targets, we allow them to be followed by
18477 // a right parenthesis if they are at the statement level. This is
18478 // only possible if they are the final statement in a parentheses.
18479 // We need to explicitly reject that here.
18480 {
18481 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18482
18483 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18484 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18485 pm_multi_target_node_targets_append(parser, multi_target, statement);
18486
18487 statement = (pm_node_t *) multi_target;
18488 statements->body.nodes[statements->body.size - 1] = statement;
18489 }
18490
18491 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18492 const uint8_t *offset = statement->location.end;
18493 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18494 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18495
18496 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18497 statements->body.nodes[statements->body.size - 1] = statement;
18498
18499 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18500 }
18501 }
18502
18503 pop_block_exits(parser, previous_block_exits);
18504 pm_node_list_free(&current_block_exits);
18505
18506 pm_void_statements_check(parser, statements, true);
18507 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18508 }
18509 case PM_TOKEN_BRACE_LEFT: {
18510 // If we were passed a current_hash_keys via the parser, then that
18511 // means we're already parsing a hash and we want to share the set
18512 // of hash keys with this inner hash we're about to parse for the
18513 // sake of warnings. We'll set it to NULL after we grab it to make
18514 // sure subsequent expressions don't use it. Effectively this is a
18515 // way of getting around passing it to every call to
18516 // parse_expression.
18517 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18518 parser->current_hash_keys = NULL;
18519
18520 pm_accepts_block_stack_push(parser, true);
18521 parser_lex(parser);
18522
18523 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18524
18525 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18526 if (current_hash_keys != NULL) {
18527 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18528 } else {
18529 pm_static_literals_t hash_keys = { 0 };
18530 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18531 pm_static_literals_free(&hash_keys);
18532 }
18533
18534 accept1(parser, PM_TOKEN_NEWLINE);
18535 }
18536
18537 pm_accepts_block_stack_pop(parser);
18538 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18539 pm_hash_node_closing_loc_set(node, &parser->previous);
18540
18541 return (pm_node_t *) node;
18542 }
18543 case PM_TOKEN_CHARACTER_LITERAL: {
18544 pm_token_t closing = not_provided(parser);
18545 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(
18546 parser,
18547 &(pm_token_t) {
18548 .type = PM_TOKEN_STRING_BEGIN,
18549 .start = parser->current.start,
18550 .end = parser->current.start + 1
18551 },
18552 &(pm_token_t) {
18553 .type = PM_TOKEN_STRING_CONTENT,
18554 .start = parser->current.start + 1,
18555 .end = parser->current.end
18556 },
18557 &closing
18558 );
18559
18560 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18561
18562 // Skip past the character literal here, since now we have handled
18563 // parser->explicit_encoding correctly.
18564 parser_lex(parser);
18565
18566 // Characters can be followed by strings in which case they are
18567 // automatically concatenated.
18568 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18569 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18570 }
18571
18572 return node;
18573 }
18574 case PM_TOKEN_CLASS_VARIABLE: {
18575 parser_lex(parser);
18576 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18577
18578 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18579 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18580 }
18581
18582 return node;
18583 }
18584 case PM_TOKEN_CONSTANT: {
18585 parser_lex(parser);
18586 pm_token_t constant = parser->previous;
18587
18588 // If a constant is immediately followed by parentheses, then this is in
18589 // fact a method call, not a constant read.
18590 if (
18591 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18592 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18593 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18594 match1(parser, PM_TOKEN_BRACE_LEFT)
18595 ) {
18596 pm_arguments_t arguments = { 0 };
18597 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18598 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18599 }
18600
18601 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18602
18603 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18604 // If we get here, then we have a comma immediately following a
18605 // constant, so we're going to parse this as a multiple assignment.
18606 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18607 }
18608
18609 return node;
18610 }
18611 case PM_TOKEN_UCOLON_COLON: {
18612 parser_lex(parser);
18613 pm_token_t delimiter = parser->previous;
18614
18615 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18616 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18617
18618 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18619 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18620 }
18621
18622 return node;
18623 }
18624 case PM_TOKEN_UDOT_DOT:
18625 case PM_TOKEN_UDOT_DOT_DOT: {
18626 pm_token_t operator = parser->current;
18627 parser_lex(parser);
18628
18629 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18630
18631 // Unary .. and ... are special because these are non-associative
18632 // operators that can also be unary operators. In this case we need
18633 // to explicitly reject code that has a .. or ... that follows this
18634 // expression.
18635 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18636 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18637 }
18638
18639 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18640 }
18641 case PM_TOKEN_FLOAT:
18642 parser_lex(parser);
18643 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18644 case PM_TOKEN_FLOAT_IMAGINARY:
18645 parser_lex(parser);
18646 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18647 case PM_TOKEN_FLOAT_RATIONAL:
18648 parser_lex(parser);
18649 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18650 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
18651 parser_lex(parser);
18652 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18653 case PM_TOKEN_NUMBERED_REFERENCE: {
18654 parser_lex(parser);
18655 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18656
18657 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18658 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18659 }
18660
18661 return node;
18662 }
18663 case PM_TOKEN_GLOBAL_VARIABLE: {
18664 parser_lex(parser);
18665 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18666
18667 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18668 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18669 }
18670
18671 return node;
18672 }
18673 case PM_TOKEN_BACK_REFERENCE: {
18674 parser_lex(parser);
18675 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18676
18677 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18678 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18679 }
18680
18681 return node;
18682 }
18683 case PM_TOKEN_IDENTIFIER:
18684 case PM_TOKEN_METHOD_NAME: {
18685 parser_lex(parser);
18686 pm_token_t identifier = parser->previous;
18687 pm_node_t *node = parse_variable_call(parser);
18688
18689 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18690 // If parse_variable_call returned with a call node, then we
18691 // know the identifier is not in the local table. In that case
18692 // we need to check if there are arguments following the
18693 // identifier.
18694 pm_call_node_t *call = (pm_call_node_t *) node;
18695 pm_arguments_t arguments = { 0 };
18696
18697 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18698 // Since we found arguments, we need to turn off the
18699 // variable call bit in the flags.
18700 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18701
18702 call->opening_loc = arguments.opening_loc;
18703 call->arguments = arguments.arguments;
18704 call->closing_loc = arguments.closing_loc;
18705 call->block = arguments.block;
18706
18707 const uint8_t *end = pm_arguments_end(&arguments);
18708 if (!end) {
18709 end = call->message_loc.end;
18710 }
18711 call->base.location.end = end;
18712 }
18713 } else {
18714 // Otherwise, we know the identifier is in the local table. This
18715 // can still be a method call if it is followed by arguments or
18716 // a block, so we need to check for that here.
18717 if (
18718 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18719 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18720 match1(parser, PM_TOKEN_BRACE_LEFT)
18721 ) {
18722 pm_arguments_t arguments = { 0 };
18723 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18724 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18725
18726 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
18727 // If we're about to convert an 'it' implicit local
18728 // variable read into a method call, we need to remove
18729 // it from the list of implicit local variables.
18730 parse_target_implicit_parameter(parser, node);
18731 } else {
18732 // Otherwise, we're about to convert a regular local
18733 // variable read into a method call, in which case we
18734 // need to indicate that this was not a read for the
18735 // purposes of warnings.
18736 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
18737
18738 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18739 parse_target_implicit_parameter(parser, node);
18740 } else {
18742 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18743 }
18744 }
18745
18746 pm_node_destroy(parser, node);
18747 return (pm_node_t *) fcall;
18748 }
18749 }
18750
18751 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18752 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18753 }
18754
18755 return node;
18756 }
18757 case PM_TOKEN_HEREDOC_START: {
18758 // Here we have found a heredoc. We'll parse it and add it to the
18759 // list of strings.
18760 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18761 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18762
18763 size_t common_whitespace = (size_t) -1;
18764 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18765
18766 parser_lex(parser);
18767 pm_token_t opening = parser->previous;
18768
18769 pm_node_t *node;
18770 pm_node_t *part;
18771
18772 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18773 // If we get here, then we have an empty heredoc. We'll create
18774 // an empty content token and return an empty string node.
18775 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18776 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18777
18778 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18779 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18780 } else {
18781 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18782 }
18783
18784 node->location.end = opening.end;
18785 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18786 // If we get here, then we tried to find something in the
18787 // heredoc but couldn't actually parse anything, so we'll just
18788 // return a missing node.
18789 //
18790 // parse_string_part handles its own errors, so there is no need
18791 // for us to add one here.
18792 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18793 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18794 // If we get here, then the part that we parsed was plain string
18795 // content and we're at the end of the heredoc, so we can return
18796 // just a string node with the heredoc opening and closing as
18797 // its opening and closing.
18798 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18799 pm_string_node_t *cast = (pm_string_node_t *) part;
18800
18801 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18802 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18803 cast->base.location = cast->opening_loc;
18804
18805 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18806 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18807 cast->base.type = PM_X_STRING_NODE;
18808 }
18809
18810 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18811 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18812 }
18813
18814 node = (pm_node_t *) cast;
18815 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18816 } else {
18817 // If we get here, then we have multiple parts in the heredoc,
18818 // so we'll need to create an interpolated string node to hold
18819 // them all.
18820 pm_node_list_t parts = { 0 };
18821 pm_node_list_append(&parts, part);
18822
18823 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18824 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18825 pm_node_list_append(&parts, part);
18826 }
18827 }
18828
18829 // Now that we have all of the parts, create the correct type of
18830 // interpolated node.
18831 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18832 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18833 cast->parts = parts;
18834
18835 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18836 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18837
18838 cast->base.location = cast->opening_loc;
18839 node = (pm_node_t *) cast;
18840 } else {
18841 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18842 pm_node_list_free(&parts);
18843
18844 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18845 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18846
18847 cast->base.location = cast->opening_loc;
18848 node = (pm_node_t *) cast;
18849 }
18850
18851 // If this is a heredoc that is indented with a ~, then we need
18852 // to dedent each line by the common leading whitespace.
18853 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18854 pm_node_list_t *nodes;
18855 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18856 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18857 } else {
18858 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18859 }
18860
18861 parse_heredoc_dedent(parser, nodes, common_whitespace);
18862 }
18863 }
18864
18865 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18866 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18867 }
18868
18869 return node;
18870 }
18871 case PM_TOKEN_INSTANCE_VARIABLE: {
18872 parser_lex(parser);
18873 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18874
18875 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18876 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18877 }
18878
18879 return node;
18880 }
18881 case PM_TOKEN_INTEGER: {
18882 pm_node_flags_t base = parser->integer_base;
18883 parser_lex(parser);
18884 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18885 }
18886 case PM_TOKEN_INTEGER_IMAGINARY: {
18887 pm_node_flags_t base = parser->integer_base;
18888 parser_lex(parser);
18889 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18890 }
18891 case PM_TOKEN_INTEGER_RATIONAL: {
18892 pm_node_flags_t base = parser->integer_base;
18893 parser_lex(parser);
18894 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18895 }
18896 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18897 pm_node_flags_t base = parser->integer_base;
18898 parser_lex(parser);
18899 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18900 }
18901 case PM_TOKEN_KEYWORD___ENCODING__:
18902 parser_lex(parser);
18903 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18904 case PM_TOKEN_KEYWORD___FILE__:
18905 parser_lex(parser);
18906 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18907 case PM_TOKEN_KEYWORD___LINE__:
18908 parser_lex(parser);
18909 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18910 case PM_TOKEN_KEYWORD_ALIAS: {
18911 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18912 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18913 }
18914
18915 parser_lex(parser);
18916 pm_token_t keyword = parser->previous;
18917
18918 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18919 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18920
18921 switch (PM_NODE_TYPE(new_name)) {
18922 case PM_BACK_REFERENCE_READ_NODE:
18923 case PM_NUMBERED_REFERENCE_READ_NODE:
18924 case PM_GLOBAL_VARIABLE_READ_NODE: {
18925 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18926 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18927 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18928 }
18929 } else {
18930 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18931 }
18932
18933 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18934 }
18935 case PM_SYMBOL_NODE:
18936 case PM_INTERPOLATED_SYMBOL_NODE: {
18937 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18938 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18939 }
18940 }
18942 default:
18943 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18944 }
18945 }
18946 case PM_TOKEN_KEYWORD_CASE: {
18947 size_t opening_newline_index = token_newline_index(parser);
18948 parser_lex(parser);
18949
18950 pm_token_t case_keyword = parser->previous;
18951 pm_node_t *predicate = NULL;
18952
18953 pm_node_list_t current_block_exits = { 0 };
18954 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18955
18956 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18957 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18958 predicate = NULL;
18959 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18960 predicate = NULL;
18961 } else if (!token_begins_expression_p(parser->current.type)) {
18962 predicate = NULL;
18963 } else {
18964 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18965 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18966 }
18967
18968 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18969 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18970 parser_lex(parser);
18971
18972 pop_block_exits(parser, previous_block_exits);
18973 pm_node_list_free(&current_block_exits);
18974
18975 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18976 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18977 }
18978
18979 // At this point we can create a case node, though we don't yet know
18980 // if it is a case-in or case-when node.
18981 pm_token_t end_keyword = not_provided(parser);
18982 pm_node_t *node;
18983
18984 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18985 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18986 pm_static_literals_t literals = { 0 };
18987
18988 // At this point we've seen a when keyword, so we know this is a
18989 // case-when node. We will continue to parse the when nodes
18990 // until we hit the end of the list.
18991 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18992 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18993 parser_lex(parser);
18994
18995 pm_token_t when_keyword = parser->previous;
18996 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18997
18998 do {
18999 if (accept1(parser, PM_TOKEN_USTAR)) {
19000 pm_token_t operator = parser->previous;
19001 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19002
19003 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
19004 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
19005
19006 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
19007 } else {
19008 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
19009 pm_when_node_conditions_append(when_node, condition);
19010
19011 // If we found a missing node, then this is a syntax
19012 // error and we should stop looping.
19013 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
19014
19015 // If this is a string node, then we need to mark it
19016 // as frozen because when clause strings are frozen.
19017 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
19018 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
19019 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
19020 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
19021 }
19022
19023 pm_when_clause_static_literals_add(parser, &literals, condition);
19024 }
19025 } while (accept1(parser, PM_TOKEN_COMMA));
19026
19027 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19028 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
19029 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
19030 }
19031 } else {
19032 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
19033 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
19034 }
19035
19036 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19037 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
19038 if (statements != NULL) {
19039 pm_when_node_statements_set(when_node, statements);
19040 }
19041 }
19042
19043 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
19044 }
19045
19046 // If we didn't parse any conditions (in or when) then we need
19047 // to indicate that we have an error.
19048 if (case_node->conditions.size == 0) {
19049 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19050 }
19051
19052 pm_static_literals_free(&literals);
19053 node = (pm_node_t *) case_node;
19054 } else {
19055 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
19056
19057 // If this is a case-match node (i.e., it is a pattern matching
19058 // case statement) then we must have a predicate.
19059 if (predicate == NULL) {
19060 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
19061 }
19062
19063 // At this point we expect that we're parsing a case-in node. We
19064 // will continue to parse the in nodes until we hit the end of
19065 // the list.
19066 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
19067 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
19068
19069 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
19070 parser->pattern_matching_newlines = true;
19071
19072 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
19073 parser->command_start = false;
19074 parser_lex(parser);
19075
19076 pm_token_t in_keyword = parser->previous;
19077
19078 pm_constant_id_list_t captures = { 0 };
19079 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
19080
19081 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
19082 pm_constant_id_list_free(&captures);
19083
19084 // Since we're in the top-level of the case-in node we need
19085 // to check for guard clauses in the form of `if` or
19086 // `unless` statements.
19087 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
19088 pm_token_t keyword = parser->previous;
19089 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
19090 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
19091 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
19092 pm_token_t keyword = parser->previous;
19093 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
19094 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
19095 }
19096
19097 // Now we need to check for the terminator of the in node's
19098 // pattern. It can be a newline or semicolon optionally
19099 // followed by a `then` keyword.
19100 pm_token_t then_keyword;
19101 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19102 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
19103 then_keyword = parser->previous;
19104 } else {
19105 then_keyword = not_provided(parser);
19106 }
19107 } else {
19108 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
19109 then_keyword = parser->previous;
19110 }
19111
19112 // Now we can actually parse the statements associated with
19113 // the in node.
19114 pm_statements_node_t *statements;
19115 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19116 statements = NULL;
19117 } else {
19118 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
19119 }
19120
19121 // Now that we have the full pattern and statements, we can
19122 // create the node and attach it to the case node.
19123 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
19124 pm_case_match_node_condition_append(case_node, condition);
19125 }
19126
19127 // If we didn't parse any conditions (in or when) then we need
19128 // to indicate that we have an error.
19129 if (case_node->conditions.size == 0) {
19130 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19131 }
19132
19133 node = (pm_node_t *) case_node;
19134 }
19135
19136 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19137 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
19138 pm_token_t else_keyword = parser->previous;
19139 pm_else_node_t *else_node;
19140
19141 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19142 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
19143 } else {
19144 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
19145 }
19146
19147 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19148 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
19149 } else {
19150 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
19151 }
19152 }
19153
19154 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
19155 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
19156
19157 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19158 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
19159 } else {
19160 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
19161 }
19162
19163 pop_block_exits(parser, previous_block_exits);
19164 pm_node_list_free(&current_block_exits);
19165
19166 return node;
19167 }
19168 case PM_TOKEN_KEYWORD_BEGIN: {
19169 size_t opening_newline_index = token_newline_index(parser);
19170 parser_lex(parser);
19171
19172 pm_token_t begin_keyword = parser->previous;
19173 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19174
19175 pm_node_list_t current_block_exits = { 0 };
19176 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19177 pm_statements_node_t *begin_statements = NULL;
19178
19179 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19180 pm_accepts_block_stack_push(parser, true);
19181 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19182 pm_accepts_block_stack_pop(parser);
19183 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19184 }
19185
19186 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19187 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19188 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19189
19190 begin_node->base.location.end = parser->previous.end;
19191 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19192
19193 pop_block_exits(parser, previous_block_exits);
19194 pm_node_list_free(&current_block_exits);
19195
19196 return (pm_node_t *) begin_node;
19197 }
19198 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
19199 pm_node_list_t current_block_exits = { 0 };
19200 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19201
19202 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19203 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19204 }
19205
19206 parser_lex(parser);
19207 pm_token_t keyword = parser->previous;
19208
19209 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19210 pm_token_t opening = parser->previous;
19211 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19212
19213 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19214 pm_context_t context = parser->current_context->context;
19215 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19216 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19217 }
19218
19219 flush_block_exits(parser, previous_block_exits);
19220 pm_node_list_free(&current_block_exits);
19221
19222 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19223 }
19224 case PM_TOKEN_KEYWORD_BREAK:
19225 case PM_TOKEN_KEYWORD_NEXT:
19226 case PM_TOKEN_KEYWORD_RETURN: {
19227 parser_lex(parser);
19228
19229 pm_token_t keyword = parser->previous;
19230 pm_arguments_t arguments = { 0 };
19231
19232 if (
19233 token_begins_expression_p(parser->current.type) ||
19234 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19235 ) {
19236 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19237
19238 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19239 pm_token_t next = parser->current;
19240 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19241
19242 // Reject `foo && return bar`.
19243 if (!accepts_command_call && arguments.arguments != NULL) {
19244 PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
19245 }
19246 }
19247 }
19248
19249 switch (keyword.type) {
19250 case PM_TOKEN_KEYWORD_BREAK: {
19251 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19252 if (!parser->partial_script) parse_block_exit(parser, node);
19253 return node;
19254 }
19255 case PM_TOKEN_KEYWORD_NEXT: {
19256 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19257 if (!parser->partial_script) parse_block_exit(parser, node);
19258 return node;
19259 }
19260 case PM_TOKEN_KEYWORD_RETURN: {
19261 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19262 parse_return(parser, node);
19263 return node;
19264 }
19265 default:
19266 assert(false && "unreachable");
19267 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19268 }
19269 }
19270 case PM_TOKEN_KEYWORD_SUPER: {
19271 parser_lex(parser);
19272
19273 pm_token_t keyword = parser->previous;
19274 pm_arguments_t arguments = { 0 };
19275 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19276
19277 if (
19278 arguments.opening_loc.start == NULL &&
19279 arguments.arguments == NULL &&
19280 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19281 ) {
19282 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19283 }
19284
19285 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19286 }
19287 case PM_TOKEN_KEYWORD_YIELD: {
19288 parser_lex(parser);
19289
19290 pm_token_t keyword = parser->previous;
19291 pm_arguments_t arguments = { 0 };
19292 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19293
19294 // It's possible that we've parsed a block argument through our
19295 // call to parse_arguments_list. If we found one, we should mark it
19296 // as invalid and destroy it, as we don't have a place for it on the
19297 // yield node.
19298 if (arguments.block != NULL) {
19299 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19300 pm_node_destroy(parser, arguments.block);
19301 arguments.block = NULL;
19302 }
19303
19304 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19305 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19306
19307 return node;
19308 }
19309 case PM_TOKEN_KEYWORD_CLASS: {
19310 size_t opening_newline_index = token_newline_index(parser);
19311 parser_lex(parser);
19312
19313 pm_token_t class_keyword = parser->previous;
19314 pm_do_loop_stack_push(parser, false);
19315
19316 pm_node_list_t current_block_exits = { 0 };
19317 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19318
19319 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19320 pm_token_t operator = parser->previous;
19321 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19322
19323 pm_parser_scope_push(parser, true);
19324 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19325 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19326 }
19327
19328 pm_node_t *statements = NULL;
19329 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19330 pm_accepts_block_stack_push(parser, true);
19331 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19332 pm_accepts_block_stack_pop(parser);
19333 }
19334
19335 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19336 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19337 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19338 } else {
19339 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19340 }
19341
19342 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19343
19344 pm_constant_id_list_t locals;
19345 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19346
19347 pm_parser_scope_pop(parser);
19348 pm_do_loop_stack_pop(parser);
19349
19350 flush_block_exits(parser, previous_block_exits);
19351 pm_node_list_free(&current_block_exits);
19352
19353 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19354 }
19355
19356 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19357 pm_token_t name = parser->previous;
19358 if (name.type != PM_TOKEN_CONSTANT) {
19359 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19360 }
19361
19362 pm_token_t inheritance_operator;
19363 pm_node_t *superclass;
19364
19365 if (match1(parser, PM_TOKEN_LESS)) {
19366 inheritance_operator = parser->current;
19367 lex_state_set(parser, PM_LEX_STATE_BEG);
19368
19369 parser->command_start = true;
19370 parser_lex(parser);
19371
19372 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19373 } else {
19374 inheritance_operator = not_provided(parser);
19375 superclass = NULL;
19376 }
19377
19378 pm_parser_scope_push(parser, true);
19379
19380 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19381 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19382 } else {
19383 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19384 }
19385 pm_node_t *statements = NULL;
19386
19387 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19388 pm_accepts_block_stack_push(parser, true);
19389 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19390 pm_accepts_block_stack_pop(parser);
19391 }
19392
19393 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19394 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19395 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19396 } else {
19397 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19398 }
19399
19400 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19401
19402 if (context_def_p(parser)) {
19403 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19404 }
19405
19406 pm_constant_id_list_t locals;
19407 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19408
19409 pm_parser_scope_pop(parser);
19410 pm_do_loop_stack_pop(parser);
19411
19412 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19413 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19414 }
19415
19416 pop_block_exits(parser, previous_block_exits);
19417 pm_node_list_free(&current_block_exits);
19418
19419 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19420 }
19421 case PM_TOKEN_KEYWORD_DEF: {
19422 pm_node_list_t current_block_exits = { 0 };
19423 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19424
19425 pm_token_t def_keyword = parser->current;
19426 size_t opening_newline_index = token_newline_index(parser);
19427
19428 pm_node_t *receiver = NULL;
19429 pm_token_t operator = not_provided(parser);
19430 pm_token_t name;
19431
19432 // This context is necessary for lexing `...` in bare params
19433 // correctly. It must be pushed before lexing the first param, so it
19434 // is pushed here.
19435 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19436 parser_lex(parser);
19437
19438 // This will be false if the method name is not a valid identifier
19439 // but could be followed by an operator.
19440 bool valid_name = true;
19441
19442 switch (parser->current.type) {
19443 case PM_CASE_OPERATOR:
19444 pm_parser_scope_push(parser, true);
19445 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19446 parser_lex(parser);
19447
19448 name = parser->previous;
19449 break;
19450 case PM_TOKEN_IDENTIFIER: {
19451 parser_lex(parser);
19452
19453 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19454 receiver = parse_variable_call(parser);
19455
19456 pm_parser_scope_push(parser, true);
19457 lex_state_set(parser, PM_LEX_STATE_FNAME);
19458 parser_lex(parser);
19459
19460 operator = parser->previous;
19461 name = parse_method_definition_name(parser);
19462 } else {
19463 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19464 pm_parser_scope_push(parser, true);
19465
19466 name = parser->previous;
19467 }
19468
19469 break;
19470 }
19471 case PM_TOKEN_INSTANCE_VARIABLE:
19472 case PM_TOKEN_CLASS_VARIABLE:
19473 case PM_TOKEN_GLOBAL_VARIABLE:
19474 valid_name = false;
19476 case PM_TOKEN_CONSTANT:
19477 case PM_TOKEN_KEYWORD_NIL:
19478 case PM_TOKEN_KEYWORD_SELF:
19479 case PM_TOKEN_KEYWORD_TRUE:
19480 case PM_TOKEN_KEYWORD_FALSE:
19481 case PM_TOKEN_KEYWORD___FILE__:
19482 case PM_TOKEN_KEYWORD___LINE__:
19483 case PM_TOKEN_KEYWORD___ENCODING__: {
19484 pm_parser_scope_push(parser, true);
19485 parser_lex(parser);
19486
19487 pm_token_t identifier = parser->previous;
19488
19489 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19490 lex_state_set(parser, PM_LEX_STATE_FNAME);
19491 parser_lex(parser);
19492 operator = parser->previous;
19493
19494 switch (identifier.type) {
19495 case PM_TOKEN_CONSTANT:
19496 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19497 break;
19498 case PM_TOKEN_INSTANCE_VARIABLE:
19499 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19500 break;
19501 case PM_TOKEN_CLASS_VARIABLE:
19502 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19503 break;
19504 case PM_TOKEN_GLOBAL_VARIABLE:
19505 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19506 break;
19507 case PM_TOKEN_KEYWORD_NIL:
19508 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19509 break;
19510 case PM_TOKEN_KEYWORD_SELF:
19511 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19512 break;
19513 case PM_TOKEN_KEYWORD_TRUE:
19514 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19515 break;
19516 case PM_TOKEN_KEYWORD_FALSE:
19517 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19518 break;
19519 case PM_TOKEN_KEYWORD___FILE__:
19520 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19521 break;
19522 case PM_TOKEN_KEYWORD___LINE__:
19523 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19524 break;
19525 case PM_TOKEN_KEYWORD___ENCODING__:
19526 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19527 break;
19528 default:
19529 break;
19530 }
19531
19532 name = parse_method_definition_name(parser);
19533 } else {
19534 if (!valid_name) {
19535 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19536 }
19537
19538 name = identifier;
19539 }
19540 break;
19541 }
19542 case PM_TOKEN_PARENTHESIS_LEFT: {
19543 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19544 // the inner expression of this parenthesis should not be
19545 // processed under this context. Thus, the context is popped
19546 // here.
19547 context_pop(parser);
19548 parser_lex(parser);
19549
19550 pm_token_t lparen = parser->previous;
19551 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19552
19553 accept1(parser, PM_TOKEN_NEWLINE);
19554 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19555 pm_token_t rparen = parser->previous;
19556
19557 lex_state_set(parser, PM_LEX_STATE_FNAME);
19558 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19559
19560 operator = parser->previous;
19561 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
19562
19563 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19564 // reason as described above.
19565 pm_parser_scope_push(parser, true);
19566 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19567 name = parse_method_definition_name(parser);
19568 break;
19569 }
19570 default:
19571 pm_parser_scope_push(parser, true);
19572 name = parse_method_definition_name(parser);
19573 break;
19574 }
19575
19576 pm_token_t lparen;
19577 pm_token_t rparen;
19578 pm_parameters_node_t *params;
19579
19580 switch (parser->current.type) {
19581 case PM_TOKEN_PARENTHESIS_LEFT: {
19582 parser_lex(parser);
19583 lparen = parser->previous;
19584
19585 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19586 params = NULL;
19587 } else {
19588 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19589 }
19590
19591 lex_state_set(parser, PM_LEX_STATE_BEG);
19592 parser->command_start = true;
19593
19594 context_pop(parser);
19595 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19596 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19597 parser->previous.start = parser->previous.end;
19598 parser->previous.type = PM_TOKEN_MISSING;
19599 }
19600
19601 rparen = parser->previous;
19602 break;
19603 }
19604 case PM_CASE_PARAMETER: {
19605 // If we're about to lex a label, we need to add the label
19606 // state to make sure the next newline is ignored.
19607 if (parser->current.type == PM_TOKEN_LABEL) {
19608 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19609 }
19610
19611 lparen = not_provided(parser);
19612 rparen = not_provided(parser);
19613 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19614
19615 context_pop(parser);
19616 break;
19617 }
19618 default: {
19619 lparen = not_provided(parser);
19620 rparen = not_provided(parser);
19621 params = NULL;
19622
19623 context_pop(parser);
19624 break;
19625 }
19626 }
19627
19628 pm_node_t *statements = NULL;
19629 pm_token_t equal;
19630 pm_token_t end_keyword;
19631
19632 if (accept1(parser, PM_TOKEN_EQUAL)) {
19633 if (token_is_setter_name(&name)) {
19634 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19635 }
19636 equal = parser->previous;
19637
19638 context_push(parser, PM_CONTEXT_DEF);
19639 pm_do_loop_stack_push(parser, false);
19640 statements = (pm_node_t *) pm_statements_node_create(parser);
19641
19642 bool allow_command_call;
19643 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
19644 allow_command_call = accepts_command_call;
19645 } else {
19646 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
19647 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
19648 }
19649
19650 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19651
19652 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19653 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19654
19655 pm_token_t rescue_keyword = parser->previous;
19656 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19657 context_pop(parser);
19658
19659 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19660 }
19661
19662 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19663 pm_do_loop_stack_pop(parser);
19664 context_pop(parser);
19665 end_keyword = not_provided(parser);
19666 } else {
19667 equal = not_provided(parser);
19668
19669 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19670 lex_state_set(parser, PM_LEX_STATE_BEG);
19671 parser->command_start = true;
19672 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19673 } else {
19674 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19675 }
19676
19677 pm_accepts_block_stack_push(parser, true);
19678 pm_do_loop_stack_push(parser, false);
19679
19680 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19681 pm_accepts_block_stack_push(parser, true);
19682 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19683 pm_accepts_block_stack_pop(parser);
19684 }
19685
19686 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19687 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19688 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19689 } else {
19690 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19691 }
19692
19693 pm_accepts_block_stack_pop(parser);
19694 pm_do_loop_stack_pop(parser);
19695
19696 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19697 end_keyword = parser->previous;
19698 }
19699
19700 pm_constant_id_list_t locals;
19701 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19702 pm_parser_scope_pop(parser);
19703
19709 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19710
19711 flush_block_exits(parser, previous_block_exits);
19712 pm_node_list_free(&current_block_exits);
19713
19714 return (pm_node_t *) pm_def_node_create(
19715 parser,
19716 name_id,
19717 &name,
19718 receiver,
19719 params,
19720 statements,
19721 &locals,
19722 &def_keyword,
19723 &operator,
19724 &lparen,
19725 &rparen,
19726 &equal,
19727 &end_keyword
19728 );
19729 }
19730 case PM_TOKEN_KEYWORD_DEFINED: {
19731 parser_lex(parser);
19732 pm_token_t keyword = parser->previous;
19733
19734 pm_token_t lparen;
19735 pm_token_t rparen;
19736 pm_node_t *expression;
19737
19738 context_push(parser, PM_CONTEXT_DEFINED);
19739 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19740
19741 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19742 lparen = parser->previous;
19743
19744 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19745 expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19746 lparen = not_provided(parser);
19747 rparen = not_provided(parser);
19748 } else {
19749 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19750
19751 if (parser->recovering) {
19752 rparen = not_provided(parser);
19753 } else {
19754 accept1(parser, PM_TOKEN_NEWLINE);
19755 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19756 rparen = parser->previous;
19757 }
19758 }
19759 } else {
19760 lparen = not_provided(parser);
19761 rparen = not_provided(parser);
19762 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19763 }
19764
19765 context_pop(parser);
19766 return (pm_node_t *) pm_defined_node_create(
19767 parser,
19768 &lparen,
19769 expression,
19770 &rparen,
19771 &PM_LOCATION_TOKEN_VALUE(&keyword)
19772 );
19773 }
19774 case PM_TOKEN_KEYWORD_END_UPCASE: {
19775 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19776 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19777 }
19778
19779 parser_lex(parser);
19780 pm_token_t keyword = parser->previous;
19781
19782 if (context_def_p(parser)) {
19783 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19784 }
19785
19786 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19787 pm_token_t opening = parser->previous;
19788 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19789
19790 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19791 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19792 }
19793 case PM_TOKEN_KEYWORD_FALSE:
19794 parser_lex(parser);
19795 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19796 case PM_TOKEN_KEYWORD_FOR: {
19797 size_t opening_newline_index = token_newline_index(parser);
19798 parser_lex(parser);
19799
19800 pm_token_t for_keyword = parser->previous;
19801 pm_node_t *index;
19802
19803 context_push(parser, PM_CONTEXT_FOR_INDEX);
19804
19805 // First, parse out the first index expression.
19806 if (accept1(parser, PM_TOKEN_USTAR)) {
19807 pm_token_t star_operator = parser->previous;
19808 pm_node_t *name = NULL;
19809
19810 if (token_begins_expression_p(parser->current.type)) {
19811 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19812 }
19813
19814 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19815 } else if (token_begins_expression_p(parser->current.type)) {
19816 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19817 } else {
19818 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19819 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19820 }
19821
19822 // Now, if there are multiple index expressions, parse them out.
19823 if (match1(parser, PM_TOKEN_COMMA)) {
19824 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19825 } else {
19826 index = parse_target(parser, index, false, false);
19827 }
19828
19829 context_pop(parser);
19830 pm_do_loop_stack_push(parser, true);
19831
19832 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19833 pm_token_t in_keyword = parser->previous;
19834
19835 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19836 pm_do_loop_stack_pop(parser);
19837
19838 pm_token_t do_keyword;
19839 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19840 do_keyword = parser->previous;
19841 } else {
19842 do_keyword = not_provided(parser);
19843 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19844 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19845 }
19846 }
19847
19848 pm_statements_node_t *statements = NULL;
19849 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19850 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19851 }
19852
19853 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19854 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19855
19856 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19857 }
19858 case PM_TOKEN_KEYWORD_IF:
19859 if (parser_end_of_line_p(parser)) {
19860 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19861 }
19862
19863 size_t opening_newline_index = token_newline_index(parser);
19864 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19865 parser_lex(parser);
19866
19867 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19868 case PM_TOKEN_KEYWORD_UNDEF: {
19869 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19870 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19871 }
19872
19873 parser_lex(parser);
19874 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19875 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19876
19877 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19878 pm_node_destroy(parser, name);
19879 } else {
19880 pm_undef_node_append(undef, name);
19881
19882 while (match1(parser, PM_TOKEN_COMMA)) {
19883 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19884 parser_lex(parser);
19885 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19886
19887 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19888 pm_node_destroy(parser, name);
19889 break;
19890 }
19891
19892 pm_undef_node_append(undef, name);
19893 }
19894 }
19895
19896 return (pm_node_t *) undef;
19897 }
19898 case PM_TOKEN_KEYWORD_NOT: {
19899 parser_lex(parser);
19900
19901 pm_token_t message = parser->previous;
19902 pm_arguments_t arguments = { 0 };
19903 pm_node_t *receiver = NULL;
19904
19905 // If we do not accept a command call, then we also do not accept a
19906 // not without parentheses. In this case we need to reject this
19907 // syntax.
19908 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19909 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19910 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19911 } else {
19912 accept1(parser, PM_TOKEN_NEWLINE);
19913 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19914 }
19915
19916 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
19917 }
19918
19919 accept1(parser, PM_TOKEN_NEWLINE);
19920
19921 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19922 pm_token_t lparen = parser->previous;
19923
19924 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19925 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19926 } else {
19927 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19928 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19929
19930 if (!parser->recovering) {
19931 accept1(parser, PM_TOKEN_NEWLINE);
19932 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19933 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19934 }
19935 }
19936 } else {
19937 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19938 }
19939
19940 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19941 }
19942 case PM_TOKEN_KEYWORD_UNLESS: {
19943 size_t opening_newline_index = token_newline_index(parser);
19944 parser_lex(parser);
19945
19946 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19947 }
19948 case PM_TOKEN_KEYWORD_MODULE: {
19949 pm_node_list_t current_block_exits = { 0 };
19950 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19951
19952 size_t opening_newline_index = token_newline_index(parser);
19953 parser_lex(parser);
19954 pm_token_t module_keyword = parser->previous;
19955
19956 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19957 pm_token_t name;
19958
19959 // If we can recover from a syntax error that occurred while parsing
19960 // the name of the module, then we'll handle that here.
19961 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19962 pop_block_exits(parser, previous_block_exits);
19963 pm_node_list_free(&current_block_exits);
19964
19965 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19966 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19967 }
19968
19969 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19970 pm_token_t double_colon = parser->previous;
19971
19972 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19973 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19974 }
19975
19976 // Here we retrieve the name of the module. If it wasn't a constant,
19977 // then it's possible that `module foo` was passed, which is a
19978 // syntax error. We handle that here as well.
19979 name = parser->previous;
19980 if (name.type != PM_TOKEN_CONSTANT) {
19981 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19982 }
19983
19984 pm_parser_scope_push(parser, true);
19985 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19986 pm_node_t *statements = NULL;
19987
19988 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19989 pm_accepts_block_stack_push(parser, true);
19990 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19991 pm_accepts_block_stack_pop(parser);
19992 }
19993
19994 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19995 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19996 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19997 } else {
19998 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19999 }
20000
20001 pm_constant_id_list_t locals;
20002 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
20003
20004 pm_parser_scope_pop(parser);
20005 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
20006
20007 if (context_def_p(parser)) {
20008 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
20009 }
20010
20011 pop_block_exits(parser, previous_block_exits);
20012 pm_node_list_free(&current_block_exits);
20013
20014 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
20015 }
20016 case PM_TOKEN_KEYWORD_NIL:
20017 parser_lex(parser);
20018 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
20019 case PM_TOKEN_KEYWORD_REDO: {
20020 parser_lex(parser);
20021
20022 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
20023 if (!parser->partial_script) parse_block_exit(parser, node);
20024
20025 return node;
20026 }
20027 case PM_TOKEN_KEYWORD_RETRY: {
20028 parser_lex(parser);
20029
20030 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
20031 parse_retry(parser, node);
20032
20033 return node;
20034 }
20035 case PM_TOKEN_KEYWORD_SELF:
20036 parser_lex(parser);
20037 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
20038 case PM_TOKEN_KEYWORD_TRUE:
20039 parser_lex(parser);
20040 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
20041 case PM_TOKEN_KEYWORD_UNTIL: {
20042 size_t opening_newline_index = token_newline_index(parser);
20043
20044 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20045 pm_do_loop_stack_push(parser, true);
20046
20047 parser_lex(parser);
20048 pm_token_t keyword = parser->previous;
20049 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
20050
20051 pm_do_loop_stack_pop(parser);
20052 context_pop(parser);
20053
20054 pm_token_t do_keyword;
20055 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20056 do_keyword = parser->previous;
20057 } else {
20058 do_keyword = not_provided(parser);
20059 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
20060 }
20061
20062 pm_statements_node_t *statements = NULL;
20063 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20064 pm_accepts_block_stack_push(parser, true);
20065 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
20066 pm_accepts_block_stack_pop(parser);
20067 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20068 }
20069
20070 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20071 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
20072
20073 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
20074 }
20075 case PM_TOKEN_KEYWORD_WHILE: {
20076 size_t opening_newline_index = token_newline_index(parser);
20077
20078 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20079 pm_do_loop_stack_push(parser, true);
20080
20081 parser_lex(parser);
20082 pm_token_t keyword = parser->previous;
20083 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
20084
20085 pm_do_loop_stack_pop(parser);
20086 context_pop(parser);
20087
20088 pm_token_t do_keyword;
20089 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20090 do_keyword = parser->previous;
20091 } else {
20092 do_keyword = not_provided(parser);
20093 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
20094 }
20095
20096 pm_statements_node_t *statements = NULL;
20097 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20098 pm_accepts_block_stack_push(parser, true);
20099 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
20100 pm_accepts_block_stack_pop(parser);
20101 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20102 }
20103
20104 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20105 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
20106
20107 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
20108 }
20109 case PM_TOKEN_PERCENT_LOWER_I: {
20110 parser_lex(parser);
20111 pm_token_t opening = parser->previous;
20112 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20113
20114 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20115 accept1(parser, PM_TOKEN_WORDS_SEP);
20116 if (match1(parser, PM_TOKEN_STRING_END)) break;
20117
20118 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20119 pm_token_t opening = not_provided(parser);
20120 pm_token_t closing = not_provided(parser);
20121 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
20122 }
20123
20124 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
20125 }
20126
20127 pm_token_t closing = parser->current;
20128 if (match1(parser, PM_TOKEN_EOF)) {
20129 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
20130 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20131 } else {
20132 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
20133 }
20134 pm_array_node_close_set(array, &closing);
20135
20136 return (pm_node_t *) array;
20137 }
20138 case PM_TOKEN_PERCENT_UPPER_I: {
20139 parser_lex(parser);
20140 pm_token_t opening = parser->previous;
20141 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20142
20143 // This is the current node that we are parsing that will be added to the
20144 // list of elements.
20145 pm_node_t *current = NULL;
20146
20147 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20148 switch (parser->current.type) {
20149 case PM_TOKEN_WORDS_SEP: {
20150 if (current == NULL) {
20151 // If we hit a separator before we have any content, then we don't
20152 // need to do anything.
20153 } else {
20154 // If we hit a separator after we've hit content, then we need to
20155 // append that content to the list and reset the current node.
20156 pm_array_node_elements_append(array, current);
20157 current = NULL;
20158 }
20159
20160 parser_lex(parser);
20161 break;
20162 }
20163 case PM_TOKEN_STRING_CONTENT: {
20164 pm_token_t opening = not_provided(parser);
20165 pm_token_t closing = not_provided(parser);
20166
20167 if (current == NULL) {
20168 // If we hit content and the current node is NULL, then this is
20169 // the first string content we've seen. In that case we're going
20170 // to create a new symbol node and set that to the current.
20171 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
20172 parser_lex(parser);
20173 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20174 // If we hit string content and the current node is an
20175 // interpolated symbol, then we need to append the string content
20176 // to the list of child nodes.
20177 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20178 parser_lex(parser);
20179
20180 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
20181 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20182 // If we hit string content and the current node is a symbol node,
20183 // then we need to convert the current node into an interpolated
20184 // symbol and add the string content to the list of child nodes.
20185 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20186 pm_token_t bounds = not_provided(parser);
20187
20188 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
20189 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
20190 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
20191 parser_lex(parser);
20192
20193 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20194 pm_interpolated_symbol_node_append(interpolated, first_string);
20195 pm_interpolated_symbol_node_append(interpolated, second_string);
20196
20197 xfree(current);
20198 current = (pm_node_t *) interpolated;
20199 } else {
20200 assert(false && "unreachable");
20201 }
20202
20203 break;
20204 }
20205 case PM_TOKEN_EMBVAR: {
20206 bool start_location_set = false;
20207 if (current == NULL) {
20208 // If we hit an embedded variable and the current node is NULL,
20209 // then this is the start of a new symbol. We'll set the current
20210 // node to a new interpolated symbol.
20211 pm_token_t opening = not_provided(parser);
20212 pm_token_t closing = not_provided(parser);
20213 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20214 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20215 // If we hit an embedded variable and the current node is a symbol
20216 // node, then we'll convert the current into an interpolated
20217 // symbol and add its string node form to the list of parts.
20218 pm_token_t opening = not_provided(parser);
20219 pm_token_t closing = not_provided(parser);
20220 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20221
20222 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20223 pm_interpolated_symbol_node_append(interpolated, current);
20224 interpolated->base.location.start = current->location.start;
20225 start_location_set = true;
20226 current = (pm_node_t *) interpolated;
20227 } else {
20228 // If we hit an embedded variable and the current node is an
20229 // interpolated symbol, then we'll just add the embedded variable.
20230 }
20231
20232 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20233 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20234 if (!start_location_set) {
20235 current->location.start = part->location.start;
20236 }
20237 break;
20238 }
20239 case PM_TOKEN_EMBEXPR_BEGIN: {
20240 bool start_location_set = false;
20241 if (current == NULL) {
20242 // If we hit an embedded expression and the current node is NULL,
20243 // then this is the start of a new symbol. We'll set the current
20244 // node to a new interpolated symbol.
20245 pm_token_t opening = not_provided(parser);
20246 pm_token_t closing = not_provided(parser);
20247 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20248 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20249 // If we hit an embedded expression and the current node is a
20250 // symbol node, then we'll convert the current into an
20251 // interpolated symbol and add its string node form to the list
20252 // of parts.
20253 pm_token_t opening = not_provided(parser);
20254 pm_token_t closing = not_provided(parser);
20255 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20256
20257 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20258 pm_interpolated_symbol_node_append(interpolated, current);
20259 interpolated->base.location.start = current->location.start;
20260 start_location_set = true;
20261 current = (pm_node_t *) interpolated;
20262 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20263 // If we hit an embedded expression and the current node is an
20264 // interpolated symbol, then we'll just continue on.
20265 } else {
20266 assert(false && "unreachable");
20267 }
20268
20269 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20270 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20271 if (!start_location_set) {
20272 current->location.start = part->location.start;
20273 }
20274 break;
20275 }
20276 default:
20277 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20278 parser_lex(parser);
20279 break;
20280 }
20281 }
20282
20283 // If we have a current node, then we need to append it to the list.
20284 if (current) {
20285 pm_array_node_elements_append(array, current);
20286 }
20287
20288 pm_token_t closing = parser->current;
20289 if (match1(parser, PM_TOKEN_EOF)) {
20290 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20291 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20292 } else {
20293 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20294 }
20295 pm_array_node_close_set(array, &closing);
20296
20297 return (pm_node_t *) array;
20298 }
20299 case PM_TOKEN_PERCENT_LOWER_W: {
20300 parser_lex(parser);
20301 pm_token_t opening = parser->previous;
20302 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20303
20304 // skip all leading whitespaces
20305 accept1(parser, PM_TOKEN_WORDS_SEP);
20306
20307 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20308 accept1(parser, PM_TOKEN_WORDS_SEP);
20309 if (match1(parser, PM_TOKEN_STRING_END)) break;
20310
20311 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20312 pm_token_t opening = not_provided(parser);
20313 pm_token_t closing = not_provided(parser);
20314
20315 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20316 pm_array_node_elements_append(array, string);
20317 }
20318
20319 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20320 }
20321
20322 pm_token_t closing = parser->current;
20323 if (match1(parser, PM_TOKEN_EOF)) {
20324 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20325 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20326 } else {
20327 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20328 }
20329
20330 pm_array_node_close_set(array, &closing);
20331 return (pm_node_t *) array;
20332 }
20333 case PM_TOKEN_PERCENT_UPPER_W: {
20334 parser_lex(parser);
20335 pm_token_t opening = parser->previous;
20336 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20337
20338 // This is the current node that we are parsing that will be added
20339 // to the list of elements.
20340 pm_node_t *current = NULL;
20341
20342 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20343 switch (parser->current.type) {
20344 case PM_TOKEN_WORDS_SEP: {
20345 // Reset the explicit encoding if we hit a separator
20346 // since each element can have its own encoding.
20347 parser->explicit_encoding = NULL;
20348
20349 if (current == NULL) {
20350 // If we hit a separator before we have any content,
20351 // then we don't need to do anything.
20352 } else {
20353 // If we hit a separator after we've hit content,
20354 // then we need to append that content to the list
20355 // and reset the current node.
20356 pm_array_node_elements_append(array, current);
20357 current = NULL;
20358 }
20359
20360 parser_lex(parser);
20361 break;
20362 }
20363 case PM_TOKEN_STRING_CONTENT: {
20364 pm_token_t opening = not_provided(parser);
20365 pm_token_t closing = not_provided(parser);
20366
20367 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20368 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20369 parser_lex(parser);
20370
20371 if (current == NULL) {
20372 // If we hit content and the current node is NULL,
20373 // then this is the first string content we've seen.
20374 // In that case we're going to create a new string
20375 // node and set that to the current.
20376 current = string;
20377 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20378 // If we hit string content and the current node is
20379 // an interpolated string, then we need to append
20380 // the string content to the list of child nodes.
20381 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20382 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20383 // If we hit string content and the current node is
20384 // a string node, then we need to convert the
20385 // current node into an interpolated string and add
20386 // the string content to the list of child nodes.
20387 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20388 pm_interpolated_string_node_append(interpolated, current);
20389 pm_interpolated_string_node_append(interpolated, string);
20390 current = (pm_node_t *) interpolated;
20391 } else {
20392 assert(false && "unreachable");
20393 }
20394
20395 break;
20396 }
20397 case PM_TOKEN_EMBVAR: {
20398 if (current == NULL) {
20399 // If we hit an embedded variable and the current
20400 // node is NULL, then this is the start of a new
20401 // string. We'll set the current node to a new
20402 // interpolated string.
20403 pm_token_t opening = not_provided(parser);
20404 pm_token_t closing = not_provided(parser);
20405 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20406 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20407 // If we hit an embedded variable and the current
20408 // node is a string node, then we'll convert the
20409 // current into an interpolated string and add the
20410 // string node to the list of parts.
20411 pm_token_t opening = not_provided(parser);
20412 pm_token_t closing = not_provided(parser);
20413 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20414 pm_interpolated_string_node_append(interpolated, current);
20415 current = (pm_node_t *) interpolated;
20416 } else {
20417 // If we hit an embedded variable and the current
20418 // node is an interpolated string, then we'll just
20419 // add the embedded variable.
20420 }
20421
20422 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20423 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20424 break;
20425 }
20426 case PM_TOKEN_EMBEXPR_BEGIN: {
20427 if (current == NULL) {
20428 // If we hit an embedded expression and the current
20429 // node is NULL, then this is the start of a new
20430 // string. We'll set the current node to a new
20431 // interpolated string.
20432 pm_token_t opening = not_provided(parser);
20433 pm_token_t closing = not_provided(parser);
20434 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20435 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20436 // If we hit an embedded expression and the current
20437 // node is a string node, then we'll convert the
20438 // current into an interpolated string and add the
20439 // string node to the list of parts.
20440 pm_token_t opening = not_provided(parser);
20441 pm_token_t closing = not_provided(parser);
20442 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20443 pm_interpolated_string_node_append(interpolated, current);
20444 current = (pm_node_t *) interpolated;
20445 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20446 // If we hit an embedded expression and the current
20447 // node is an interpolated string, then we'll just
20448 // continue on.
20449 } else {
20450 assert(false && "unreachable");
20451 }
20452
20453 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20454 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20455 break;
20456 }
20457 default:
20458 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20459 parser_lex(parser);
20460 break;
20461 }
20462 }
20463
20464 // If we have a current node, then we need to append it to the list.
20465 if (current) {
20466 pm_array_node_elements_append(array, current);
20467 }
20468
20469 pm_token_t closing = parser->current;
20470 if (match1(parser, PM_TOKEN_EOF)) {
20471 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20472 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20473 } else {
20474 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20475 }
20476
20477 pm_array_node_close_set(array, &closing);
20478 return (pm_node_t *) array;
20479 }
20480 case PM_TOKEN_REGEXP_BEGIN: {
20481 pm_token_t opening = parser->current;
20482 parser_lex(parser);
20483
20484 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20485 // If we get here, then we have an end immediately after a start. In
20486 // that case we'll create an empty content token and return an
20487 // uninterpolated regular expression.
20488 pm_token_t content = (pm_token_t) {
20489 .type = PM_TOKEN_STRING_CONTENT,
20490 .start = parser->previous.end,
20491 .end = parser->previous.end
20492 };
20493
20494 parser_lex(parser);
20495
20496 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20497 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
20498
20499 return node;
20500 }
20501
20503
20504 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20505 // In this case we've hit string content so we know the regular
20506 // expression at least has something in it. We'll need to check if the
20507 // following token is the end (in which case we can return a plain
20508 // regular expression) or if it's not then it has interpolation.
20509 pm_string_t unescaped = parser->current_string;
20510 pm_token_t content = parser->current;
20511 bool ascii_only = parser->current_regular_expression_ascii_only;
20512 parser_lex(parser);
20513
20514 // If we hit an end, then we can create a regular expression
20515 // node without interpolation, which can be represented more
20516 // succinctly and more easily compiled.
20517 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20518 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20519
20520 // If we're not immediately followed by a =~, then we want
20521 // to parse all of the errors at this point. If it is
20522 // followed by a =~, then it will get parsed higher up while
20523 // parsing the named captures as well.
20524 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20525 parse_regular_expression_errors(parser, node);
20526 }
20527
20528 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20529 return (pm_node_t *) node;
20530 }
20531
20532 // If we get here, then we have interpolation so we'll need to create
20533 // a regular expression node with interpolation.
20534 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20535
20536 pm_token_t opening = not_provided(parser);
20537 pm_token_t closing = not_provided(parser);
20538 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20539
20540 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20541 // This is extremely strange, but the first string part of a
20542 // regular expression will always be tagged as binary if we
20543 // are in a US-ASCII file, no matter its contents.
20544 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20545 }
20546
20547 pm_interpolated_regular_expression_node_append(interpolated, part);
20548 } else {
20549 // If the first part of the body of the regular expression is not a
20550 // string content, then we have interpolation and we need to create an
20551 // interpolated regular expression node.
20552 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20553 }
20554
20555 // Now that we're here and we have interpolation, we'll parse all of the
20556 // parts into the list.
20557 pm_node_t *part;
20558 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20559 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20560 pm_interpolated_regular_expression_node_append(interpolated, part);
20561 }
20562 }
20563
20564 pm_token_t closing = parser->current;
20565 if (match1(parser, PM_TOKEN_EOF)) {
20566 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20567 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20568 } else {
20569 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20570 }
20571
20572 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20573 return (pm_node_t *) interpolated;
20574 }
20575 case PM_TOKEN_BACKTICK:
20576 case PM_TOKEN_PERCENT_LOWER_X: {
20577 parser_lex(parser);
20578 pm_token_t opening = parser->previous;
20579
20580 // When we get here, we don't know if this string is going to have
20581 // interpolation or not, even though it is allowed. Still, we want to be
20582 // able to return a string node without interpolation if we can since
20583 // it'll be faster.
20584 if (match1(parser, PM_TOKEN_STRING_END)) {
20585 // If we get here, then we have an end immediately after a start. In
20586 // that case we'll create an empty content token and return an
20587 // uninterpolated string.
20588 pm_token_t content = (pm_token_t) {
20589 .type = PM_TOKEN_STRING_CONTENT,
20590 .start = parser->previous.end,
20591 .end = parser->previous.end
20592 };
20593
20594 parser_lex(parser);
20595 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20596 }
20597
20599
20600 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20601 // In this case we've hit string content so we know the string
20602 // at least has something in it. We'll need to check if the
20603 // following token is the end (in which case we can return a
20604 // plain string) or if it's not then it has interpolation.
20605 pm_string_t unescaped = parser->current_string;
20606 pm_token_t content = parser->current;
20607 parser_lex(parser);
20608
20609 if (match1(parser, PM_TOKEN_STRING_END)) {
20610 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20611 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20612 parser_lex(parser);
20613 return node;
20614 }
20615
20616 // If we get here, then we have interpolation so we'll need to
20617 // create a string node with interpolation.
20618 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20619
20620 pm_token_t opening = not_provided(parser);
20621 pm_token_t closing = not_provided(parser);
20622
20623 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20624 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20625
20626 pm_interpolated_xstring_node_append(node, part);
20627 } else {
20628 // If the first part of the body of the string is not a string
20629 // content, then we have interpolation and we need to create an
20630 // interpolated string node.
20631 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20632 }
20633
20634 pm_node_t *part;
20635 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20636 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20637 pm_interpolated_xstring_node_append(node, part);
20638 }
20639 }
20640
20641 pm_token_t closing = parser->current;
20642 if (match1(parser, PM_TOKEN_EOF)) {
20643 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20644 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20645 } else {
20646 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20647 }
20648 pm_interpolated_xstring_node_closing_set(node, &closing);
20649
20650 return (pm_node_t *) node;
20651 }
20652 case PM_TOKEN_USTAR: {
20653 parser_lex(parser);
20654
20655 // * operators at the beginning of expressions are only valid in the
20656 // context of a multiple assignment. We enforce that here. We'll
20657 // still lex past it though and create a missing node place.
20658 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20659 pm_parser_err_prefix(parser, diag_id);
20660 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20661 }
20662
20663 pm_token_t operator = parser->previous;
20664 pm_node_t *name = NULL;
20665
20666 if (token_begins_expression_p(parser->current.type)) {
20667 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20668 }
20669
20670 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20671
20672 if (match1(parser, PM_TOKEN_COMMA)) {
20673 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20674 } else {
20675 return parse_target_validate(parser, splat, true);
20676 }
20677 }
20678 case PM_TOKEN_BANG: {
20679 if (binding_power > PM_BINDING_POWER_UNARY) {
20680 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20681 }
20682
20683 parser_lex(parser);
20684
20685 pm_token_t operator = parser->previous;
20686 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20687 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20688
20689 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20690 return (pm_node_t *) node;
20691 }
20692 case PM_TOKEN_TILDE: {
20693 if (binding_power > PM_BINDING_POWER_UNARY) {
20694 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20695 }
20696 parser_lex(parser);
20697
20698 pm_token_t operator = parser->previous;
20699 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20700 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20701
20702 return (pm_node_t *) node;
20703 }
20704 case PM_TOKEN_UMINUS: {
20705 if (binding_power > PM_BINDING_POWER_UNARY) {
20706 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20707 }
20708 parser_lex(parser);
20709
20710 pm_token_t operator = parser->previous;
20711 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20712 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20713
20714 return (pm_node_t *) node;
20715 }
20716 case PM_TOKEN_UMINUS_NUM: {
20717 parser_lex(parser);
20718
20719 pm_token_t operator = parser->previous;
20720 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20721
20722 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20723 pm_token_t exponent_operator = parser->previous;
20724 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20725 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20726 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20727 } else {
20728 switch (PM_NODE_TYPE(node)) {
20729 case PM_INTEGER_NODE:
20730 case PM_FLOAT_NODE:
20731 case PM_RATIONAL_NODE:
20732 case PM_IMAGINARY_NODE:
20733 parse_negative_numeric(node);
20734 break;
20735 default:
20736 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20737 break;
20738 }
20739 }
20740
20741 return node;
20742 }
20743 case PM_TOKEN_MINUS_GREATER: {
20744 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20746
20747 size_t opening_newline_index = token_newline_index(parser);
20748 pm_accepts_block_stack_push(parser, true);
20749 parser_lex(parser);
20750
20751 pm_token_t operator = parser->previous;
20752 pm_parser_scope_push(parser, false);
20753
20754 pm_block_parameters_node_t *block_parameters;
20755
20756 switch (parser->current.type) {
20757 case PM_TOKEN_PARENTHESIS_LEFT: {
20758 pm_token_t opening = parser->current;
20759 parser_lex(parser);
20760
20761 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20762 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20763 } else {
20764 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20765 }
20766
20767 accept1(parser, PM_TOKEN_NEWLINE);
20768 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20769
20770 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20771 break;
20772 }
20773 case PM_CASE_PARAMETER: {
20774 pm_accepts_block_stack_push(parser, false);
20775 pm_token_t opening = not_provided(parser);
20776 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20777 pm_accepts_block_stack_pop(parser);
20778 break;
20779 }
20780 default: {
20781 block_parameters = NULL;
20782 break;
20783 }
20784 }
20785
20786 pm_token_t opening;
20787 pm_node_t *body = NULL;
20788 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20789
20790 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20791 opening = parser->previous;
20792
20793 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20794 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20795 }
20796
20797 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20798 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20799 } else {
20800 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20801 opening = parser->previous;
20802
20803 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20804 pm_accepts_block_stack_push(parser, true);
20805 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20806 pm_accepts_block_stack_pop(parser);
20807 }
20808
20809 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20810 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20811 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20812 } else {
20813 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20814 }
20815
20816 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20817 }
20818
20819 pm_constant_id_list_t locals;
20820 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20821 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20822
20823 pm_parser_scope_pop(parser);
20824 pm_accepts_block_stack_pop(parser);
20825
20826 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20827 }
20828 case PM_TOKEN_UPLUS: {
20829 if (binding_power > PM_BINDING_POWER_UNARY) {
20830 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20831 }
20832 parser_lex(parser);
20833
20834 pm_token_t operator = parser->previous;
20835 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20836 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20837
20838 return (pm_node_t *) node;
20839 }
20840 case PM_TOKEN_STRING_BEGIN:
20841 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20842 case PM_TOKEN_SYMBOL_BEGIN: {
20843 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20844 parser_lex(parser);
20845
20846 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20847 }
20848 default: {
20849 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20850
20851 if (recoverable != PM_CONTEXT_NONE) {
20852 parser->recovering = true;
20853
20854 // If the given error is not the generic one, then we'll add it
20855 // here because it will provide more context in addition to the
20856 // recoverable error that we will also add.
20857 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20858 pm_parser_err_prefix(parser, diag_id);
20859 }
20860
20861 // If we get here, then we are assuming this token is closing a
20862 // parent context, so we'll indicate that to the user so that
20863 // they know how we behaved.
20864 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20865 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20866 // We're going to make a special case here, because "cannot
20867 // parse expression" is pretty generic, and we know here that we
20868 // have an unexpected token.
20869 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20870 } else {
20871 pm_parser_err_prefix(parser, diag_id);
20872 }
20873
20874 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20875 }
20876 }
20877}
20878
20888static pm_node_t *
20889parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20890 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20891
20892 // Contradicting binding powers, the right-hand-side value of the assignment
20893 // allows the `rescue` modifier.
20894 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20895 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20896
20897 pm_token_t rescue = parser->current;
20898 parser_lex(parser);
20899
20900 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20901 context_pop(parser);
20902
20903 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20904 }
20905
20906 return value;
20907}
20908
20913static void
20914parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20915 switch (PM_NODE_TYPE(node)) {
20916 case PM_BEGIN_NODE: {
20917 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20918 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20919 break;
20920 }
20921 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20923 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20924 break;
20925 }
20926 case PM_PARENTHESES_NODE: {
20927 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20928 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20929 break;
20930 }
20931 case PM_STATEMENTS_NODE: {
20932 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20933 const pm_node_t *statement;
20934
20935 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20936 parse_assignment_value_local(parser, statement);
20937 }
20938 break;
20939 }
20940 default:
20941 break;
20942 }
20943}
20944
20957static pm_node_t *
20958parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20959 bool permitted = true;
20960 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20961
20962 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20963 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20964
20965 parse_assignment_value_local(parser, value);
20966 bool single_value = true;
20967
20968 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20969 single_value = false;
20970
20971 pm_token_t opening = not_provided(parser);
20972 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20973
20974 pm_array_node_elements_append(array, value);
20975 value = (pm_node_t *) array;
20976
20977 while (accept1(parser, PM_TOKEN_COMMA)) {
20978 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20979
20980 pm_array_node_elements_append(array, element);
20981 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20982
20983 parse_assignment_value_local(parser, element);
20984 }
20985 }
20986
20987 // Contradicting binding powers, the right-hand-side value of the assignment
20988 // allows the `rescue` modifier.
20989 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20990 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20991
20992 pm_token_t rescue = parser->current;
20993 parser_lex(parser);
20994
20995 bool accepts_command_call_inner = false;
20996
20997 // RHS can accept command call iff the value is a call with arguments
20998 // but without parenthesis.
20999 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
21000 pm_call_node_t *call_node = (pm_call_node_t *) value;
21001 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
21002 accepts_command_call_inner = true;
21003 }
21004 }
21005
21006 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21007 context_pop(parser);
21008
21009 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
21010 }
21011
21012 return value;
21013}
21014
21022static void
21023parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
21024 if (call_node->arguments != NULL) {
21025 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
21026 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
21027 call_node->arguments = NULL;
21028 }
21029
21030 if (call_node->block != NULL) {
21031 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
21032 pm_node_destroy(parser, (pm_node_t *) call_node->block);
21033 call_node->block = NULL;
21034 }
21035}
21036
21061
21062static inline const uint8_t *
21063pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21064 cursor++;
21065
21066 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
21067 uint8_t value = escape_hexadecimal_digit(*cursor);
21068 cursor++;
21069
21070 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
21071 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
21072 cursor++;
21073 }
21074
21075 pm_buffer_append_byte(unescaped, value);
21076 } else {
21077 pm_buffer_append_string(unescaped, "\\x", 2);
21078 }
21079
21080 return cursor;
21081}
21082
21083static inline const uint8_t *
21084pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21085 uint8_t value = (uint8_t) (*cursor - '0');
21086 cursor++;
21087
21088 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
21089 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
21090 cursor++;
21091
21092 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
21093 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
21094 cursor++;
21095 }
21096 }
21097
21098 pm_buffer_append_byte(unescaped, value);
21099 return cursor;
21100}
21101
21102static inline const uint8_t *
21103pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
21104 const uint8_t *start = cursor - 1;
21105 cursor++;
21106
21107 if (cursor >= end) {
21108 pm_buffer_append_string(unescaped, "\\u", 2);
21109 return cursor;
21110 }
21111
21112 if (*cursor != '{') {
21113 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
21114 uint32_t value = escape_unicode(parser, cursor, length);
21115
21116 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
21117 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
21118 }
21119
21120 return cursor + length;
21121 }
21122
21123 cursor++;
21124 for (;;) {
21125 while (cursor < end && *cursor == ' ') cursor++;
21126
21127 if (cursor >= end) break;
21128 if (*cursor == '}') {
21129 cursor++;
21130 break;
21131 }
21132
21133 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
21134 uint32_t value = escape_unicode(parser, cursor, length);
21135
21136 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
21137 cursor += length;
21138 }
21139
21140 return cursor;
21141}
21142
21143static void
21144pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
21145 const uint8_t *end = source + length;
21146 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
21147
21148 for (;;) {
21149 if (++cursor >= end) {
21150 pm_buffer_append_byte(unescaped, '\\');
21151 return;
21152 }
21153
21154 switch (*cursor) {
21155 case 'x':
21156 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
21157 break;
21158 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
21159 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
21160 break;
21161 case 'u':
21162 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
21163 break;
21164 default:
21165 pm_buffer_append_byte(unescaped, '\\');
21166 break;
21167 }
21168
21169 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
21170 if (next_cursor == NULL) break;
21171
21172 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
21173 cursor = next_cursor;
21174 }
21175
21176 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
21177}
21178
21183static void
21184parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
21186
21187 pm_parser_t *parser = callback_data->parser;
21188 pm_call_node_t *call = callback_data->call;
21189 pm_constant_id_list_t *names = &callback_data->names;
21190
21191 const uint8_t *source = pm_string_source(capture);
21192 size_t length = pm_string_length(capture);
21193 pm_buffer_t unescaped = { 0 };
21194
21195 // First, we need to handle escapes within the name of the capture group.
21196 // This is because regular expressions have three different representations
21197 // in prism. The first is the plain source code. The second is the
21198 // representation that will be sent to the regular expression engine, which
21199 // is the value of the "unescaped" field. This is poorly named, because it
21200 // actually still contains escapes, just a subset of them that the regular
21201 // expression engine knows how to handle. The third representation is fully
21202 // unescaped, which is what we need.
21203 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
21204 if (PRISM_UNLIKELY(cursor != NULL)) {
21205 pm_named_capture_escape(parser, &unescaped, source, length, cursor);
21206 source = (const uint8_t *) pm_buffer_value(&unescaped);
21207 length = pm_buffer_length(&unescaped);
21208 }
21209
21210 pm_location_t location;
21211 pm_constant_id_t name;
21212
21213 // If the name of the capture group isn't a valid identifier, we do
21214 // not add it to the local table.
21215 if (!pm_slice_is_valid_local(parser, source, source + length)) {
21216 pm_buffer_free(&unescaped);
21217 return;
21218 }
21219
21220 if (callback_data->shared) {
21221 // If the unescaped string is a slice of the source, then we can
21222 // copy the names directly. The pointers will line up.
21223 location = (pm_location_t) { .start = source, .end = source + length };
21224 name = pm_parser_constant_id_location(parser, location.start, location.end);
21225 } else {
21226 // Otherwise, the name is a slice of the malloc-ed owned string,
21227 // in which case we need to copy it out into a new string.
21228 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
21229
21230 void *memory = xmalloc(length);
21231 if (memory == NULL) abort();
21232
21233 memcpy(memory, source, length);
21234 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
21235 }
21236
21237 // Add this name to the list of constants if it is valid, not duplicated,
21238 // and not a keyword.
21239 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
21240 pm_constant_id_list_append(names, name);
21241
21242 int depth;
21243 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
21244 // If the local is not already a local but it is a keyword, then we
21245 // do not want to add a capture for this.
21246 if (pm_local_is_keyword((const char *) source, length)) {
21247 pm_buffer_free(&unescaped);
21248 return;
21249 }
21250
21251 // If the identifier is not already a local, then we will add it to
21252 // the local table.
21253 pm_parser_local_add(parser, name, location.start, location.end, 0);
21254 }
21255
21256 // Here we lazily create the MatchWriteNode since we know we're
21257 // about to add a target.
21258 if (callback_data->match == NULL) {
21259 callback_data->match = pm_match_write_node_create(parser, call);
21260 }
21261
21262 // Next, create the local variable target and add it to the list of
21263 // targets for the match.
21264 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
21265 pm_node_list_append(&callback_data->match->targets, target);
21266 }
21267
21268 pm_buffer_free(&unescaped);
21269}
21270
21275static pm_node_t *
21276parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
21278 .parser = parser,
21279 .call = call,
21280 .names = { 0 },
21281 .shared = content->type == PM_STRING_SHARED
21282 };
21283
21285 .parser = parser,
21286 .start = call->receiver->location.start,
21287 .end = call->receiver->location.end,
21288 .shared = content->type == PM_STRING_SHARED
21289 };
21290
21291 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
21292 pm_constant_id_list_free(&callback_data.names);
21293
21294 if (callback_data.match != NULL) {
21295 return (pm_node_t *) callback_data.match;
21296 } else {
21297 return (pm_node_t *) call;
21298 }
21299}
21300
21301static inline pm_node_t *
21302parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
21303 pm_token_t token = parser->current;
21304
21305 switch (token.type) {
21306 case PM_TOKEN_EQUAL: {
21307 switch (PM_NODE_TYPE(node)) {
21308 case PM_CALL_NODE: {
21309 // If we have no arguments to the call node and we need this
21310 // to be a target then this is either a method call or a
21311 // local variable write. This _must_ happen before the value
21312 // is parsed because it could be referenced in the value.
21313 pm_call_node_t *call_node = (pm_call_node_t *) node;
21314 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21315 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
21316 }
21317 }
21319 case PM_CASE_WRITABLE: {
21320 // When we have `it = value`, we need to add `it` as a local
21321 // variable before parsing the value, in case the value
21322 // references the variable.
21323 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
21324 pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
21325 }
21326
21327 parser_lex(parser);
21328 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21329
21330 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21331 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21332 }
21333
21334 return parse_write(parser, node, &token, value);
21335 }
21336 case PM_SPLAT_NODE: {
21337 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21338 pm_multi_target_node_targets_append(parser, multi_target, node);
21339
21340 parser_lex(parser);
21341 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21342 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
21343 }
21344 case PM_SOURCE_ENCODING_NODE:
21345 case PM_FALSE_NODE:
21346 case PM_SOURCE_FILE_NODE:
21347 case PM_SOURCE_LINE_NODE:
21348 case PM_NIL_NODE:
21349 case PM_SELF_NODE:
21350 case PM_TRUE_NODE: {
21351 // In these special cases, we have specific error messages
21352 // and we will replace them with local variable writes.
21353 parser_lex(parser);
21354 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21355 return parse_unwriteable_write(parser, node, &token, value);
21356 }
21357 default:
21358 // In this case we have an = sign, but we don't know what
21359 // it's for. We need to treat it as an error. We'll mark it
21360 // as an error and skip past it.
21361 parser_lex(parser);
21362 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21363 return node;
21364 }
21365 }
21366 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
21367 switch (PM_NODE_TYPE(node)) {
21368 case PM_BACK_REFERENCE_READ_NODE:
21369 case PM_NUMBERED_REFERENCE_READ_NODE:
21370 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21372 case PM_GLOBAL_VARIABLE_READ_NODE: {
21373 parser_lex(parser);
21374
21375 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21376 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21377
21378 pm_node_destroy(parser, node);
21379 return result;
21380 }
21381 case PM_CLASS_VARIABLE_READ_NODE: {
21382 parser_lex(parser);
21383
21384 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21385 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21386
21387 pm_node_destroy(parser, node);
21388 return result;
21389 }
21390 case PM_CONSTANT_PATH_NODE: {
21391 parser_lex(parser);
21392
21393 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21394 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21395
21396 return parse_shareable_constant_write(parser, write);
21397 }
21398 case PM_CONSTANT_READ_NODE: {
21399 parser_lex(parser);
21400
21401 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21402 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21403
21404 pm_node_destroy(parser, node);
21405 return parse_shareable_constant_write(parser, write);
21406 }
21407 case PM_INSTANCE_VARIABLE_READ_NODE: {
21408 parser_lex(parser);
21409
21410 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21411 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21412
21413 pm_node_destroy(parser, node);
21414 return result;
21415 }
21416 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21417 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21418 parser_lex(parser);
21419
21420 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21421 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21422
21423 parse_target_implicit_parameter(parser, node);
21424 pm_node_destroy(parser, node);
21425 return result;
21426 }
21427 case PM_LOCAL_VARIABLE_READ_NODE: {
21428 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21429 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21430 parse_target_implicit_parameter(parser, node);
21431 }
21432
21434 parser_lex(parser);
21435
21436 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21437 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21438
21439 pm_node_destroy(parser, node);
21440 return result;
21441 }
21442 case PM_CALL_NODE: {
21443 pm_call_node_t *cast = (pm_call_node_t *) node;
21444
21445 // If we have a vcall (a method with no arguments and no
21446 // receiver that could have been a local variable) then we
21447 // will transform it into a local variable write.
21448 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21449 pm_location_t *message_loc = &cast->message_loc;
21450 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21451
21452 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21453 parser_lex(parser);
21454
21455 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21456 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21457
21458 pm_node_destroy(parser, (pm_node_t *) cast);
21459 return result;
21460 }
21461
21462 // Move past the token here so that we have already added
21463 // the local variable by this point.
21464 parser_lex(parser);
21465
21466 // If there is no call operator and the message is "[]" then
21467 // this is an aref expression, and we can transform it into
21468 // an aset expression.
21469 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21470 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21471 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21472 }
21473
21474 // If this node cannot be writable, then we have an error.
21475 if (pm_call_node_writable_p(parser, cast)) {
21476 parse_write_name(parser, &cast->name);
21477 } else {
21478 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21479 }
21480
21481 parse_call_operator_write(parser, cast, &token);
21482 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21483 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21484 }
21485 case PM_MULTI_WRITE_NODE: {
21486 parser_lex(parser);
21487 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21488 return node;
21489 }
21490 default:
21491 parser_lex(parser);
21492
21493 // In this case we have an &&= sign, but we don't know what it's for.
21494 // We need to treat it as an error. For now, we'll mark it as an error
21495 // and just skip right past it.
21496 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21497 return node;
21498 }
21499 }
21500 case PM_TOKEN_PIPE_PIPE_EQUAL: {
21501 switch (PM_NODE_TYPE(node)) {
21502 case PM_BACK_REFERENCE_READ_NODE:
21503 case PM_NUMBERED_REFERENCE_READ_NODE:
21504 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21506 case PM_GLOBAL_VARIABLE_READ_NODE: {
21507 parser_lex(parser);
21508
21509 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21510 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21511
21512 pm_node_destroy(parser, node);
21513 return result;
21514 }
21515 case PM_CLASS_VARIABLE_READ_NODE: {
21516 parser_lex(parser);
21517
21518 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21519 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21520
21521 pm_node_destroy(parser, node);
21522 return result;
21523 }
21524 case PM_CONSTANT_PATH_NODE: {
21525 parser_lex(parser);
21526
21527 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21528 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21529
21530 return parse_shareable_constant_write(parser, write);
21531 }
21532 case PM_CONSTANT_READ_NODE: {
21533 parser_lex(parser);
21534
21535 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21536 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21537
21538 pm_node_destroy(parser, node);
21539 return parse_shareable_constant_write(parser, write);
21540 }
21541 case PM_INSTANCE_VARIABLE_READ_NODE: {
21542 parser_lex(parser);
21543
21544 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21545 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21546
21547 pm_node_destroy(parser, node);
21548 return result;
21549 }
21550 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21551 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21552 parser_lex(parser);
21553
21554 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21555 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21556
21557 parse_target_implicit_parameter(parser, node);
21558 pm_node_destroy(parser, node);
21559 return result;
21560 }
21561 case PM_LOCAL_VARIABLE_READ_NODE: {
21562 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21563 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21564 parse_target_implicit_parameter(parser, node);
21565 }
21566
21568 parser_lex(parser);
21569
21570 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21571 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21572
21573 pm_node_destroy(parser, node);
21574 return result;
21575 }
21576 case PM_CALL_NODE: {
21577 pm_call_node_t *cast = (pm_call_node_t *) node;
21578
21579 // If we have a vcall (a method with no arguments and no
21580 // receiver that could have been a local variable) then we
21581 // will transform it into a local variable write.
21582 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21583 pm_location_t *message_loc = &cast->message_loc;
21584 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21585
21586 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21587 parser_lex(parser);
21588
21589 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21590 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21591
21592 pm_node_destroy(parser, (pm_node_t *) cast);
21593 return result;
21594 }
21595
21596 // Move past the token here so that we have already added
21597 // the local variable by this point.
21598 parser_lex(parser);
21599
21600 // If there is no call operator and the message is "[]" then
21601 // this is an aref expression, and we can transform it into
21602 // an aset expression.
21603 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21604 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21605 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21606 }
21607
21608 // If this node cannot be writable, then we have an error.
21609 if (pm_call_node_writable_p(parser, cast)) {
21610 parse_write_name(parser, &cast->name);
21611 } else {
21612 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21613 }
21614
21615 parse_call_operator_write(parser, cast, &token);
21616 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21617 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21618 }
21619 case PM_MULTI_WRITE_NODE: {
21620 parser_lex(parser);
21621 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21622 return node;
21623 }
21624 default:
21625 parser_lex(parser);
21626
21627 // In this case we have an ||= sign, but we don't know what it's for.
21628 // We need to treat it as an error. For now, we'll mark it as an error
21629 // and just skip right past it.
21630 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21631 return node;
21632 }
21633 }
21634 case PM_TOKEN_AMPERSAND_EQUAL:
21635 case PM_TOKEN_CARET_EQUAL:
21636 case PM_TOKEN_GREATER_GREATER_EQUAL:
21637 case PM_TOKEN_LESS_LESS_EQUAL:
21638 case PM_TOKEN_MINUS_EQUAL:
21639 case PM_TOKEN_PERCENT_EQUAL:
21640 case PM_TOKEN_PIPE_EQUAL:
21641 case PM_TOKEN_PLUS_EQUAL:
21642 case PM_TOKEN_SLASH_EQUAL:
21643 case PM_TOKEN_STAR_EQUAL:
21644 case PM_TOKEN_STAR_STAR_EQUAL: {
21645 switch (PM_NODE_TYPE(node)) {
21646 case PM_BACK_REFERENCE_READ_NODE:
21647 case PM_NUMBERED_REFERENCE_READ_NODE:
21648 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21650 case PM_GLOBAL_VARIABLE_READ_NODE: {
21651 parser_lex(parser);
21652
21653 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21654 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21655
21656 pm_node_destroy(parser, node);
21657 return result;
21658 }
21659 case PM_CLASS_VARIABLE_READ_NODE: {
21660 parser_lex(parser);
21661
21662 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21663 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21664
21665 pm_node_destroy(parser, node);
21666 return result;
21667 }
21668 case PM_CONSTANT_PATH_NODE: {
21669 parser_lex(parser);
21670
21671 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21672 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21673
21674 return parse_shareable_constant_write(parser, write);
21675 }
21676 case PM_CONSTANT_READ_NODE: {
21677 parser_lex(parser);
21678
21679 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21680 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21681
21682 pm_node_destroy(parser, node);
21683 return parse_shareable_constant_write(parser, write);
21684 }
21685 case PM_INSTANCE_VARIABLE_READ_NODE: {
21686 parser_lex(parser);
21687
21688 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21689 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21690
21691 pm_node_destroy(parser, node);
21692 return result;
21693 }
21694 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21695 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21696 parser_lex(parser);
21697
21698 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21699 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21700
21701 parse_target_implicit_parameter(parser, node);
21702 pm_node_destroy(parser, node);
21703 return result;
21704 }
21705 case PM_LOCAL_VARIABLE_READ_NODE: {
21706 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21707 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21708 parse_target_implicit_parameter(parser, node);
21709 }
21710
21712 parser_lex(parser);
21713
21714 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21715 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21716
21717 pm_node_destroy(parser, node);
21718 return result;
21719 }
21720 case PM_CALL_NODE: {
21721 parser_lex(parser);
21722 pm_call_node_t *cast = (pm_call_node_t *) node;
21723
21724 // If we have a vcall (a method with no arguments and no
21725 // receiver that could have been a local variable) then we
21726 // will transform it into a local variable write.
21727 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21728 pm_location_t *message_loc = &cast->message_loc;
21729 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21730
21731 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21732 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21733 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21734
21735 pm_node_destroy(parser, (pm_node_t *) cast);
21736 return result;
21737 }
21738
21739 // If there is no call operator and the message is "[]" then
21740 // this is an aref expression, and we can transform it into
21741 // an aset expression.
21742 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21743 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21744 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21745 }
21746
21747 // If this node cannot be writable, then we have an error.
21748 if (pm_call_node_writable_p(parser, cast)) {
21749 parse_write_name(parser, &cast->name);
21750 } else {
21751 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21752 }
21753
21754 parse_call_operator_write(parser, cast, &token);
21755 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21756 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21757 }
21758 case PM_MULTI_WRITE_NODE: {
21759 parser_lex(parser);
21760 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21761 return node;
21762 }
21763 default:
21764 parser_lex(parser);
21765
21766 // In this case we have an operator but we don't know what it's for.
21767 // We need to treat it as an error. For now, we'll mark it as an error
21768 // and just skip right past it.
21769 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21770 return node;
21771 }
21772 }
21773 case PM_TOKEN_AMPERSAND_AMPERSAND:
21774 case PM_TOKEN_KEYWORD_AND: {
21775 parser_lex(parser);
21776
21777 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21778 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21779 }
21780 case PM_TOKEN_KEYWORD_OR:
21781 case PM_TOKEN_PIPE_PIPE: {
21782 parser_lex(parser);
21783
21784 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21785 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21786 }
21787 case PM_TOKEN_EQUAL_TILDE: {
21788 // Note that we _must_ parse the value before adding the local
21789 // variables in order to properly mirror the behavior of Ruby. For
21790 // example,
21791 //
21792 // /(?<foo>bar)/ =~ foo
21793 //
21794 // In this case, `foo` should be a method call and not a local yet.
21795 parser_lex(parser);
21796 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21797
21798 // By default, we're going to create a call node and then return it.
21799 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21800 pm_node_t *result = (pm_node_t *) call;
21801
21802 // If the receiver of this =~ is a regular expression node, then we
21803 // need to introduce local variables for it based on its named
21804 // capture groups.
21805 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21806 // It's possible to have an interpolated regular expression node
21807 // that only contains strings. This is because it can be split
21808 // up by a heredoc. In this case we need to concat the unescaped
21809 // strings together and then parse them as a regular expression.
21811
21812 bool interpolated = false;
21813 size_t total_length = 0;
21814
21815 pm_node_t *part;
21816 PM_NODE_LIST_FOREACH(parts, index, part) {
21817 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21818 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21819 } else {
21820 interpolated = true;
21821 break;
21822 }
21823 }
21824
21825 if (!interpolated && total_length > 0) {
21826 void *memory = xmalloc(total_length);
21827 if (!memory) abort();
21828
21829 uint8_t *cursor = memory;
21830 PM_NODE_LIST_FOREACH(parts, index, part) {
21831 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21832 size_t length = pm_string_length(unescaped);
21833
21834 memcpy(cursor, pm_string_source(unescaped), length);
21835 cursor += length;
21836 }
21837
21838 pm_string_t owned;
21839 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21840
21841 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21842 pm_string_free(&owned);
21843 }
21844 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21845 // If we have a regular expression node, then we can just parse
21846 // the named captures directly off the unescaped string.
21847 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21848 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21849 }
21850
21851 return result;
21852 }
21853 case PM_TOKEN_UAMPERSAND:
21854 case PM_TOKEN_USTAR:
21855 case PM_TOKEN_USTAR_STAR:
21856 // The only times this will occur are when we are in an error state,
21857 // but we'll put them in here so that errors can propagate.
21858 case PM_TOKEN_BANG_EQUAL:
21859 case PM_TOKEN_BANG_TILDE:
21860 case PM_TOKEN_EQUAL_EQUAL:
21861 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21862 case PM_TOKEN_LESS_EQUAL_GREATER:
21863 case PM_TOKEN_CARET:
21864 case PM_TOKEN_PIPE:
21865 case PM_TOKEN_AMPERSAND:
21866 case PM_TOKEN_GREATER_GREATER:
21867 case PM_TOKEN_LESS_LESS:
21868 case PM_TOKEN_MINUS:
21869 case PM_TOKEN_PLUS:
21870 case PM_TOKEN_PERCENT:
21871 case PM_TOKEN_SLASH:
21872 case PM_TOKEN_STAR:
21873 case PM_TOKEN_STAR_STAR: {
21874 parser_lex(parser);
21875 pm_token_t operator = parser->previous;
21876 switch (PM_NODE_TYPE(node)) {
21877 case PM_RESCUE_MODIFIER_NODE: {
21879 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21880 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21881 }
21882 break;
21883 }
21884 case PM_AND_NODE: {
21885 pm_and_node_t *cast = (pm_and_node_t *) node;
21886 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21887 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21888 }
21889 break;
21890 }
21891 case PM_OR_NODE: {
21892 pm_or_node_t *cast = (pm_or_node_t *) node;
21893 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21894 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21895 }
21896 break;
21897 }
21898 default:
21899 break;
21900 }
21901
21902 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21903 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21904 }
21905 case PM_TOKEN_GREATER:
21906 case PM_TOKEN_GREATER_EQUAL:
21907 case PM_TOKEN_LESS:
21908 case PM_TOKEN_LESS_EQUAL: {
21909 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21910 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21911 }
21912
21913 parser_lex(parser);
21914 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21915 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21916 }
21917 case PM_TOKEN_AMPERSAND_DOT:
21918 case PM_TOKEN_DOT: {
21919 parser_lex(parser);
21920 pm_token_t operator = parser->previous;
21921 pm_arguments_t arguments = { 0 };
21922
21923 // This if statement handles the foo.() syntax.
21924 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21925 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21926 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21927 }
21928
21929 switch (PM_NODE_TYPE(node)) {
21930 case PM_RESCUE_MODIFIER_NODE: {
21932 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21933 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21934 }
21935 break;
21936 }
21937 case PM_AND_NODE: {
21938 pm_and_node_t *cast = (pm_and_node_t *) node;
21939 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21940 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21941 }
21942 break;
21943 }
21944 case PM_OR_NODE: {
21945 pm_or_node_t *cast = (pm_or_node_t *) node;
21946 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21947 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21948 }
21949 break;
21950 }
21951 default:
21952 break;
21953 }
21954
21955 pm_token_t message;
21956
21957 switch (parser->current.type) {
21958 case PM_CASE_OPERATOR:
21959 case PM_CASE_KEYWORD:
21960 case PM_TOKEN_CONSTANT:
21961 case PM_TOKEN_IDENTIFIER:
21962 case PM_TOKEN_METHOD_NAME: {
21963 parser_lex(parser);
21964 message = parser->previous;
21965 break;
21966 }
21967 default: {
21968 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21969 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21970 }
21971 }
21972
21973 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21974 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21975
21976 if (
21977 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21978 arguments.arguments == NULL &&
21979 arguments.opening_loc.start == NULL &&
21980 match1(parser, PM_TOKEN_COMMA)
21981 ) {
21982 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21983 } else {
21984 return (pm_node_t *) call;
21985 }
21986 }
21987 case PM_TOKEN_DOT_DOT:
21988 case PM_TOKEN_DOT_DOT_DOT: {
21989 parser_lex(parser);
21990
21991 pm_node_t *right = NULL;
21992 if (token_begins_expression_p(parser->current.type)) {
21993 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21994 }
21995
21996 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21997 }
21998 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21999 pm_token_t keyword = parser->current;
22000 parser_lex(parser);
22001
22002 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
22003 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
22004 }
22005 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
22006 pm_token_t keyword = parser->current;
22007 parser_lex(parser);
22008
22009 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
22010 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
22011 }
22012 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
22013 parser_lex(parser);
22014 pm_statements_node_t *statements = pm_statements_node_create(parser);
22015 pm_statements_node_body_append(parser, statements, node, true);
22016
22017 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
22018 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
22019 }
22020 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
22021 parser_lex(parser);
22022 pm_statements_node_t *statements = pm_statements_node_create(parser);
22023 pm_statements_node_body_append(parser, statements, node, true);
22024
22025 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
22026 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
22027 }
22028 case PM_TOKEN_QUESTION_MARK: {
22029 context_push(parser, PM_CONTEXT_TERNARY);
22030 pm_node_list_t current_block_exits = { 0 };
22031 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22032
22033 pm_token_t qmark = parser->current;
22034 parser_lex(parser);
22035
22036 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
22037
22038 if (parser->recovering) {
22039 // If parsing the true expression of this ternary resulted in a syntax
22040 // error that we can recover from, then we're going to put missing nodes
22041 // and tokens into the remaining places. We want to be sure to do this
22042 // before the `expect` function call to make sure it doesn't
22043 // accidentally move past a ':' token that occurs after the syntax
22044 // error.
22045 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
22046 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
22047
22048 context_pop(parser);
22049 pop_block_exits(parser, previous_block_exits);
22050 pm_node_list_free(&current_block_exits);
22051
22052 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
22053 }
22054
22055 accept1(parser, PM_TOKEN_NEWLINE);
22056 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
22057
22058 pm_token_t colon = parser->previous;
22059 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
22060
22061 context_pop(parser);
22062 pop_block_exits(parser, previous_block_exits);
22063 pm_node_list_free(&current_block_exits);
22064
22065 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
22066 }
22067 case PM_TOKEN_COLON_COLON: {
22068 parser_lex(parser);
22069 pm_token_t delimiter = parser->previous;
22070
22071 switch (parser->current.type) {
22072 case PM_TOKEN_CONSTANT: {
22073 parser_lex(parser);
22074 pm_node_t *path;
22075
22076 if (
22077 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
22078 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
22079 ) {
22080 // If we have a constant immediately following a '::' operator, then
22081 // this can either be a constant path or a method call, depending on
22082 // what follows the constant.
22083 //
22084 // If we have parentheses, then this is a method call. That would
22085 // look like Foo::Bar().
22086 pm_token_t message = parser->previous;
22087 pm_arguments_t arguments = { 0 };
22088
22089 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
22090 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
22091 } else {
22092 // Otherwise, this is a constant path. That would look like Foo::Bar.
22093 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
22094 }
22095
22096 // If this is followed by a comma then it is a multiple assignment.
22097 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22098 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22099 }
22100
22101 return path;
22102 }
22103 case PM_CASE_OPERATOR:
22104 case PM_CASE_KEYWORD:
22105 case PM_TOKEN_IDENTIFIER:
22106 case PM_TOKEN_METHOD_NAME: {
22107 parser_lex(parser);
22108 pm_token_t message = parser->previous;
22109
22110 // If we have an identifier following a '::' operator, then it is for
22111 // sure a method call.
22112 pm_arguments_t arguments = { 0 };
22113 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
22114 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
22115
22116 // If this is followed by a comma then it is a multiple assignment.
22117 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22118 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22119 }
22120
22121 return (pm_node_t *) call;
22122 }
22123 case PM_TOKEN_PARENTHESIS_LEFT: {
22124 // If we have a parenthesis following a '::' operator, then it is the
22125 // method call shorthand. That would look like Foo::(bar).
22126 pm_arguments_t arguments = { 0 };
22127 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
22128
22129 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
22130 }
22131 default: {
22132 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
22133 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
22134 }
22135 }
22136 }
22137 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
22138 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
22139 parser_lex(parser);
22140 accept1(parser, PM_TOKEN_NEWLINE);
22141
22142 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
22143 context_pop(parser);
22144
22145 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
22146 }
22147 case PM_TOKEN_BRACKET_LEFT: {
22148 parser_lex(parser);
22149
22150 pm_arguments_t arguments = { 0 };
22151 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22152
22153 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
22154 pm_accepts_block_stack_push(parser, true);
22155 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
22156 pm_accepts_block_stack_pop(parser);
22157 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
22158 }
22159
22160 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22161
22162 // If we have a comma after the closing bracket then this is a multiple
22163 // assignment and we should parse the targets.
22164 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22165 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
22166 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22167 }
22168
22169 // If we're at the end of the arguments, we can now check if there is a
22170 // block node that starts with a {. If there is, then we can parse it and
22171 // add it to the arguments.
22172 pm_block_node_t *block = NULL;
22173 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
22174 block = parse_block(parser, (uint16_t) (depth + 1));
22175 pm_arguments_validate_block(parser, &arguments, block);
22176 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
22177 block = parse_block(parser, (uint16_t) (depth + 1));
22178 }
22179
22180 if (block != NULL) {
22181 if (arguments.block != NULL) {
22182 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
22183 if (arguments.arguments == NULL) {
22184 arguments.arguments = pm_arguments_node_create(parser);
22185 }
22186 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
22187 }
22188
22189 arguments.block = (pm_node_t *) block;
22190 }
22191
22192 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
22193 }
22194 case PM_TOKEN_KEYWORD_IN: {
22195 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22196 parser->pattern_matching_newlines = true;
22197
22198 pm_token_t operator = parser->current;
22199 parser->command_start = false;
22200 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22201 parser_lex(parser);
22202
22203 pm_constant_id_list_t captures = { 0 };
22204 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
22205
22206 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22207 pm_constant_id_list_free(&captures);
22208
22209 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
22210 }
22211 case PM_TOKEN_EQUAL_GREATER: {
22212 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22213 parser->pattern_matching_newlines = true;
22214
22215 pm_token_t operator = parser->current;
22216 parser->command_start = false;
22217 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22218 parser_lex(parser);
22219
22220 pm_constant_id_list_t captures = { 0 };
22221 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
22222
22223 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22224 pm_constant_id_list_free(&captures);
22225
22226 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
22227 }
22228 default:
22229 assert(false && "unreachable");
22230 return NULL;
22231 }
22232}
22233
22234#undef PM_PARSE_PATTERN_SINGLE
22235#undef PM_PARSE_PATTERN_TOP
22236#undef PM_PARSE_PATTERN_MULTI
22237
22242static inline bool
22243pm_call_node_command_p(const pm_call_node_t *node) {
22244 return (
22245 (node->opening_loc.start == NULL) &&
22246 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
22247 (node->arguments != NULL || node->block != NULL)
22248 );
22249}
22250
22259static pm_node_t *
22260parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
22261 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
22262 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
22263 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
22264 }
22265
22266 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
22267
22268 switch (PM_NODE_TYPE(node)) {
22269 case PM_MISSING_NODE:
22270 // If we found a syntax error, then the type of node returned by
22271 // parse_expression_prefix is going to be a missing node.
22272 return node;
22273 case PM_PRE_EXECUTION_NODE:
22274 case PM_POST_EXECUTION_NODE:
22275 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
22276 case PM_ALIAS_METHOD_NODE:
22277 case PM_MULTI_WRITE_NODE:
22278 case PM_UNDEF_NODE:
22279 // These expressions are statements, and cannot be followed by
22280 // operators (except modifiers).
22281 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22282 return node;
22283 }
22284 break;
22285 case PM_CALL_NODE:
22286 // If we have a call node, then we need to check if it looks like a
22287 // method call without parentheses that contains arguments. If it
22288 // does, then it has different rules for parsing infix operators,
22289 // namely that it only accepts composition (and/or) and modifiers
22290 // (if/unless/etc.).
22291 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
22292 return node;
22293 }
22294 break;
22295 case PM_SYMBOL_NODE:
22296 // If we have a symbol node that is being parsed as a label, then we
22297 // need to immediately return, because there should never be an
22298 // infix operator following this node.
22299 if (pm_symbol_node_label_p(node)) {
22300 return node;
22301 }
22302 break;
22303 default:
22304 break;
22305 }
22306
22307 // Otherwise we'll look and see if the next token can be parsed as an infix
22308 // operator. If it can, then we'll parse it using parse_expression_infix.
22309 pm_binding_powers_t current_binding_powers;
22310 pm_token_type_t current_token_type;
22311
22312 while (
22313 current_token_type = parser->current.type,
22314 current_binding_powers = pm_binding_powers[current_token_type],
22315 binding_power <= current_binding_powers.left &&
22316 current_binding_powers.binary
22317 ) {
22318 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
22319
22320 if (context_terminator(parser->current_context->context, &parser->current)) {
22321 // If this token terminates the current context, then we need to
22322 // stop parsing the expression, as it has become a statement.
22323 return node;
22324 }
22325
22326 switch (PM_NODE_TYPE(node)) {
22327 case PM_MULTI_WRITE_NODE:
22328 // Multi-write nodes are statements, and cannot be followed by
22329 // operators except modifiers.
22330 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22331 return node;
22332 }
22333 break;
22334 case PM_CLASS_VARIABLE_WRITE_NODE:
22335 case PM_CONSTANT_PATH_WRITE_NODE:
22336 case PM_CONSTANT_WRITE_NODE:
22337 case PM_GLOBAL_VARIABLE_WRITE_NODE:
22338 case PM_INSTANCE_VARIABLE_WRITE_NODE:
22339 case PM_LOCAL_VARIABLE_WRITE_NODE:
22340 // These expressions are statements, by virtue of the right-hand
22341 // side of their write being an implicit array.
22342 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22343 return node;
22344 }
22345 break;
22346 case PM_CALL_NODE:
22347 // These expressions are also statements, by virtue of the
22348 // right-hand side of the expression (i.e., the last argument to
22349 // the call node) being an implicit array.
22350 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22351 return node;
22352 }
22353 break;
22354 default:
22355 break;
22356 }
22357
22358 // If the operator is nonassoc and we should not be able to parse the
22359 // upcoming infix operator, break.
22360 if (current_binding_powers.nonassoc) {
22361 // If this is a non-assoc operator and we are about to parse the
22362 // exact same operator, then we need to add an error.
22363 if (match1(parser, current_token_type)) {
22364 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22365 break;
22366 }
22367
22368 // If this is an endless range, then we need to reject a couple of
22369 // additional operators because it violates the normal operator
22370 // precedence rules. Those patterns are:
22371 //
22372 // 1.. & 2
22373 // 1.. * 2
22374 //
22375 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
22376 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
22377 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22378 break;
22379 }
22380
22381 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22382 break;
22383 }
22384 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22385 break;
22386 }
22387 }
22388
22389 if (accepts_command_call) {
22390 // A command-style method call is only accepted on method chains.
22391 // Thus, we check whether the parsed node can continue method chains.
22392 // The method chain can continue if the parsed node is one of the following five kinds:
22393 // (1) index access: foo[1]
22394 // (2) attribute access: foo.bar
22395 // (3) method call with parenthesis: foo.bar(1)
22396 // (4) method call with a block: foo.bar do end
22397 // (5) constant path: foo::Bar
22398 switch (node->type) {
22399 case PM_CALL_NODE: {
22400 pm_call_node_t *cast = (pm_call_node_t *)node;
22401 if (
22402 // (1) foo[1]
22403 !(
22404 cast->call_operator_loc.start == NULL &&
22405 cast->message_loc.start != NULL &&
22406 cast->message_loc.start[0] == '[' &&
22407 cast->message_loc.end[-1] == ']'
22408 ) &&
22409 // (2) foo.bar
22410 !(
22411 cast->call_operator_loc.start != NULL &&
22412 cast->arguments == NULL &&
22413 cast->block == NULL &&
22414 cast->opening_loc.start == NULL
22415 ) &&
22416 // (3) foo.bar(1)
22417 !(
22418 cast->call_operator_loc.start != NULL &&
22419 cast->opening_loc.start != NULL
22420 ) &&
22421 // (4) foo.bar do end
22422 !(
22423 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22424 )
22425 ) {
22426 accepts_command_call = false;
22427 }
22428 break;
22429 }
22430 // (5) foo::Bar
22431 case PM_CONSTANT_PATH_NODE:
22432 break;
22433 default:
22434 accepts_command_call = false;
22435 break;
22436 }
22437 }
22438 }
22439
22440 return node;
22441}
22442
22447static pm_statements_node_t *
22448wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22449 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22450 if (statements == NULL) {
22451 statements = pm_statements_node_create(parser);
22452 }
22453
22454 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22455 pm_arguments_node_arguments_append(
22456 arguments,
22457 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22458 );
22459
22460 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22461 parser,
22462 arguments,
22463 pm_parser_constant_id_constant(parser, "print", 5)
22464 ), true);
22465 }
22466
22467 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22468 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22469 if (statements == NULL) {
22470 statements = pm_statements_node_create(parser);
22471 }
22472
22473 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22474 pm_arguments_node_arguments_append(
22475 arguments,
22476 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22477 );
22478
22479 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22480 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22481
22482 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22483 parser,
22484 pm_parser_constant_id_constant(parser, "$F", 2),
22485 (pm_node_t *) call
22486 );
22487
22488 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22489 }
22490
22491 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22492 pm_arguments_node_arguments_append(
22493 arguments,
22494 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22495 );
22496
22497 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22498 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22499 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22500 parser,
22501 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22502 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22503 (pm_node_t *) pm_true_node_synthesized_create(parser)
22504 ));
22505
22506 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22507 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22508 }
22509
22510 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22511 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22512 parser,
22513 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22514 statements
22515 ), true);
22516
22517 statements = wrapped_statements;
22518 }
22519
22520 return statements;
22521}
22522
22526static pm_node_t *
22527parse_program(pm_parser_t *parser) {
22528 // If the current scope is NULL, then we want to push a new top level scope.
22529 // The current scope could exist in the event that we are parsing an eval
22530 // and the user has passed into scopes that already exist.
22531 if (parser->current_scope == NULL) {
22532 pm_parser_scope_push(parser, true);
22533 }
22534
22535 pm_node_list_t current_block_exits = { 0 };
22536 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22537
22538 parser_lex(parser);
22539 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22540
22541 if (statements != NULL && !parser->parsing_eval) {
22542 // If we have statements, then the top-level statement should be
22543 // explicitly checked as well. We have to do this here because
22544 // everywhere else we check all but the last statement.
22545 assert(statements->body.size > 0);
22546 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22547 }
22548
22549 pm_constant_id_list_t locals;
22550 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22551 pm_parser_scope_pop(parser);
22552
22553 // At the top level, see if we need to wrap the statements in a program
22554 // node with a while loop based on the options.
22556 statements = wrap_statements(parser, statements);
22557 } else {
22558 flush_block_exits(parser, previous_block_exits);
22559 }
22560
22561 pm_node_list_free(&current_block_exits);
22562
22563 // If this is an empty file, then we're still going to parse all of the
22564 // statements in order to gather up all of the comments and such. Here we'll
22565 // correct the location information.
22566 if (statements == NULL) {
22567 statements = pm_statements_node_create(parser);
22568 pm_statements_node_location_set(statements, parser->start, parser->start);
22569 }
22570
22571 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22572}
22573
22574/******************************************************************************/
22575/* External functions */
22576/******************************************************************************/
22577
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`.
 *
 * `big` does not need to be NUL-terminated and may contain NUL bytes; only
 * `big_length` bounds the search. A match is only reported when it lies
 * entirely within that window, and no byte at or beyond `big + big_length` is
 * ever read.
 *
 * Note that, because the first byte of `little` is compared before the rest,
 * an empty `little` matches at the first NUL byte inside the window (if any).
 *
 * @param big The buffer to search in.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the match, or NULL if there is none.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // Stop as soon as fewer than little_length bytes remain: a candidate
    // starting there could not fit inside the window, and comparing it would
    // read past the end of the buffer.
    for (size_t remaining = big_length; remaining > 0 && remaining >= little_length; big++, remaining--) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22597
22598#ifdef _WIN32
22599#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22600#else
22606static void
22607pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22608 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22609 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22610 }
22611}
22612#endif
22613
22618static void
22619pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22620 const char *switches = pm_strnstr(engine, " -", length);
22621 if (switches == NULL) return;
22622
22623 pm_options_t next_options = *options;
22624 options->shebang_callback(
22625 &next_options,
22626 (const uint8_t *) (switches + 1),
22627 length - ((size_t) (switches - engine)) - 1,
22628 options->shebang_callback_data
22629 );
22630
22631 size_t encoding_length;
22632 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22633 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22634 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22635 }
22636
22637 parser->command_line = next_options.command_line;
22638 parser->frozen_string_literal = next_options.frozen_string_literal;
22639}
22640
22645pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22646 assert(source != NULL);
22647
22648 *parser = (pm_parser_t) {
22649 .node_id = 0,
22650 .lex_state = PM_LEX_STATE_BEG,
22651 .enclosure_nesting = 0,
22652 .lambda_enclosure_nesting = -1,
22653 .brace_nesting = 0,
22654 .do_loop_stack = 0,
22655 .accepts_block_stack = 0,
22656 .lex_modes = {
22657 .index = 0,
22658 .stack = {{ .mode = PM_LEX_DEFAULT }},
22659 .current = &parser->lex_modes.stack[0],
22660 },
22661 .start = source,
22662 .end = source + size,
22663 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22664 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22665 .next_start = NULL,
22666 .heredoc_end = NULL,
22667 .data_loc = { .start = NULL, .end = NULL },
22668 .comment_list = { 0 },
22669 .magic_comment_list = { 0 },
22670 .warning_list = { 0 },
22671 .error_list = { 0 },
22672 .current_scope = NULL,
22673 .current_context = NULL,
22674 .encoding = PM_ENCODING_UTF_8_ENTRY,
22675 .encoding_changed_callback = NULL,
22676 .encoding_comment_start = source,
22677 .lex_callback = NULL,
22678 .filepath = { 0 },
22679 .constant_pool = { 0 },
22680 .newline_list = { 0 },
22681 .integer_base = 0,
22682 .current_string = PM_STRING_EMPTY,
22683 .start_line = 1,
22684 .explicit_encoding = NULL,
22685 .command_line = 0,
22686 .parsing_eval = false,
22687 .partial_script = false,
22688 .command_start = true,
22689 .recovering = false,
22690 .encoding_locked = false,
22691 .encoding_changed = false,
22692 .pattern_matching_newlines = false,
22693 .in_keyword_arg = false,
22694 .current_block_exits = NULL,
22695 .semantic_token_seen = false,
22696 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22697 .current_regular_expression_ascii_only = false,
22698 .warn_mismatched_indentation = true
22699 };
22700
22701 // Initialize the constant pool. We're going to completely guess as to the
22702 // number of constants that we'll need based on the size of the input. The
22703 // ratio we chose here is actually less arbitrary than you might think.
22704 //
22705 // We took ~50K Ruby files and measured the size of the file versus the
22706 // number of constants that were found in those files. Then we found the
22707 // average and standard deviation of the ratios of constants/bytesize. Then
22708 // we added 1.34 standard deviations to the average to get a ratio that
22709 // would fit 75% of the files (for a two-tailed distribution). This works
22710 // because there was about a 0.77 correlation and the distribution was
22711 // roughly normal.
22712 //
22713 // This ratio will need to change if we add more constants to the constant
22714 // pool for another node type.
22715 uint32_t constant_size = ((uint32_t) size) / 95;
22716 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22717
22718 // Initialize the newline list. Similar to the constant pool, we're going to
22719 // guess at the number of newlines that we'll need based on the size of the
22720 // input.
22721 size_t newline_size = size / 22;
22722 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22723
22724 // If options were provided to this parse, establish them here.
22725 if (options != NULL) {
22726 // filepath option
22727 parser->filepath = options->filepath;
22728
22729 // line option
22730 parser->start_line = options->line;
22731
22732 // encoding option
22733 size_t encoding_length = pm_string_length(&options->encoding);
22734 if (encoding_length > 0) {
22735 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22736 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22737 }
22738
22739 // encoding_locked option
22740 parser->encoding_locked = options->encoding_locked;
22741
22742 // frozen_string_literal option
22744
22745 // command_line option
22746 parser->command_line = options->command_line;
22747
22748 // version option
22749 parser->version = options->version;
22750
22751 // partial_script
22752 parser->partial_script = options->partial_script;
22753
22754 // scopes option
22755 parser->parsing_eval = options->scopes_count > 0;
22756 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22757
22758 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22759 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22760 pm_parser_scope_push(parser, scope_index == 0);
22761
22762 // Scopes given from the outside are not allowed to have numbered
22763 // parameters.
22764 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22765
22766 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22767 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22768
22769 const uint8_t *source = pm_string_source(local);
22770 size_t length = pm_string_length(local);
22771
22772 void *allocated = xmalloc(length);
22773 if (allocated == NULL) continue;
22774
22775 memcpy(allocated, source, length);
22776 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22777 }
22778 }
22779 }
22780
22781 // Now that we have established the user-provided options, check if
22782 // a version was given and parse as the latest version otherwise.
22783 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22785 }
22786
22787 pm_accepts_block_stack_push(parser, true);
22788
22789 // Skip past the UTF-8 BOM if it exists.
22790 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22791 parser->current.end += 3;
22792 parser->encoding_comment_start += 3;
22793
22794 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22796 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22797 }
22798 }
22799
22800 // If the -x command line flag is set, or the first shebang of the file does
22801 // not include "ruby", then we'll search for a shebang that does include
22802 // "ruby" and start parsing from there.
22803 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22804
22805 // If the first two bytes of the source are a shebang, then we will do a bit
22806 // of extra processing.
22807 //
22808 // First, we'll indicate that the encoding comment is at the end of the
22809 // shebang. This means that when a shebang is present the encoding comment
22810 // can begin on the second line.
22811 //
22812 // Second, we will check if the shebang includes "ruby". If it does, then
22813 // we will start parsing from there. We will also potentially warn the
22814 // user if there is a carriage return at the end of the shebang. We will
22815 // also potentially call the shebang callback if this is the main script to
22816 // allow the caller to parse the shebang and find any command-line options.
22817 // If the shebang does not include "ruby" and this is the main script being
22818 // parsed, then we will start searching the file for a shebang that does
22819 // contain "ruby" as if -x were passed on the command line.
22820 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22821 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22822
22823 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22824 const char *engine;
22825
22826 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22827 if (newline != NULL) {
22828 parser->encoding_comment_start = newline + 1;
22829
22830 if (options == NULL || options->main_script) {
22831 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22832 }
22833 }
22834
22835 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22836 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22837 }
22838
22839 search_shebang = false;
22840 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22841 search_shebang = true;
22842 }
22843 }
22844
22845 // Here we're going to find the first shebang that includes "ruby" and start
22846 // parsing from there.
22847 if (search_shebang) {
22848 // If a shebang that includes "ruby" is not found, then we're going to add
22849 // a load error to the list of errors on the parser.
22850 bool found_shebang = false;
22851
22852 // This is going to point to the start of each line as we check it.
22853 // We'll maintain a moving window looking at each line as they come.
22854 const uint8_t *cursor = parser->start;
22855
22856 // The newline pointer points to the end of the current line that we're
22857 // considering. If it is NULL, then we're at the end of the file.
22858 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22859
22860 while (newline != NULL) {
22861 pm_newline_list_append(&parser->newline_list, newline);
22862
22863 cursor = newline + 1;
22864 newline = next_newline(cursor, parser->end - cursor);
22865
22866 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22867 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22868 const char *engine;
22869 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22870 found_shebang = true;
22871
22872 if (newline != NULL) {
22873 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22874 parser->encoding_comment_start = newline + 1;
22875 }
22876
22877 if (options != NULL && options->shebang_callback != NULL) {
22878 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22879 }
22880
22881 break;
22882 }
22883 }
22884 }
22885
22886 if (found_shebang) {
22887 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22888 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22889 } else {
22890 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22891 pm_newline_list_clear(&parser->newline_list);
22892 }
22893 }
22894
22895 // The encoding comment can start after any amount of inline whitespace, so
22896 // here we'll advance it to the first non-inline-whitespace character so
22897 // that it is ready for future comparisons.
22898 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22899}
22900
22909
22913static inline void
22914pm_comment_list_free(pm_list_t *list) {
22915 pm_list_node_t *node, *next;
22916
22917 for (node = list->head; node != NULL; node = next) {
22918 next = node->next;
22919
22920 pm_comment_t *comment = (pm_comment_t *) node;
22921 xfree(comment);
22922 }
22923}
22924
22928static inline void
22929pm_magic_comment_list_free(pm_list_t *list) {
22930 pm_list_node_t *node, *next;
22931
22932 for (node = list->head; node != NULL; node = next) {
22933 next = node->next;
22934
22937 }
22938}
22939
22945 pm_string_free(&parser->filepath);
22946 pm_diagnostic_list_free(&parser->error_list);
22947 pm_diagnostic_list_free(&parser->warning_list);
22948 pm_comment_list_free(&parser->comment_list);
22949 pm_magic_comment_list_free(&parser->magic_comment_list);
22950 pm_constant_pool_free(&parser->constant_pool);
22951 pm_newline_list_free(&parser->newline_list);
22952
22953 while (parser->current_scope != NULL) {
22954 // Normally, popping the scope doesn't free the locals since it is
22955 // assumed that ownership has transferred to the AST. However if we have
22956 // scopes while we're freeing the parser, it's likely they came from
22957 // eval scopes and we need to free them explicitly here.
22958 pm_parser_scope_pop(parser);
22959 }
22960
22961 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22962 lex_mode_pop(parser);
22963 }
22964}
22965
22971 return parse_program(parser);
22972}
22973
22979static bool
22980pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22981#define LINE_SIZE 4096
22982 char line[LINE_SIZE];
22983
22984 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22985 size_t length = LINE_SIZE;
22986 while (length > 0 && line[length - 1] == '\n') length--;
22987
22988 if (length == LINE_SIZE) {
22989 // If we read a line that is the maximum size and it doesn't end
22990 // with a newline, then we'll just append it to the buffer and
22991 // continue reading.
22992 length--;
22993 pm_buffer_append_string(buffer, line, length);
22994 continue;
22995 }
22996
22997 // Append the line to the buffer.
22998 length--;
22999 pm_buffer_append_string(buffer, line, length);
23000
23001 // Check if the line matches the __END__ marker. If it does, then stop
23002 // reading and return false. In most circumstances, this means we should
23003 // stop reading from the stream so that the DATA constant can pick it
23004 // up.
23005 switch (length) {
23006 case 7:
23007 if (strncmp(line, "__END__", 7) == 0) return false;
23008 break;
23009 case 8:
23010 if (strncmp(line, "__END__\n", 8) == 0) return false;
23011 break;
23012 case 9:
23013 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
23014 break;
23015 }
23016
23017 // All data should be read via gets. If the string returned by gets
23018 // _doesn't_ end with a newline, then we assume we hit EOF condition.
23019 if (stream_feof(stream)) {
23020 break;
23021 }
23022 }
23023
23024 return true;
23025#undef LINE_SIZE
23026}
23027
23037static bool
23038pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
23039 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
23040
23041 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
23042 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
23043 return true;
23044 }
23045 }
23046
23047 return false;
23048}
23049
23057pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
23058 pm_buffer_init(buffer);
23059
23060 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
23061
23062 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
23063 pm_node_t *node = pm_parse(parser);
23064
23065 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
23066 pm_node_destroy(parser, node);
23067 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
23068
23069 pm_parser_free(parser);
23070 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
23071 node = pm_parse(parser);
23072 }
23073
23074 return node;
23075}
23076
23081pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
23082 pm_options_t options = { 0 };
23083 pm_options_read(&options, data);
23084
23085 pm_parser_t parser;
23086 pm_parser_init(&parser, source, size, &options);
23087
23088 pm_node_t *node = pm_parse(&parser);
23089 pm_node_destroy(&parser, node);
23090
23091 bool result = parser.error_list.size == 0;
23092 pm_parser_free(&parser);
23093 pm_options_free(&options);
23094
23095 return result;
23096}
23097
23098#undef PM_CASE_KEYWORD
23099#undef PM_CASE_OPERATOR
23100#undef PM_CASE_WRITABLE
23101#undef PM_STRING_EMPTY
23102#undef PM_LOCATION_NODE_BASE_VALUE
23103#undef PM_LOCATION_NODE_VALUE
23104#undef PM_LOCATION_NULL_VALUE
23105#undef PM_LOCATION_TOKEN_VALUE
23106
23107// We optionally support serializing to a binary string. For systems that don't
23108// want or need this functionality, it can be turned off with the
23109// PRISM_EXCLUDE_SERIALIZATION define.
23110#ifndef PRISM_EXCLUDE_SERIALIZATION
23111
23112static inline void
23113pm_serialize_header(pm_buffer_t *buffer) {
23114 pm_buffer_append_string(buffer, "PRISM", 5);
23115 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
23116 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
23117 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
23118 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
23119}
23120
23125pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
23126 pm_serialize_header(buffer);
23127 pm_serialize_content(parser, node, buffer);
23128 pm_buffer_append_byte(buffer, '\0');
23129}
23130
23136pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23137 pm_options_t options = { 0 };
23138 pm_options_read(&options, data);
23139
23140 pm_parser_t parser;
23141 pm_parser_init(&parser, source, size, &options);
23142
23143 pm_node_t *node = pm_parse(&parser);
23144
23145 pm_serialize_header(buffer);
23146 pm_serialize_content(&parser, node, buffer);
23147 pm_buffer_append_byte(buffer, '\0');
23148
23149 pm_node_destroy(&parser, node);
23150 pm_parser_free(&parser);
23151 pm_options_free(&options);
23152}
23153
23159pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
23160 pm_parser_t parser;
23161 pm_options_t options = { 0 };
23162 pm_options_read(&options, data);
23163
23164 pm_buffer_t parser_buffer;
23165 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
23166 pm_serialize_header(buffer);
23167 pm_serialize_content(&parser, node, buffer);
23168 pm_buffer_append_byte(buffer, '\0');
23169
23170 pm_node_destroy(&parser, node);
23171 pm_buffer_free(&parser_buffer);
23172 pm_parser_free(&parser);
23173 pm_options_free(&options);
23174}
23175
23180pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23181 pm_options_t options = { 0 };
23182 pm_options_read(&options, data);
23183
23184 pm_parser_t parser;
23185 pm_parser_init(&parser, source, size, &options);
23186
23187 pm_node_t *node = pm_parse(&parser);
23188 pm_serialize_header(buffer);
23189 pm_serialize_encoding(parser.encoding, buffer);
23190 pm_buffer_append_varsint(buffer, parser.start_line);
23191 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
23192
23193 pm_node_destroy(&parser, node);
23194 pm_parser_free(&parser);
23195 pm_options_free(&options);
23196}
23197
23198#endif
23199
23200/******************************************************************************/
23201/* Slice queries for the Ruby API */
23202/******************************************************************************/
23203
/** The category that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** Returned when the given encoding name could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice matches none of the other categories. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier whose first character is not uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier whose first character is uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
23221
23225pm_slice_type_t
23226pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
23227 // first, get the right encoding object
23228 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
23229 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
23230
23231 // check that there is at least one character
23232 if (length == 0) return PM_SLICE_TYPE_NONE;
23233
23234 size_t width;
23235 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
23236 // valid because alphabetical
23237 } else if (*source == '_') {
23238 // valid because underscore
23239 width = 1;
23240 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
23241 // valid because multibyte
23242 } else {
23243 // invalid because no match
23244 return PM_SLICE_TYPE_NONE;
23245 }
23246
23247 // determine the type of the slice based on the first character
23248 const uint8_t *end = source + length;
23249 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
23250
23251 // next, iterate through all of the bytes of the string to ensure that they
23252 // are all valid identifier characters
23253 source += width;
23254
23255 while (source < end) {
23256 if ((width = encoding->alnum_char(source, end - source)) != 0) {
23257 // valid because alphanumeric
23258 source += width;
23259 } else if (*source == '_') {
23260 // valid because underscore
23261 source++;
23262 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
23263 // valid because multibyte
23264 source += width;
23265 } else {
23266 // invalid because no match
23267 break;
23268 }
23269 }
23270
23271 // accept a ! or ? at the end of the slice as a method name
23272 if (*source == '!' || *source == '?' || *source == '=') {
23273 source++;
23274 result = PM_SLICE_TYPE_METHOD_NAME;
23275 }
23276
23277 // valid if we are at the end of the slice
23278 return source == end ? result : PM_SLICE_TYPE_NONE;
23279}
23280
23285pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
23286 switch (pm_slice_type(source, length, encoding_name)) {
23287 case PM_SLICE_TYPE_ERROR:
23288 return PM_STRING_QUERY_ERROR;
23289 case PM_SLICE_TYPE_NONE:
23290 case PM_SLICE_TYPE_CONSTANT:
23291 case PM_SLICE_TYPE_METHOD_NAME:
23292 return PM_STRING_QUERY_FALSE;
23293 case PM_SLICE_TYPE_LOCAL:
23294 return PM_STRING_QUERY_TRUE;
23295 }
23296
23297 assert(false && "unreachable");
23298 return PM_STRING_QUERY_FALSE;
23299}
23300
23305pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
23306 switch (pm_slice_type(source, length, encoding_name)) {
23307 case PM_SLICE_TYPE_ERROR:
23308 return PM_STRING_QUERY_ERROR;
23309 case PM_SLICE_TYPE_NONE:
23310 case PM_SLICE_TYPE_LOCAL:
23311 case PM_SLICE_TYPE_METHOD_NAME:
23312 return PM_STRING_QUERY_FALSE;
23313 case PM_SLICE_TYPE_CONSTANT:
23314 return PM_STRING_QUERY_TRUE;
23315 }
23316
23317 assert(false && "unreachable");
23318 return PM_STRING_QUERY_FALSE;
23319}
23320
23325pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
23326#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
23327#define C1(c) (*source == c)
23328#define C2(s) (memcmp(source, s, 2) == 0)
23329#define C3(s) (memcmp(source, s, 3) == 0)
23330
23331 switch (pm_slice_type(source, length, encoding_name)) {
23332 case PM_SLICE_TYPE_ERROR:
23333 return PM_STRING_QUERY_ERROR;
23334 case PM_SLICE_TYPE_NONE:
23335 break;
23336 case PM_SLICE_TYPE_LOCAL:
23337 // numbered parameters are not valid method names
23338 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
23339 case PM_SLICE_TYPE_CONSTANT:
23340 // all constants are valid method names
23341 case PM_SLICE_TYPE_METHOD_NAME:
23342 // all method names are valid method names
23343 return PM_STRING_QUERY_TRUE;
23344 }
23345
23346 switch (length) {
23347 case 1:
23348 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
23349 case 2:
23350 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
23351 case 3:
23352 return B(C3("===") || C3("<=>") || C3("[]="));
23353 default:
23354 return PM_STRING_QUERY_FALSE;
23355 }
23356
23357#undef B
23358#undef C1
23359#undef C2
23360#undef C3
23361}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:208
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:192
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:172
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:219
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:225
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:98
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:86
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_CRUBY_3_5
The vendored version of prism in CRuby 3.5.x.
Definition options.h:95
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statements
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statements
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:566
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:355
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:351
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:359
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:367
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22970
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22906
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22944
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:23057
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:22645
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:265
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:273
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:267
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:270
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2141
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:102
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2118
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:109
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2048
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:18120
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:18122
const uint8_t * start
The start of the regular expression.
Definition prism.c:18125
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:18136
const uint8_t * end
The end of the regular expression.
Definition prism.c:18128
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:21041
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:21052
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:21043
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:21049
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:21046
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:21059
AndNode.
Definition ast.h:1262
struct pm_node * left
AndNode::left.
Definition ast.h:1278
struct pm_node * right
AndNode::right.
Definition ast.h:1291
ArgumentsNode.
Definition ast.h:1323
pm_node_t base
The embedded base node.
Definition ast.h:1325
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1336
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
ArrayNode.
Definition ast.h:1354
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1364
ArrayPatternNode.
Definition ast.h:1415
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1434
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1474
pm_node_t base
The embedded base node.
Definition ast.h:1417
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1484
AssocNode.
Definition ast.h:1499
struct pm_node * value
AssocNode::value.
Definition ast.h:1531
struct pm_node * key
AssocNode::key.
Definition ast.h:1518
BeginNode.
Definition ast.h:1625
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1678
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1658
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1648
pm_node_t base
The embedded base node.
Definition ast.h:1627
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1668
This struct represents a set of binding powers used for a given token.
Definition prism.c:13031
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:13039
pm_binding_power_t left
The left binding power.
Definition prism.c:13033
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:13045
pm_binding_power_t right
The right binding power.
Definition prism.c:13036
BlockLocalVariableNode.
Definition ast.h:1744
BlockNode.
Definition ast.h:1772
BlockParameterNode.
Definition ast.h:1848
BlockParametersNode.
Definition ast.h:1902
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2129
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2190
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2210
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2148
pm_constant_id_t name
CallNode::name.
Definition ast.h:2171
pm_node_t base
The embedded base node.
Definition ast.h:2131
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2161
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2181
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2200
struct pm_node * block
CallNode::block.
Definition ast.h:2220
CaseMatchNode.
Definition ast.h:2555
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2578
CaseNode.
Definition ast.h:2625
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2648
ClassVariableReadNode.
Definition ast.h:2920
ClassVariableTargetNode.
Definition ast.h:2949
ClassVariableWriteNode.
Definition ast.h:2972
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:466
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition ast.h:3186
ConstantPathTargetNode.
Definition ast.h:3324
ConstantReadNode.
Definition ast.h:3419
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3448
ConstantWriteNode.
Definition ast.h:3471
This is a node in a linked list of contexts.
Definition parser.h:439
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:364
ElseNode.
Definition ast.h:3650
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3663
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3748
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3761
FindPatternNode.
Definition ast.h:3808
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3821
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3873
pm_node_t base
The embedded base node.
Definition ast.h:3810
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3886
FlipFlopNode.
Definition ast.h:3904
FloatNode.
Definition ast.h:3937
double value
FloatNode::value.
Definition ast.h:3947
pm_node_t base
The embedded base node.
Definition ast.h:3939
ForwardingParameterNode.
Definition ast.h:4073
GlobalVariableReadNode.
Definition ast.h:4233
GlobalVariableTargetNode.
Definition ast.h:4262
GlobalVariableWriteNode.
Definition ast.h:4285
HashNode.
Definition ast.h:4347
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4373
HashPatternNode.
Definition ast.h:4407
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4462
pm_node_t base
The embedded base node.
Definition ast.h:4409
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4475
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4423
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4496
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4556
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4575
ImaginaryNode.
Definition ast.h:4602
InstanceVariableReadNode.
Definition ast.h:5092
InstanceVariableTargetNode.
Definition ast.h:5121
InstanceVariableWriteNode.
Definition ast.h:5144
IntegerNode.
Definition ast.h:5212
pm_integer_t value
IntegerNode::value.
Definition ast.h:5222
pm_node_t base
The embedded base node.
Definition ast.h:5214
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5250
InterpolatedRegularExpressionNode.
Definition ast.h:5296
InterpolatedStringNode.
Definition ast.h:5333
pm_node_t base
The embedded base node.
Definition ast.h:5335
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5341
InterpolatedSymbolNode.
Definition ast.h:5366
pm_node_t base
The embedded base node.
Definition ast.h:5368
InterpolatedXStringNode.
Definition ast.h:5399
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5407
pm_node_t base
The embedded base node.
Definition ast.h:5401
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5412
KeywordHashNode.
Definition ast.h:5471
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
enum pm_lex_mode::@95 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
union pm_lex_mode::@96 as
The data associated with this type of lex mode.
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
LocalVariableReadNode.
Definition ast.h:5713
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5744
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5731
LocalVariableTargetNode.
Definition ast.h:5762
LocalVariableWriteNode.
Definition ast.h:5790
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5817
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5804
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
MatchLastLineNode.
Definition ast.h:5882
MatchWriteNode.
Definition ast.h:6040
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6053
MissingNode.
Definition ast.h:6065
MultiTargetNode.
Definition ast.h:6136
pm_node_t base
The embedded base node.
Definition ast.h:6138
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6194
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6154
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6204
MultiWriteNode.
Definition ast.h:6219
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:557
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1068
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1073
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1079
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1091
OptionalParameterNode.
Definition ast.h:6492
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:104
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:153
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:115
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:169
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:176
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:130
int32_t line
The line within the file that the parse starts on.
Definition options.h:124
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:109
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:162
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:186
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:135
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:118
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:150
OrNode.
Definition ast.h:6530
struct pm_node * left
OrNode::left.
Definition ast.h:6546
struct pm_node * right
OrNode::right.
Definition ast.h:6559
ParametersNode.
Definition ast.h:6585
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6603
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6623
pm_node_t base
The embedded base node.
Definition ast.h:6587
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6618
ParenthesesNode.
Definition ast.h:6641
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6649
This struct represents the overall parser.
Definition parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts the calc...
Definition parser.h:909
struct pm_parser::@101 lex_modes
A stack of lex modes.
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:646
RangeNode.
Definition ast.h:6877
struct pm_node * right
RangeNode::right.
Definition ast.h:6907
struct pm_node * left
RangeNode::left.
Definition ast.h:6893
RationalNode.
Definition ast.h:6935
pm_node_t base
The embedded base node.
Definition ast.h:6937
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6947
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:10377
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10382
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10379
RegularExpressionNode.
Definition ast.h:7002
pm_node_t base
The embedded base node.
Definition ast.h:7004
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:7025
RequiredParameterNode.
Definition ast.h:7076
RescueModifierNode.
Definition ast.h:7099
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7117
RescueNode.
Definition ast.h:7137
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7175
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7165
pm_node_t base
The embedded base node.
Definition ast.h:7139
This struct represents a node in a linked list of scopes.
Definition parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
SplatNode.
Definition ast.h:7437
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7450
StatementsNode.
Definition ast.h:7465
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7473
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7500
pm_node_t base
The embedded base node.
Definition ast.h:7502
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7523
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7518
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7508
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@102 type
The type of the string.
SymbolNode.
Definition ast.h:7592
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7605
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7615
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:10351
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10356
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10362
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
UndefNode.
Definition ast.h:7648
UnlessNode.
Definition ast.h:7679
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7729
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7739
WhenNode.
Definition ast.h:7815
XStringNode.
Definition ast.h:7906