Ruby 3.5.0dev (2025-04-04 revision 6b5e187d0eb07994fee7b5f0336da388a793dcbb)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
// Size value associated with tab whitespace. The code that consumes this
// constant is not part of this section of the file.
#define PM_TAB_WHITESPACE_SIZE 8

// Minimum and maximum of two values. Each argument may be evaluated twice, so
// do not pass expressions that have side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
20
21/******************************************************************************/
22/* Lex mode manipulations */
23/******************************************************************************/
24
/**
 * Return the byte that increases the nesting level for a literal opened with
 * the given delimiter.
 *
 * @param start The opening delimiter.
 * @return start itself when it is one of '(', '[', '{' or '<'; otherwise
 *     '\0', meaning the delimiter does not nest.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
41
/**
 * Return the byte that closes a literal opened with the given delimiter.
 *
 * @param start The opening delimiter.
 * @return The matching closing bracket for '(', '[', '{' and '<'; for every
 *     other byte the delimiter closes itself, so start is returned.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    uint8_t closing = start;

    if (start == '(') {
        closing = ')';
    } else if (start == '[') {
        closing = ']';
    } else if (start == '{') {
        closing = '}';
    } else if (start == '<') {
        closing = '>';
    }

    return closing;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * The possible answers of lex_state_ignored_p.
 */
typedef enum {
    /** Neither of the two conditions checked by lex_state_ignored_p holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** The lex state is a BEG, CLASS, FNAME or DOT state without LABELED. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** The lex state, disregarding LABEL, is exactly ARG | LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
// Lex state debug logging is disabled unless the build defines
// PM_DEBUG_LOGGING itself.
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323
324#if PM_DEBUG_LOGGING
325PRISM_ATTRIBUTE_UNUSED static void
326debug_state(pm_parser_t *parser) {
327 fprintf(stderr, "STATE: ");
328 bool first = true;
329
330 if (parser->lex_state == PM_LEX_STATE_NONE) {
331 fprintf(stderr, "NONE\n");
332 return;
333 }
334
335#define CHECK_STATE(state) \
336 if (parser->lex_state & state) { \
337 if (!first) fprintf(stderr, "|"); \
338 fprintf(stderr, "%s", #state); \
339 first = false; \
340 }
341
342 CHECK_STATE(PM_LEX_STATE_BEG)
343 CHECK_STATE(PM_LEX_STATE_END)
344 CHECK_STATE(PM_LEX_STATE_ENDARG)
345 CHECK_STATE(PM_LEX_STATE_ENDFN)
346 CHECK_STATE(PM_LEX_STATE_ARG)
347 CHECK_STATE(PM_LEX_STATE_CMDARG)
348 CHECK_STATE(PM_LEX_STATE_MID)
349 CHECK_STATE(PM_LEX_STATE_FNAME)
350 CHECK_STATE(PM_LEX_STATE_DOT)
351 CHECK_STATE(PM_LEX_STATE_CLASS)
352 CHECK_STATE(PM_LEX_STATE_LABEL)
353 CHECK_STATE(PM_LEX_STATE_LABELED)
354 CHECK_STATE(PM_LEX_STATE_FITEM)
355
356#undef CHECK_STATE
357
358 fprintf(stderr, "\n");
359}
360
361static void
362debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364 debug_state(parser);
365 lex_state_set(parser, state);
366 fprintf(stderr, "Now: ");
367 debug_state(parser);
368 fprintf(stderr, "\n");
369}
370
// From here on every lex_state_set call goes through the logging variant and
// reports the calling function and line.
#define lex_state_set(parser, state) \
    debug_lex_state_set(parser, state, __func__, __LINE__)
372#endif
373
374/******************************************************************************/
375/* Command-line macro helpers */
376/******************************************************************************/
377
/** Non-zero when the given option bit is set in the parser's command_line field. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) \
    ((parser)->command_line & (option))

/** Non-zero when PM_OPTIONS_COMMAND_LINE_A is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_E is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_L is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_N is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_P is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_X is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
399/******************************************************************************/
400/* Diagnostic-related functions */
401/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append a formatted diagnostic to the parser's error list. The variadic
 * arguments are forwarded to pm_diagnostic_list_append_format as the format
 * arguments; at least one must be supplied.
 *
 * The parser argument is parenthesized so that any pointer expression, not
 * only a plain identifier, expands correctly.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error to the parser's error list using the bounds of
 * the given location, which is passed as a pointer.
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (location)->start, \
        (location)->end, \
        diag_id, \
        __VA_ARGS__ \
    )
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/**
 * Append a formatted error to the parser's error list using the bounds of
 * the given node's location. The node is passed as a pointer.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )
448
/**
 * Append a formatted error for the given node, supplying the node's source
 * text as the format arguments: its length as an int followed by a pointer
 * to its first byte (the shape a "%.*s" conversion takes; the diagnostic's
 * message itself is defined elsewhere).
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT( \
        parser, \
        node, \
        diag_id, \
        (int) ((node)->location.end - (node)->location.start), \
        (const char *) (node)->location.start \
    )
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error to the parser's error list using the bounds of
 * the given token. Unlike pm_parser_err_token, the token is passed as a
 * value (its members are accessed with '.'), not as a pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )
480
/**
 * Append a formatted error for the given token (passed as a value),
 * supplying the token's source text as the format arguments: its length as
 * an int followed by a pointer to its first byte.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a formatted diagnostic to the parser's warning list. The variadic
 * arguments are forwarded to pm_diagnostic_list_append_format as the format
 * arguments; at least one must be supplied.
 *
 * The parser argument is parenthesized so that any pointer expression, not
 * only a plain identifier, expands correctly.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
519
/**
 * Append a formatted warning to the parser's warning list using the bounds
 * of the given token. The token is passed as a value (its members are
 * accessed with '.'), not as a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )
526
/**
 * Append a formatted warning for the given token (passed as a value),
 * supplying the token's source text as the format arguments: its length as
 * an int followed by a pointer to its first byte.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
533
/**
 * Append a formatted warning to the parser's warning list using the bounds
 * of the given node's location. The node is passed as a pointer.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
558/******************************************************************************/
559/* Scope-related functions */
560/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/**
 * The possible outcomes of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** A closed scope declares the parameter and no open scope on the way does. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /** A closed scope declares the parameter, but so does an open scope on the way. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** No closed scope declaring the parameter was found. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
729/******************************************************************************/
730/* Local variable-related functions */
731/******************************************************************************/
732
// A locals table with a capacity below this value is kept as a plain list
// that is scanned linearly; at or above it the table is an open-addressing
// hash keyed by pm_locals_hash.
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
982/******************************************************************************/
983/* Node-related functions */
984/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
/**
 * Follow `node` down through the positions that supply its value, looking
 * for a node that cannot produce one. In the visible code those are return,
 * break, next, redo and retry nodes.
 *
 * Returns the offending node, or, when a branching construct was crossed on
 * the way, the first offending node that was recorded in void_node. Returns
 * NULL when no such node is found. pm_assert_value_expression reports the
 * returned node as PM_ERR_VOID_EXPRESSION.
 *
 * Side effect: the last case before `default` marks a local as read (see
 * the note there).
 *
 * NOTE(review): this listing skips several original line numbers inside the
 * function (1043, 1045, 1097, 1102, 1151-1152). The affected spots are
 * marked below; confirm them against the complete file.
 */
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
// NOTE(review): line 1043 is missing here, presumably one more case label.
// A node that never yields a value was reached: report the node remembered
// from an earlier branch if there is one, otherwise this node.
1044 return void_node != NULL ? void_node : node;
// NOTE(review): line 1045 is missing here, presumably a case label for the
// `return NULL` below, which is unreachable as shown.
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
// With an ensure clause: an offending node found in the rescue clause or in
// the statements is returned right away; otherwise the search continues in
// the ensure clause.
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
// With rescue clauses but no ensure: the statements must exist and contain
// an offending node, otherwise the result is NULL.
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
// Every rescue clause is checked in turn; the first one without an
// offending node clears void_node and ends the scan.
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
// An else clause is searched next; without one, whatever void_node holds at
// this point is the answer.
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
// NOTE(review): line 1097 is missing here; `cast` is used below without a
// visible declaration.
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
// NOTE(review): line 1102 is missing here; `cast` is used below without a
// visible declaration.
// Only the last node of the body is followed.
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
// Both the statements and a subsequent clause must be present, and the
// statements must contain an offending node; then the subsequent clause is
// searched as well.
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
// Same rule as for PM_IF_NODE, with the else clause as the second branch.
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
// Only the left operand is followed.
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
// Only the left operand is followed.
1148 node = cast->left;
1149 break;
1150 }
// NOTE(review): lines 1151-1152 are missing here, presumably a case label
// and the declaration of `cast`. The visible code walks cast->depth scopes
// outwards from the current scope, marks the local cast->name as read in
// that scope and returns NULL.
1153
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
/**
 * Emit a PM_WARN_VOID_STATEMENT warning for `node` when it is one of the
 * node kinds recognized below.
 *
 * Each recognized kind selects a description (`type`) and its byte length
 * (`length`); both are passed as the format arguments of the warning. When
 * no description is selected, nothing is emitted.
 *
 * NOTE(review): this listing skips several original line numbers inside the
 * function (1185-1190, 1249, 1253, 1268-1269, 1271-1274, 1287). The
 * affected spots are marked below; confirm them against the complete file.
 */
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
// NOTE(review): lines 1185-1190 are missing here, presumably the case
// labels that lead to "a variable".
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
// Calls that have a call operator location, or that have no message
// location, are not considered.
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
// The method name is looked up in the constant pool and matched by its
// length; for the one- and two-byte operators the description is the name
// itself.
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
// Two-byte names are matched on their second byte first: <=, >=, != and ==,
// then +@ and -@, then **.
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
// NOTE(review): line 1249 is missing here, presumably the case label that
// leads to "::".
1250 type = "::";
1251 length = 2;
1252 break;
// NOTE(review): line 1253 is missing here, presumably the case label that
// leads to "a constant".
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
// NOTE(review): lines 1268-1269 and 1271-1274 are missing around here,
// presumably further case labels that lead to "a literal".
1270 case PM_RATIONAL_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
// NOTE(review): line 1287 is missing here: the `if` condition that chooses
// between "..." and "..".
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
// A description is only set for the recognized kinds above.
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The kind of context a predicate is being checked in. It is passed through
 * pm_conditional_predicate() and selects the wording used by
 * pm_parser_warn_conditional_predicate_literal().
 */
1332typedef enum {
     /** Literal warnings describe the context as a "condition". */
1333 PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,
     /** Literal warnings describe the context as a "flip-flop". */
1334 PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,
     /** No literal warning is emitted for this context. */
1335 PM_CONDITIONAL_PREDICATE_TYPE_NOT
1336} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
/**
 * Return true when the given node counts as a literal for the purpose of the
 * "assignment of a literal in a conditional" warning. Arrays and hashes count
 * when they are flagged as static literals or when every element (for hashes:
 * every key and value of plain assoc elements) counts recursively.
 *
 * @param node The value node to classify.
 * @return true if the node is treated as a literal, false otherwise.
 */
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
     // Arrays already known to be static literals need no further walk.
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
     // Otherwise every element has to qualify on its own.
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
     // Any element that is not a plain assoc node disqualifies the hash.
1377 const pm_node_t *element = cast->elements.nodes[index];
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
     // Both sides of the pair have to qualify.
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
     // NOTE(review): this list of literal node types has a gap in the listing
     // (embedded lines 1392-1395 are missing), so further case labels may
     // belong to it.
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
/**
 * Inspect a node that is being used as a predicate and apply the
 * predicate-specific handling visible below:
 *
 * - the operands of `and`/`or` nodes are checked again as conditionals;
 * - parentheses and `begin` nodes holding exactly one statement are looked
 *   through, keeping the current predicate type;
 * - range nodes have both ends checked as flip-flop operands and are then
 *   retagged as flip-flop nodes;
 * - literal nodes produce "literal in condition" style warnings;
 * - write nodes have their assigned value checked for being a literal.
 *
 * NOTE(review): this listing is missing several lines of the function (case
 * labels, one declaration, and the statements that retag regular expression
 * nodes); the inline notes mark where the embedded numbering jumps.
 *
 * @param parser The parser that receives any warnings.
 * @param node The predicate node; its type field may be rewritten in place.
 * @param type The predicate context used to word literal warnings.
 */
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
     // NOTE(review): the declaration of `cast` is not visible in this listing
     // (embedded line 1444 is missing).
1445
     // Only look through parentheses that wrap exactly one statement.
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
     // Either end of the range may be absent.
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
     // NOTE(review): the case label for this branch (embedded line 1477) and
     // the statements performing the conversion described below (embedded
     // lines 1481-1482) are not visible in this listing.
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1483
     // The warning is skipped when PM_PARSER_COMMAND_LINE_OPTION_E holds
     // (presumably the -e command line option; confirm against its
     // definition).
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
     // NOTE(review): the case label for this branch (embedded line 1489) and
     // the conversion statements (embedded lines 1493-1494) are not visible
     // in this listing.
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
     // Integers get a dedicated warning as flip-flop operands and the
     // generic literal warning everywhere else.
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
     // NOTE(review): embedded lines 1511-1512 are missing after this label;
     // further case labels may share this branch.
1510 case PM_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
     // NOTE(review): embedded line 1516 is missing after this label.
1515 case PM_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
     // NOTE(review): embedded lines 1519-1520 are missing before this label;
     // further case labels may share this branch.
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
     // The remaining branches check the value assigned by a write node. The
     // case label of each branch is not visible in this listing (embedded
     // lines 1526, 1529, 1532, 1535, 1538 and 1541 are missing); the casts
     // show which node type each one handles.
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
/** An empty location whose start and end both sit at the parser's start pointer. */
1562#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
/** The location spanned by a token, from its start to its end. */
1563#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
/** A copy of the location stored directly on a node (`node->location`). */
1564#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
/** A copy of the location stored on a node's `base` member (`node->base.location`). */
1565#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
/** The value used for an optional location that is absent: both pointers are NULL. */
1566#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
/** The token's location, or the "not provided" location when the token type is PM_TOKEN_NOT_PROVIDED. */
1567#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1653 if (n <= 0) return 0;
1654
1655 if (parser->encoding_changed) {
1656 size_t width;
1657
1658 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1659 return width;
1660 } else if (*b == '_') {
1661 return 1;
1662 } else if (*b >= 0x80) {
1663 return parser->encoding->char_width(b, n);
1664 } else {
1665 return 0;
1666 }
1667 } else if (*b < 0x80) {
1668 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1669 } else {
1670 return pm_encoding_utf_8_char_width(b, n);
1671 }
1672}
1673
1678static inline size_t
1679char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1680 if (n <= 0) {
1681 return 0;
1682 } else if (*b < 0x80) {
1683 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1684 } else {
1685 return pm_encoding_utf_8_char_width(b, n);
1686 }
1687}
1688
1694static inline size_t
1695char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1696 if (n <= 0) {
1697 return 0;
1698 } else if (parser->encoding_changed) {
1699 size_t width;
1700
1701 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1702 return width;
1703 } else if (*b == '_') {
1704 return 1;
1705 } else if (*b >= 0x80) {
1706 return parser->encoding->char_width(b, n);
1707 } else {
1708 return 0;
1709 }
1710 } else {
1711 return char_is_identifier_utf8(b, n);
1712 }
1713}
1714
// The punctuation characters that may follow a $ in a global name are stored
// as a bitset over the printable ASCII range, split into three 32-bit words:
// word 0 covers 0x20-0x3f, word 1 covers 0x40-0x5f and word 2 covers
// 0x60-0x7f. Within a word, a character occupies bit (c % 32).
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Return true when the given byte is one of the punctuation characters
 * recorded in pm_global_name_punctuation_hash.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    // Only bytes strictly above the space character and up to '~' can be in
    // the table.
    if (b <= 0x20 || b > 0x7e) return false;

    const unsigned int word = pm_global_name_punctuation_hash[((unsigned int) b - 0x20) / 32];
    return ((word >> (b % 32)) & 1U) != 0;
}
1739
1740static inline bool
1741token_is_setter_name(pm_token_t *token) {
1742 return (
1743 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1744 ((token->type == PM_TOKEN_IDENTIFIER) &&
1745 (token->end - token->start >= 2) &&
1746 (token->end[-1] == '='))
1747 );
1748}
1749
/**
 * Return true when the `length` bytes starting at `source` spell exactly one
 * of the reserved words listed below. `source` does not need to be
 * NUL-terminated.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
    // Each reserved word is paired with its length so that only words of the
    // requested length are ever compared against the input.
    static const struct {
        size_t length;
        const char *word;
    } reserved[] = {
        { 2, "do" }, { 2, "if" }, { 2, "in" }, { 2, "or" },
        { 3, "and" }, { 3, "def" }, { 3, "end" }, { 3, "for" },
        { 3, "nil" }, { 3, "not" },
        { 4, "case" }, { 4, "else" }, { 4, "next" }, { 4, "redo" },
        { 4, "self" }, { 4, "then" }, { 4, "true" }, { 4, "when" },
        { 5, "alias" }, { 5, "begin" }, { 5, "break" }, { 5, "class" },
        { 5, "elsif" }, { 5, "false" }, { 5, "retry" }, { 5, "super" },
        { 5, "undef" }, { 5, "until" }, { 5, "while" }, { 5, "yield" },
        { 6, "ensure" }, { 6, "module" }, { 6, "rescue" }, { 6, "return" },
        { 6, "unless" },
        { 8, "__LINE__" }, { 8, "__FILE__" },
        { 12, "__ENCODING__" }
    };

    const size_t count = sizeof(reserved) / sizeof(reserved[0]);
    for (size_t index = 0; index < count; index++) {
        if (reserved[index].length != length) continue;
        if (memcmp(source, reserved[index].word, length) == 0) return true;
    }

    return false;
}
1820
1821/******************************************************************************/
1822/* Node flag handling functions */
1823/******************************************************************************/
1824
1828static inline void
1829pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1830 node->flags |= flag;
1831}
1832
1836static inline void
1837pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1838 node->flags &= (pm_node_flags_t) ~flag;
1839}
1840
1844static inline void
1845pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1846 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1847 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1848 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1849 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1850 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1851 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1852 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1853 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1854
1855 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1856}
1857
1858/******************************************************************************/
1859/* Node creation functions */
1860/******************************************************************************/
1861
1867#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1868
1872static inline pm_node_flags_t
1873pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1874 pm_node_flags_t flags = 0;
1875
1876 if (closing->type == PM_TOKEN_REGEXP_END) {
1877 pm_buffer_t unknown_flags = { 0 };
1878
1879 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1880 switch (*flag) {
1881 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1882 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1883 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1884 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1885
1886 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1887 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1888 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1889 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1890
1891 default: pm_buffer_append_byte(&unknown_flags, *flag);
1892 }
1893 }
1894
1895 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1896 if (unknown_flags_length != 0) {
1897 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1898 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1899 }
1900 pm_buffer_free(&unknown_flags);
1901 }
1902
1903 return flags;
1904}
1905
1906#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1907
// Forward declarations for the statements node helpers. Their definitions are
// not part of this section of the file; they are declared here so that code
// before those definitions can call them.
1908static pm_statements_node_t *
1909pm_statements_node_create(pm_parser_t *parser);
1910
1911static void
1912pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1913
1914static size_t
1915pm_statements_node_body_length(pm_statements_node_t *node);
1916
1921static inline void *
1922pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1923 void *memory = xcalloc(1, size);
1924 if (memory == NULL) {
1925 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1926 abort();
1927 }
1928 return memory;
1929}
1930
/**
 * Allocate storage for one node of the given struct type through
 * pm_node_alloc() and yield it as a pointer to that type. The expansion is
 * fully parenthesized so it can be used inside larger expressions.
 */
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

/**
 * Pre-increment the parser's node_id counter and yield the new value, which
 * node constructors store as the identifier of the node being created. The
 * argument is parenthesized so that any pointer-valued expression (for
 * example `&parser_struct`) can be passed.
 */
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1933
1937static pm_missing_node_t *
1938pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1939 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1940
1941 *node = (pm_missing_node_t) {{
1942 .type = PM_MISSING_NODE,
1943 .node_id = PM_NODE_IDENTIFY(parser),
1944 .location = { .start = start, .end = end }
1945 }};
1946
1947 return node;
1948}
1949
1953static pm_alias_global_variable_node_t *
1954pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1955 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1956 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1957
1958 *node = (pm_alias_global_variable_node_t) {
1959 {
1960 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1961 .node_id = PM_NODE_IDENTIFY(parser),
1962 .location = {
1963 .start = keyword->start,
1964 .end = old_name->location.end
1965 },
1966 },
1967 .new_name = new_name,
1968 .old_name = old_name,
1969 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1970 };
1971
1972 return node;
1973}
1974
1978static pm_alias_method_node_t *
1979pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1980 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1981 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1982
1983 *node = (pm_alias_method_node_t) {
1984 {
1985 .type = PM_ALIAS_METHOD_NODE,
1986 .node_id = PM_NODE_IDENTIFY(parser),
1987 .location = {
1988 .start = keyword->start,
1989 .end = old_name->location.end
1990 },
1991 },
1992 .new_name = new_name,
1993 .old_name = old_name,
1994 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1995 };
1996
1997 return node;
1998}
1999
2003static pm_alternation_pattern_node_t *
2004pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2005 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2006
2007 *node = (pm_alternation_pattern_node_t) {
2008 {
2009 .type = PM_ALTERNATION_PATTERN_NODE,
2010 .node_id = PM_NODE_IDENTIFY(parser),
2011 .location = {
2012 .start = left->location.start,
2013 .end = right->location.end
2014 },
2015 },
2016 .left = left,
2017 .right = right,
2018 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2019 };
2020
2021 return node;
2022}
2023
2027static pm_and_node_t *
2028pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2029 pm_assert_value_expression(parser, left);
2030
2031 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2032
2033 *node = (pm_and_node_t) {
2034 {
2035 .type = PM_AND_NODE,
2036 .node_id = PM_NODE_IDENTIFY(parser),
2037 .location = {
2038 .start = left->location.start,
2039 .end = right->location.end
2040 },
2041 },
2042 .left = left,
2043 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2044 .right = right
2045 };
2046
2047 return node;
2048}
2049
2053static pm_arguments_node_t *
2054pm_arguments_node_create(pm_parser_t *parser) {
2055 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2056
2057 *node = (pm_arguments_node_t) {
2058 {
2059 .type = PM_ARGUMENTS_NODE,
2060 .node_id = PM_NODE_IDENTIFY(parser),
2061 .location = PM_LOCATION_NULL_VALUE(parser)
2062 },
2063 .arguments = { 0 }
2064 };
2065
2066 return node;
2067}
2068
2072static size_t
2073pm_arguments_node_size(pm_arguments_node_t *node) {
2074 return node->arguments.size;
2075}
2076
2080static void
2081pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2082 if (pm_arguments_node_size(node) == 0) {
2083 node->base.location.start = argument->location.start;
2084 }
2085
2086 node->base.location.end = argument->location.end;
2087 pm_node_list_append(&node->arguments, argument);
2088
2089 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2090 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2091 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2092 } else {
2093 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2094 }
2095 }
2096}
2097
2101static pm_array_node_t *
2102pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2103 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2104
2105 *node = (pm_array_node_t) {
2106 {
2107 .type = PM_ARRAY_NODE,
2108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2109 .node_id = PM_NODE_IDENTIFY(parser),
2110 .location = PM_LOCATION_TOKEN_VALUE(opening)
2111 },
2112 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2113 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2114 .elements = { 0 }
2115 };
2116
2117 return node;
2118}
2119
2123static inline void
2124pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2125 if (!node->elements.size && !node->opening_loc.start) {
2126 node->base.location.start = element->location.start;
2127 }
2128
2129 pm_node_list_append(&node->elements, element);
2130 node->base.location.end = element->location.end;
2131
2132 // If the element is not a static literal, then the array is not a static
2133 // literal. Turn that flag off.
2134 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2135 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2136 }
2137
2138 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2139 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2140 }
2141}
2142
2146static void
2147pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2148 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2149 node->base.location.end = closing->end;
2150 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2151}
2152
2157static pm_array_pattern_node_t *
2158pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2159 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2160
2161 *node = (pm_array_pattern_node_t) {
2162 {
2163 .type = PM_ARRAY_PATTERN_NODE,
2164 .node_id = PM_NODE_IDENTIFY(parser),
2165 .location = {
2166 .start = nodes->nodes[0]->location.start,
2167 .end = nodes->nodes[nodes->size - 1]->location.end
2168 },
2169 },
2170 .constant = NULL,
2171 .rest = NULL,
2172 .requireds = { 0 },
2173 .posts = { 0 },
2174 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2175 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2176 };
2177
2178 // For now we're going to just copy over each pointer manually. This could be
2179 // much more efficient, as we could instead resize the node list.
2180 bool found_rest = false;
2181 pm_node_t *child;
2182
2183 PM_NODE_LIST_FOREACH(nodes, index, child) {
2184 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2185 node->rest = child;
2186 found_rest = true;
2187 } else if (found_rest) {
2188 pm_node_list_append(&node->posts, child);
2189 } else {
2190 pm_node_list_append(&node->requireds, child);
2191 }
2192 }
2193
2194 return node;
2195}
2196
2200static pm_array_pattern_node_t *
2201pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2202 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2203
2204 *node = (pm_array_pattern_node_t) {
2205 {
2206 .type = PM_ARRAY_PATTERN_NODE,
2207 .node_id = PM_NODE_IDENTIFY(parser),
2208 .location = rest->location,
2209 },
2210 .constant = NULL,
2211 .rest = rest,
2212 .requireds = { 0 },
2213 .posts = { 0 },
2214 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2215 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 {
2231 .type = PM_ARRAY_PATTERN_NODE,
2232 .node_id = PM_NODE_IDENTIFY(parser),
2233 .location = {
2234 .start = constant->location.start,
2235 .end = closing->end
2236 },
2237 },
2238 .constant = constant,
2239 .rest = NULL,
2240 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2241 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2242 .requireds = { 0 },
2243 .posts = { 0 }
2244 };
2245
2246 return node;
2247}
2248
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2255 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2256
2257 *node = (pm_array_pattern_node_t) {
2258 {
2259 .type = PM_ARRAY_PATTERN_NODE,
2260 .node_id = PM_NODE_IDENTIFY(parser),
2261 .location = {
2262 .start = opening->start,
2263 .end = closing->end
2264 },
2265 },
2266 .constant = NULL,
2267 .rest = NULL,
2268 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2269 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2270 .requireds = { 0 },
2271 .posts = { 0 }
2272 };
2273
2274 return node;
2275}
2276
2277static inline void
2278pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2279 pm_node_list_append(&node->requireds, inner);
2280}
2281
2285static pm_assoc_node_t *
2286pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2287 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2288 const uint8_t *end;
2289
2290 if (value != NULL && value->location.end > key->location.end) {
2291 end = value->location.end;
2292 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2293 end = operator->end;
2294 } else {
2295 end = key->location.end;
2296 }
2297
2298 // Hash string keys will be frozen, so we can mark them as frozen here so
2299 // that the compiler picks them up and also when we check for static literal
2300 // on the keys it gets factored in.
2301 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2302 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2303 }
2304
2305 // If the key and value of this assoc node are both static literals, then
2306 // we can mark this node as a static literal.
2307 pm_node_flags_t flags = 0;
2308 if (
2309 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2310 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2311 ) {
2312 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2313 }
2314
2315 *node = (pm_assoc_node_t) {
2316 {
2317 .type = PM_ASSOC_NODE,
2318 .flags = flags,
2319 .node_id = PM_NODE_IDENTIFY(parser),
2320 .location = {
2321 .start = key->location.start,
2322 .end = end
2323 },
2324 },
2325 .key = key,
2326 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2327 .value = value
2328 };
2329
2330 return node;
2331}
2332
2336static pm_assoc_splat_node_t *
2337pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2338 assert(operator->type == PM_TOKEN_USTAR_STAR);
2339 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2340
2341 *node = (pm_assoc_splat_node_t) {
2342 {
2343 .type = PM_ASSOC_SPLAT_NODE,
2344 .node_id = PM_NODE_IDENTIFY(parser),
2345 .location = {
2346 .start = operator->start,
2347 .end = value == NULL ? operator->end : value->location.end
2348 },
2349 },
2350 .value = value,
2351 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2352 };
2353
2354 return node;
2355}
2356
2360static pm_back_reference_read_node_t *
2361pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2362 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2363 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2364
2365 *node = (pm_back_reference_read_node_t) {
2366 {
2367 .type = PM_BACK_REFERENCE_READ_NODE,
2368 .node_id = PM_NODE_IDENTIFY(parser),
2369 .location = PM_LOCATION_TOKEN_VALUE(name),
2370 },
2371 .name = pm_parser_constant_id_token(parser, name)
2372 };
2373
2374 return node;
2375}
2376
2380static pm_begin_node_t *
2381pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2382 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2383
2384 *node = (pm_begin_node_t) {
2385 {
2386 .type = PM_BEGIN_NODE,
2387 .node_id = PM_NODE_IDENTIFY(parser),
2388 .location = {
2389 .start = begin_keyword->start,
2390 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2391 },
2392 },
2393 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2394 .statements = statements,
2395 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2396 };
2397
2398 return node;
2399}
2400
2404static void
2405pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2406 // If the begin keyword doesn't exist, we set the start on the begin_node
2407 if (!node->begin_keyword_loc.start) {
2408 node->base.location.start = rescue_clause->base.location.start;
2409 }
2410 node->base.location.end = rescue_clause->base.location.end;
2411 node->rescue_clause = rescue_clause;
2412}
2413
2417static void
2418pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2419 node->base.location.end = else_clause->base.location.end;
2420 node->else_clause = else_clause;
2421}
2422
2426static void
2427pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2428 node->base.location.end = ensure_clause->base.location.end;
2429 node->ensure_clause = ensure_clause;
2430}
2431
2435static void
2436pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2437 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2438
2439 node->base.location.end = end_keyword->end;
2440 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2441}
2442
2446static pm_block_argument_node_t *
2447pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2448 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2449
2450 *node = (pm_block_argument_node_t) {
2451 {
2452 .type = PM_BLOCK_ARGUMENT_NODE,
2453 .node_id = PM_NODE_IDENTIFY(parser),
2454 .location = {
2455 .start = operator->start,
2456 .end = expression == NULL ? operator->end : expression->location.end
2457 },
2458 },
2459 .expression = expression,
2460 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2461 };
2462
2463 return node;
2464}
2465
2469static pm_block_node_t *
2470pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2471 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2472
2473 *node = (pm_block_node_t) {
2474 {
2475 .type = PM_BLOCK_NODE,
2476 .node_id = PM_NODE_IDENTIFY(parser),
2477 .location = { .start = opening->start, .end = closing->end },
2478 },
2479 .locals = *locals,
2480 .parameters = parameters,
2481 .body = body,
2482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2484 };
2485
2486 return node;
2487}
2488
2492static pm_block_parameter_node_t *
2493pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2494 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2495 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2496
2497 *node = (pm_block_parameter_node_t) {
2498 {
2499 .type = PM_BLOCK_PARAMETER_NODE,
2500 .node_id = PM_NODE_IDENTIFY(parser),
2501 .location = {
2502 .start = operator->start,
2503 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2504 },
2505 },
2506 .name = pm_parser_optional_constant_id_token(parser, name),
2507 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2508 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2509 };
2510
2511 return node;
2512}
2513
2517static pm_block_parameters_node_t *
2518pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2519 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2520
2521 const uint8_t *start;
2522 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2523 start = opening->start;
2524 } else if (parameters != NULL) {
2525 start = parameters->base.location.start;
2526 } else {
2527 start = NULL;
2528 }
2529
2530 const uint8_t *end;
2531 if (parameters != NULL) {
2532 end = parameters->base.location.end;
2533 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2534 end = opening->end;
2535 } else {
2536 end = NULL;
2537 }
2538
2539 *node = (pm_block_parameters_node_t) {
2540 {
2541 .type = PM_BLOCK_PARAMETERS_NODE,
2542 .node_id = PM_NODE_IDENTIFY(parser),
2543 .location = {
2544 .start = start,
2545 .end = end
2546 }
2547 },
2548 .parameters = parameters,
2549 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2550 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2551 .locals = { 0 }
2552 };
2553
2554 return node;
2555}
2556
2560static void
2561pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2562 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2563
2564 node->base.location.end = closing->end;
2565 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2566}
2567
2571static pm_block_local_variable_node_t *
2572pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2573 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2574
2575 *node = (pm_block_local_variable_node_t) {
2576 {
2577 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2578 .node_id = PM_NODE_IDENTIFY(parser),
2579 .location = PM_LOCATION_TOKEN_VALUE(name),
2580 },
2581 .name = pm_parser_constant_id_token(parser, name)
2582 };
2583
2584 return node;
2585}
2586
2590static void
2591pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2592 pm_node_list_append(&node->locals, (pm_node_t *) local);
2593
2594 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2595 node->base.location.end = local->base.location.end;
2596}
2597
2601static pm_break_node_t *
2602pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2603 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2604 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2605
2606 *node = (pm_break_node_t) {
2607 {
2608 .type = PM_BREAK_NODE,
2609 .node_id = PM_NODE_IDENTIFY(parser),
2610 .location = {
2611 .start = keyword->start,
2612 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2613 },
2614 },
2615 .arguments = arguments,
2616 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2617 };
2618
2619 return node;
2620}
2621
2622// There are certain flags that we want to use internally but don't want to
2623// expose because they are not relevant beyond parsing. Therefore we'll define
2624// them here and not define them in config.yml/a header file.
2625static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2626static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2627static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2628static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2629
2635static pm_call_node_t *
2636pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2637 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2638
2639 *node = (pm_call_node_t) {
2640 {
2641 .type = PM_CALL_NODE,
2642 .flags = flags,
2643 .node_id = PM_NODE_IDENTIFY(parser),
2644 .location = PM_LOCATION_NULL_VALUE(parser),
2645 },
2646 .receiver = NULL,
2647 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2648 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2649 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2650 .arguments = NULL,
2651 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2652 .block = NULL,
2653 .name = 0
2654 };
2655
2656 return node;
2657}
2658
2663static inline pm_node_flags_t
2664pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2665 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2666}
2667
2672static pm_call_node_t *
2673pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2674 pm_assert_value_expression(parser, receiver);
2675
2676 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2677 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2678 flags |= PM_CALL_NODE_FLAGS_INDEX;
2679 }
2680
2681 pm_call_node_t *node = pm_call_node_create(parser, flags);
2682
2683 node->base.location.start = receiver->location.start;
2684 node->base.location.end = pm_arguments_end(arguments);
2685
2686 node->receiver = receiver;
2687 node->message_loc.start = arguments->opening_loc.start;
2688 node->message_loc.end = arguments->closing_loc.end;
2689
2690 node->opening_loc = arguments->opening_loc;
2691 node->arguments = arguments->arguments;
2692 node->closing_loc = arguments->closing_loc;
2693 node->block = arguments->block;
2694
2695 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2696 return node;
2697}
2698
2702static pm_call_node_t *
2703pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2704 pm_assert_value_expression(parser, receiver);
2705 pm_assert_value_expression(parser, argument);
2706
2707 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2708
2709 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2710 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2711
2712 node->receiver = receiver;
2713 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2714
2715 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2716 pm_arguments_node_arguments_append(arguments, argument);
2717 node->arguments = arguments;
2718
2719 node->name = pm_parser_constant_id_token(parser, operator);
2720 return node;
2721}
2722
2726static pm_call_node_t *
2727pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2728 pm_assert_value_expression(parser, receiver);
2729
2730 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2731
2732 node->base.location.start = receiver->location.start;
2733 const uint8_t *end = pm_arguments_end(arguments);
2734 if (end == NULL) {
2735 end = message->end;
2736 }
2737 node->base.location.end = end;
2738
2739 node->receiver = receiver;
2740 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2741 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2742 node->opening_loc = arguments->opening_loc;
2743 node->arguments = arguments->arguments;
2744 node->closing_loc = arguments->closing_loc;
2745 node->block = arguments->block;
2746
2747 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2748 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2749 }
2750
2751 node->name = pm_parser_constant_id_token(parser, message);
2752 return node;
2753}
2754
2758static pm_call_node_t *
2759pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2760 pm_call_node_t *node = pm_call_node_create(parser, 0);
2761 node->base.location.start = parser->start;
2762 node->base.location.end = parser->end;
2763
2764 node->receiver = receiver;
2765 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2766 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2767 node->arguments = arguments;
2768
2769 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2770 return node;
2771}
2772
2777static pm_call_node_t *
2778pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2779 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2780
2781 node->base.location.start = message->start;
2782 node->base.location.end = pm_arguments_end(arguments);
2783
2784 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2785 node->opening_loc = arguments->opening_loc;
2786 node->arguments = arguments->arguments;
2787 node->closing_loc = arguments->closing_loc;
2788 node->block = arguments->block;
2789
2790 node->name = pm_parser_constant_id_token(parser, message);
2791 return node;
2792}
2793
2798static pm_call_node_t *
2799pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2800 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2801
2802 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2803 node->arguments = arguments;
2804
2805 node->name = name;
2806 return node;
2807}
2808
2812static pm_call_node_t *
2813pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2814 pm_assert_value_expression(parser, receiver);
2815 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2816
2817 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2818
2819 node->base.location.start = message->start;
2820 if (arguments->closing_loc.start != NULL) {
2821 node->base.location.end = arguments->closing_loc.end;
2822 } else {
2823 assert(receiver != NULL);
2824 node->base.location.end = receiver->location.end;
2825 }
2826
2827 node->receiver = receiver;
2828 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2829 node->opening_loc = arguments->opening_loc;
2830 node->arguments = arguments->arguments;
2831 node->closing_loc = arguments->closing_loc;
2832
2833 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2834 return node;
2835}
2836
2840static pm_call_node_t *
2841pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2842 pm_assert_value_expression(parser, receiver);
2843
2844 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2845
2846 node->base.location.start = receiver->location.start;
2847 node->base.location.end = pm_arguments_end(arguments);
2848
2849 node->receiver = receiver;
2850 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2851 node->opening_loc = arguments->opening_loc;
2852 node->arguments = arguments->arguments;
2853 node->closing_loc = arguments->closing_loc;
2854 node->block = arguments->block;
2855
2856 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2857 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2858 }
2859
2860 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2861 return node;
2862}
2863
2867static pm_call_node_t *
2868pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2869 pm_assert_value_expression(parser, receiver);
2870
2871 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2872
2873 node->base.location.start = operator->start;
2874 node->base.location.end = receiver->location.end;
2875
2876 node->receiver = receiver;
2877 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2878
2879 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2880 return node;
2881}
2882
2887static pm_call_node_t *
2888pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2889 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2890
2891 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2892 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2893
2894 node->name = pm_parser_constant_id_token(parser, message);
2895 return node;
2896}
2897
2902static inline bool
2903pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2904 return (
2905 (node->message_loc.start != NULL) &&
2906 (node->message_loc.end[-1] != '!') &&
2907 (node->message_loc.end[-1] != '?') &&
2908 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2909 (node->opening_loc.start == NULL) &&
2910 (node->arguments == NULL) &&
2911 (node->block == NULL)
2912 );
2913}
2914
2918static void
2919pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2920 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2921
2922 if (write_constant->length > 0) {
2923 size_t length = write_constant->length - 1;
2924
2925 void *memory = xmalloc(length);
2926 memcpy(memory, write_constant->start, length);
2927
2928 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2929 } else {
2930 // We can get here if the message was missing because of a syntax error.
2931 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2932 }
2933}
2934
2938static pm_call_and_write_node_t *
2939pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2940 assert(target->block == NULL);
2941 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2942 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2943
2944 *node = (pm_call_and_write_node_t) {
2945 {
2946 .type = PM_CALL_AND_WRITE_NODE,
2947 .flags = target->base.flags,
2948 .node_id = PM_NODE_IDENTIFY(parser),
2949 .location = {
2950 .start = target->base.location.start,
2951 .end = value->location.end
2952 }
2953 },
2954 .receiver = target->receiver,
2955 .call_operator_loc = target->call_operator_loc,
2956 .message_loc = target->message_loc,
2957 .read_name = 0,
2958 .write_name = target->name,
2959 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2960 .value = value
2961 };
2962
2963 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2964
2965 // Here we're going to free the target, since it is no longer necessary.
2966 // However, we don't want to call `pm_node_destroy` because we want to keep
2967 // around all of its children since we just reused them.
2968 xfree(target);
2969
2970 return node;
2971}
2972
2977static void
2978pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2979 if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2980 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2981 pm_node_t *node;
2982 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2983 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2984 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2985 break;
2986 }
2987 }
2988 }
2989
2990 if (block != NULL) {
2991 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2992 }
2993 }
2994}
2995
2999static pm_index_and_write_node_t *
3000pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3001 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3002 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
3003
3004 pm_index_arguments_check(parser, target->arguments, target->block);
3005
3006 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3007 *node = (pm_index_and_write_node_t) {
3008 {
3009 .type = PM_INDEX_AND_WRITE_NODE,
3010 .flags = target->base.flags,
3011 .node_id = PM_NODE_IDENTIFY(parser),
3012 .location = {
3013 .start = target->base.location.start,
3014 .end = value->location.end
3015 }
3016 },
3017 .receiver = target->receiver,
3018 .call_operator_loc = target->call_operator_loc,
3019 .opening_loc = target->opening_loc,
3020 .arguments = target->arguments,
3021 .closing_loc = target->closing_loc,
3022 .block = (pm_block_argument_node_t *) target->block,
3023 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3024 .value = value
3025 };
3026
3027 // Here we're going to free the target, since it is no longer necessary.
3028 // However, we don't want to call `pm_node_destroy` because we want to keep
3029 // around all of its children since we just reused them.
3030 xfree(target);
3031
3032 return node;
3033}
3034
3038static pm_call_operator_write_node_t *
3039pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3040 assert(target->block == NULL);
3041 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3042
3043 *node = (pm_call_operator_write_node_t) {
3044 {
3045 .type = PM_CALL_OPERATOR_WRITE_NODE,
3046 .flags = target->base.flags,
3047 .node_id = PM_NODE_IDENTIFY(parser),
3048 .location = {
3049 .start = target->base.location.start,
3050 .end = value->location.end
3051 }
3052 },
3053 .receiver = target->receiver,
3054 .call_operator_loc = target->call_operator_loc,
3055 .message_loc = target->message_loc,
3056 .read_name = 0,
3057 .write_name = target->name,
3058 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3059 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3060 .value = value
3061 };
3062
3063 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3064
3065 // Here we're going to free the target, since it is no longer necessary.
3066 // However, we don't want to call `pm_node_destroy` because we want to keep
3067 // around all of its children since we just reused them.
3068 xfree(target);
3069
3070 return node;
3071}
3072
3076static pm_index_operator_write_node_t *
3077pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3078 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3079
3080 pm_index_arguments_check(parser, target->arguments, target->block);
3081
3082 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3083 *node = (pm_index_operator_write_node_t) {
3084 {
3085 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3086 .flags = target->base.flags,
3087 .node_id = PM_NODE_IDENTIFY(parser),
3088 .location = {
3089 .start = target->base.location.start,
3090 .end = value->location.end
3091 }
3092 },
3093 .receiver = target->receiver,
3094 .call_operator_loc = target->call_operator_loc,
3095 .opening_loc = target->opening_loc,
3096 .arguments = target->arguments,
3097 .closing_loc = target->closing_loc,
3098 .block = (pm_block_argument_node_t *) target->block,
3099 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3100 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3101 .value = value
3102 };
3103
3104 // Here we're going to free the target, since it is no longer necessary.
3105 // However, we don't want to call `pm_node_destroy` because we want to keep
3106 // around all of its children since we just reused them.
3107 xfree(target);
3108
3109 return node;
3110}
3111
3115static pm_call_or_write_node_t *
3116pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3117 assert(target->block == NULL);
3118 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3119 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3120
3121 *node = (pm_call_or_write_node_t) {
3122 {
3123 .type = PM_CALL_OR_WRITE_NODE,
3124 .flags = target->base.flags,
3125 .node_id = PM_NODE_IDENTIFY(parser),
3126 .location = {
3127 .start = target->base.location.start,
3128 .end = value->location.end
3129 }
3130 },
3131 .receiver = target->receiver,
3132 .call_operator_loc = target->call_operator_loc,
3133 .message_loc = target->message_loc,
3134 .read_name = 0,
3135 .write_name = target->name,
3136 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3137 .value = value
3138 };
3139
3140 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3141
3142 // Here we're going to free the target, since it is no longer necessary.
3143 // However, we don't want to call `pm_node_destroy` because we want to keep
3144 // around all of its children since we just reused them.
3145 xfree(target);
3146
3147 return node;
3148}
3149
3153static pm_index_or_write_node_t *
3154pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3155 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3156 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3157
3158 pm_index_arguments_check(parser, target->arguments, target->block);
3159
3160 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3161 *node = (pm_index_or_write_node_t) {
3162 {
3163 .type = PM_INDEX_OR_WRITE_NODE,
3164 .flags = target->base.flags,
3165 .node_id = PM_NODE_IDENTIFY(parser),
3166 .location = {
3167 .start = target->base.location.start,
3168 .end = value->location.end
3169 }
3170 },
3171 .receiver = target->receiver,
3172 .call_operator_loc = target->call_operator_loc,
3173 .opening_loc = target->opening_loc,
3174 .arguments = target->arguments,
3175 .closing_loc = target->closing_loc,
3176 .block = (pm_block_argument_node_t *) target->block,
3177 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3178 .value = value
3179 };
3180
3181 // Here we're going to free the target, since it is no longer necessary.
3182 // However, we don't want to call `pm_node_destroy` because we want to keep
3183 // around all of its children since we just reused them.
3184 xfree(target);
3185
3186 return node;
3187}
3188
3193static pm_call_target_node_t *
3194pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3195 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3196
3197 *node = (pm_call_target_node_t) {
3198 {
3199 .type = PM_CALL_TARGET_NODE,
3200 .flags = target->base.flags,
3201 .node_id = PM_NODE_IDENTIFY(parser),
3202 .location = target->base.location
3203 },
3204 .receiver = target->receiver,
3205 .call_operator_loc = target->call_operator_loc,
3206 .name = target->name,
3207 .message_loc = target->message_loc
3208 };
3209
3210 // Here we're going to free the target, since it is no longer necessary.
3211 // However, we don't want to call `pm_node_destroy` because we want to keep
3212 // around all of its children since we just reused them.
3213 xfree(target);
3214
3215 return node;
3216}
3217
3222static pm_index_target_node_t *
3223pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3224 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3225 pm_node_flags_t flags = target->base.flags;
3226
3227 pm_index_arguments_check(parser, target->arguments, target->block);
3228
3229 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3230 *node = (pm_index_target_node_t) {
3231 {
3232 .type = PM_INDEX_TARGET_NODE,
3233 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3234 .node_id = PM_NODE_IDENTIFY(parser),
3235 .location = target->base.location
3236 },
3237 .receiver = target->receiver,
3238 .opening_loc = target->opening_loc,
3239 .arguments = target->arguments,
3240 .closing_loc = target->closing_loc,
3241 .block = (pm_block_argument_node_t *) target->block,
3242 };
3243
3244 // Here we're going to free the target, since it is no longer necessary.
3245 // However, we don't want to call `pm_node_destroy` because we want to keep
3246 // around all of its children since we just reused them.
3247 xfree(target);
3248
3249 return node;
3250}
3251
3255static pm_capture_pattern_node_t *
3256pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3257 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3258
3259 *node = (pm_capture_pattern_node_t) {
3260 {
3261 .type = PM_CAPTURE_PATTERN_NODE,
3262 .node_id = PM_NODE_IDENTIFY(parser),
3263 .location = {
3264 .start = value->location.start,
3265 .end = target->base.location.end
3266 },
3267 },
3268 .value = value,
3269 .target = target,
3270 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3271 };
3272
3273 return node;
3274}
3275
3279static pm_case_node_t *
3280pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3281 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3282
3283 *node = (pm_case_node_t) {
3284 {
3285 .type = PM_CASE_NODE,
3286 .node_id = PM_NODE_IDENTIFY(parser),
3287 .location = {
3288 .start = case_keyword->start,
3289 .end = end_keyword->end
3290 },
3291 },
3292 .predicate = predicate,
3293 .else_clause = NULL,
3294 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3295 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3296 .conditions = { 0 }
3297 };
3298
3299 return node;
3300}
3301
3305static void
3306pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3307 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3308
3309 pm_node_list_append(&node->conditions, condition);
3310 node->base.location.end = condition->location.end;
3311}
3312
3316static void
3317pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3318 node->else_clause = else_clause;
3319 node->base.location.end = else_clause->base.location.end;
3320}
3321
3325static void
3326pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3327 node->base.location.end = end_keyword->end;
3328 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3329}
3330
3334static pm_case_match_node_t *
3335pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3336 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3337
3338 *node = (pm_case_match_node_t) {
3339 {
3340 .type = PM_CASE_MATCH_NODE,
3341 .node_id = PM_NODE_IDENTIFY(parser),
3342 .location = {
3343 .start = case_keyword->start,
3344 .end = end_keyword->end
3345 },
3346 },
3347 .predicate = predicate,
3348 .else_clause = NULL,
3349 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3350 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3351 .conditions = { 0 }
3352 };
3353
3354 return node;
3355}
3356
3360static void
3361pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3362 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3363
3364 pm_node_list_append(&node->conditions, condition);
3365 node->base.location.end = condition->location.end;
3366}
3367
3371static void
3372pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3373 node->else_clause = else_clause;
3374 node->base.location.end = else_clause->base.location.end;
3375}
3376
3380static void
3381pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3382 node->base.location.end = end_keyword->end;
3383 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3384}
3385
3389static pm_class_node_t *
3390pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3391 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3392
3393 *node = (pm_class_node_t) {
3394 {
3395 .type = PM_CLASS_NODE,
3396 .node_id = PM_NODE_IDENTIFY(parser),
3397 .location = { .start = class_keyword->start, .end = end_keyword->end },
3398 },
3399 .locals = *locals,
3400 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3401 .constant_path = constant_path,
3402 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3403 .superclass = superclass,
3404 .body = body,
3405 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3406 .name = pm_parser_constant_id_token(parser, name)
3407 };
3408
3409 return node;
3410}
3411
3415static pm_class_variable_and_write_node_t *
3416pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3417 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3418 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3419
3420 *node = (pm_class_variable_and_write_node_t) {
3421 {
3422 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3423 .node_id = PM_NODE_IDENTIFY(parser),
3424 .location = {
3425 .start = target->base.location.start,
3426 .end = value->location.end
3427 }
3428 },
3429 .name = target->name,
3430 .name_loc = target->base.location,
3431 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3432 .value = value
3433 };
3434
3435 return node;
3436}
3437
3441static pm_class_variable_operator_write_node_t *
3442pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3443 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3444
3445 *node = (pm_class_variable_operator_write_node_t) {
3446 {
3447 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3448 .node_id = PM_NODE_IDENTIFY(parser),
3449 .location = {
3450 .start = target->base.location.start,
3451 .end = value->location.end
3452 }
3453 },
3454 .name = target->name,
3455 .name_loc = target->base.location,
3456 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3457 .value = value,
3458 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3459 };
3460
3461 return node;
3462}
3463
3467static pm_class_variable_or_write_node_t *
3468pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3469 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3470 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3471
3472 *node = (pm_class_variable_or_write_node_t) {
3473 {
3474 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3475 .node_id = PM_NODE_IDENTIFY(parser),
3476 .location = {
3477 .start = target->base.location.start,
3478 .end = value->location.end
3479 }
3480 },
3481 .name = target->name,
3482 .name_loc = target->base.location,
3483 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3484 .value = value
3485 };
3486
3487 return node;
3488}
3489
3493static pm_class_variable_read_node_t *
3494pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3495 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3496 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3497
3498 *node = (pm_class_variable_read_node_t) {
3499 {
3500 .type = PM_CLASS_VARIABLE_READ_NODE,
3501 .node_id = PM_NODE_IDENTIFY(parser),
3502 .location = PM_LOCATION_TOKEN_VALUE(token)
3503 },
3504 .name = pm_parser_constant_id_token(parser, token)
3505 };
3506
3507 return node;
3508}
3509
3516static inline pm_node_flags_t
3517pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3518 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3519 return flags;
3520 }
3521 return 0;
3522}
3523
3527static pm_class_variable_write_node_t *
3528pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3529 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3530
3531 *node = (pm_class_variable_write_node_t) {
3532 {
3533 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3534 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3535 .node_id = PM_NODE_IDENTIFY(parser),
3536 .location = {
3537 .start = read_node->base.location.start,
3538 .end = value->location.end
3539 },
3540 },
3541 .name = read_node->name,
3542 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3543 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3544 .value = value
3545 };
3546
3547 return node;
3548}
3549
3553static pm_constant_path_and_write_node_t *
3554pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3555 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3556 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3557
3558 *node = (pm_constant_path_and_write_node_t) {
3559 {
3560 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3561 .node_id = PM_NODE_IDENTIFY(parser),
3562 .location = {
3563 .start = target->base.location.start,
3564 .end = value->location.end
3565 }
3566 },
3567 .target = target,
3568 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3569 .value = value
3570 };
3571
3572 return node;
3573}
3574
3578static pm_constant_path_operator_write_node_t *
3579pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3580 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3581
3582 *node = (pm_constant_path_operator_write_node_t) {
3583 {
3584 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3585 .node_id = PM_NODE_IDENTIFY(parser),
3586 .location = {
3587 .start = target->base.location.start,
3588 .end = value->location.end
3589 }
3590 },
3591 .target = target,
3592 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3593 .value = value,
3594 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3595 };
3596
3597 return node;
3598}
3599
3603static pm_constant_path_or_write_node_t *
3604pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3605 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3606 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3607
3608 *node = (pm_constant_path_or_write_node_t) {
3609 {
3610 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3611 .node_id = PM_NODE_IDENTIFY(parser),
3612 .location = {
3613 .start = target->base.location.start,
3614 .end = value->location.end
3615 }
3616 },
3617 .target = target,
3618 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3619 .value = value
3620 };
3621
3622 return node;
3623}
3624
3628static pm_constant_path_node_t *
3629pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3630 pm_assert_value_expression(parser, parent);
3631 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3632
3633 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3634 if (name_token->type == PM_TOKEN_CONSTANT) {
3635 name = pm_parser_constant_id_token(parser, name_token);
3636 }
3637
3638 *node = (pm_constant_path_node_t) {
3639 {
3640 .type = PM_CONSTANT_PATH_NODE,
3641 .node_id = PM_NODE_IDENTIFY(parser),
3642 .location = {
3643 .start = parent == NULL ? delimiter->start : parent->location.start,
3644 .end = name_token->end
3645 },
3646 },
3647 .parent = parent,
3648 .name = name,
3649 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3650 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3651 };
3652
3653 return node;
3654}
3655
3659static pm_constant_path_write_node_t *
3660pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3661 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3662
3663 *node = (pm_constant_path_write_node_t) {
3664 {
3665 .type = PM_CONSTANT_PATH_WRITE_NODE,
3666 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3667 .node_id = PM_NODE_IDENTIFY(parser),
3668 .location = {
3669 .start = target->base.location.start,
3670 .end = value->location.end
3671 },
3672 },
3673 .target = target,
3674 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3675 .value = value
3676 };
3677
3678 return node;
3679}
3680
3684static pm_constant_and_write_node_t *
3685pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3686 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3687 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3688
3689 *node = (pm_constant_and_write_node_t) {
3690 {
3691 .type = PM_CONSTANT_AND_WRITE_NODE,
3692 .node_id = PM_NODE_IDENTIFY(parser),
3693 .location = {
3694 .start = target->base.location.start,
3695 .end = value->location.end
3696 }
3697 },
3698 .name = target->name,
3699 .name_loc = target->base.location,
3700 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3701 .value = value
3702 };
3703
3704 return node;
3705}
3706
3710static pm_constant_operator_write_node_t *
3711pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3712 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3713
3714 *node = (pm_constant_operator_write_node_t) {
3715 {
3716 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3717 .node_id = PM_NODE_IDENTIFY(parser),
3718 .location = {
3719 .start = target->base.location.start,
3720 .end = value->location.end
3721 }
3722 },
3723 .name = target->name,
3724 .name_loc = target->base.location,
3725 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3726 .value = value,
3727 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3728 };
3729
3730 return node;
3731}
3732
3736static pm_constant_or_write_node_t *
3737pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3738 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3739 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3740
3741 *node = (pm_constant_or_write_node_t) {
3742 {
3743 .type = PM_CONSTANT_OR_WRITE_NODE,
3744 .node_id = PM_NODE_IDENTIFY(parser),
3745 .location = {
3746 .start = target->base.location.start,
3747 .end = value->location.end
3748 }
3749 },
3750 .name = target->name,
3751 .name_loc = target->base.location,
3752 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3753 .value = value
3754 };
3755
3756 return node;
3757}
3758
3762static pm_constant_read_node_t *
3763pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3764 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3765 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3766
3767 *node = (pm_constant_read_node_t) {
3768 {
3769 .type = PM_CONSTANT_READ_NODE,
3770 .node_id = PM_NODE_IDENTIFY(parser),
3771 .location = PM_LOCATION_TOKEN_VALUE(name)
3772 },
3773 .name = pm_parser_constant_id_token(parser, name)
3774 };
3775
3776 return node;
3777}
3778
3782static pm_constant_write_node_t *
3783pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3784 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3785
3786 *node = (pm_constant_write_node_t) {
3787 {
3788 .type = PM_CONSTANT_WRITE_NODE,
3789 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3790 .node_id = PM_NODE_IDENTIFY(parser),
3791 .location = {
3792 .start = target->base.location.start,
3793 .end = value->location.end
3794 }
3795 },
3796 .name = target->name,
3797 .name_loc = target->base.location,
3798 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3799 .value = value
3800 };
3801
3802 return node;
3803}
3804
3808static void
3809pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3810 switch (PM_NODE_TYPE(node)) {
3811 case PM_BEGIN_NODE: {
3812 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3813 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3814 break;
3815 }
3816 case PM_PARENTHESES_NODE: {
3817 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3818 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3819 break;
3820 }
3821 case PM_STATEMENTS_NODE: {
3822 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3823 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3824 break;
3825 }
3826 case PM_ARRAY_NODE:
3827 case PM_FLOAT_NODE:
3828 case PM_IMAGINARY_NODE:
3829 case PM_INTEGER_NODE:
3830 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3831 case PM_INTERPOLATED_STRING_NODE:
3832 case PM_INTERPOLATED_SYMBOL_NODE:
3833 case PM_INTERPOLATED_X_STRING_NODE:
3834 case PM_RATIONAL_NODE:
3835 case PM_REGULAR_EXPRESSION_NODE:
3836 case PM_SOURCE_ENCODING_NODE:
3837 case PM_SOURCE_FILE_NODE:
3838 case PM_SOURCE_LINE_NODE:
3839 case PM_STRING_NODE:
3840 case PM_SYMBOL_NODE:
3841 case PM_X_STRING_NODE:
3842 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3843 break;
3844 default:
3845 break;
3846 }
3847}
3848
3852static pm_def_node_t *
3853pm_def_node_create(
3854 pm_parser_t *parser,
3855 pm_constant_id_t name,
3856 const pm_token_t *name_loc,
3857 pm_node_t *receiver,
3858 pm_parameters_node_t *parameters,
3859 pm_node_t *body,
3860 pm_constant_id_list_t *locals,
3861 const pm_token_t *def_keyword,
3862 const pm_token_t *operator,
3863 const pm_token_t *lparen,
3864 const pm_token_t *rparen,
3865 const pm_token_t *equal,
3866 const pm_token_t *end_keyword
3867) {
3868 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3869 const uint8_t *end;
3870
3871 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3872 end = body->location.end;
3873 } else {
3874 end = end_keyword->end;
3875 }
3876
3877 if (receiver != NULL) {
3878 pm_def_node_receiver_check(parser, receiver);
3879 }
3880
3881 *node = (pm_def_node_t) {
3882 {
3883 .type = PM_DEF_NODE,
3884 .node_id = PM_NODE_IDENTIFY(parser),
3885 .location = { .start = def_keyword->start, .end = end },
3886 },
3887 .name = name,
3888 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3889 .receiver = receiver,
3890 .parameters = parameters,
3891 .body = body,
3892 .locals = *locals,
3893 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3894 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3895 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3896 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3897 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3898 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3899 };
3900
3901 return node;
3902}
3903
3907static pm_defined_node_t *
3908pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3909 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3910
3911 *node = (pm_defined_node_t) {
3912 {
3913 .type = PM_DEFINED_NODE,
3914 .node_id = PM_NODE_IDENTIFY(parser),
3915 .location = {
3916 .start = keyword_loc->start,
3917 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3918 },
3919 },
3920 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3921 .value = value,
3922 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3923 .keyword_loc = *keyword_loc
3924 };
3925
3926 return node;
3927}
3928
3932static pm_else_node_t *
3933pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3934 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3935 const uint8_t *end = NULL;
3936 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3937 end = statements->base.location.end;
3938 } else {
3939 end = end_keyword->end;
3940 }
3941
3942 *node = (pm_else_node_t) {
3943 {
3944 .type = PM_ELSE_NODE,
3945 .node_id = PM_NODE_IDENTIFY(parser),
3946 .location = {
3947 .start = else_keyword->start,
3948 .end = end,
3949 },
3950 },
3951 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3952 .statements = statements,
3953 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3954 };
3955
3956 return node;
3957}
3958
3962static pm_embedded_statements_node_t *
3963pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3964 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3965
3966 *node = (pm_embedded_statements_node_t) {
3967 {
3968 .type = PM_EMBEDDED_STATEMENTS_NODE,
3969 .node_id = PM_NODE_IDENTIFY(parser),
3970 .location = {
3971 .start = opening->start,
3972 .end = closing->end
3973 }
3974 },
3975 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3976 .statements = statements,
3977 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3978 };
3979
3980 return node;
3981}
3982
3986static pm_embedded_variable_node_t *
3987pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3988 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3989
3990 *node = (pm_embedded_variable_node_t) {
3991 {
3992 .type = PM_EMBEDDED_VARIABLE_NODE,
3993 .node_id = PM_NODE_IDENTIFY(parser),
3994 .location = {
3995 .start = operator->start,
3996 .end = variable->location.end
3997 }
3998 },
3999 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4000 .variable = variable
4001 };
4002
4003 return node;
4004}
4005
4009static pm_ensure_node_t *
4010pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4011 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4012
4013 *node = (pm_ensure_node_t) {
4014 {
4015 .type = PM_ENSURE_NODE,
4016 .node_id = PM_NODE_IDENTIFY(parser),
4017 .location = {
4018 .start = ensure_keyword->start,
4019 .end = end_keyword->end
4020 },
4021 },
4022 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4023 .statements = statements,
4024 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4025 };
4026
4027 return node;
4028}
4029
4033static pm_false_node_t *
4034pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4035 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4036 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4037
4038 *node = (pm_false_node_t) {{
4039 .type = PM_FALSE_NODE,
4040 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4041 .node_id = PM_NODE_IDENTIFY(parser),
4042 .location = PM_LOCATION_TOKEN_VALUE(token)
4043 }};
4044
4045 return node;
4046}
4047
4052static pm_find_pattern_node_t *
4053pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4054 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4055
4056 pm_node_t *left = nodes->nodes[0];
4057 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4058 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4059
4060 pm_node_t *right;
4061
4062 if (nodes->size == 1) {
4063 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4064 } else {
4065 right = nodes->nodes[nodes->size - 1];
4066 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4067 }
4068
4069#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4070 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4071 // The resulting AST will anyway be ignored, but this file still needs to compile.
4072 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4073#else
4074 pm_node_t *right_splat_node = right;
4075#endif
4076 *node = (pm_find_pattern_node_t) {
4077 {
4078 .type = PM_FIND_PATTERN_NODE,
4079 .node_id = PM_NODE_IDENTIFY(parser),
4080 .location = {
4081 .start = left->location.start,
4082 .end = right->location.end,
4083 },
4084 },
4085 .constant = NULL,
4086 .left = left_splat_node,
4087 .right = right_splat_node,
4088 .requireds = { 0 },
4089 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4090 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4091 };
4092
4093 // For now we're going to just copy over each pointer manually. This could be
4094 // much more efficient, as we could instead resize the node list to only point
4095 // to 1...-1.
4096 for (size_t index = 1; index < nodes->size - 1; index++) {
4097 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4098 }
4099
4100 return node;
4101}
4102
4107static double
4108pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4109 ptrdiff_t diff = token->end - token->start;
4110 if (diff <= 0) return 0.0;
4111
4112 // First, get a buffer of the content.
4113 size_t length = (size_t) diff;
4114 char *buffer = xmalloc(sizeof(char) * (length + 1));
4115 memcpy((void *) buffer, token->start, length);
4116
4117 // Next, determine if we need to replace the decimal point because of
4118 // locale-specific options, and then normalize them if we have to.
4119 char decimal_point = *localeconv()->decimal_point;
4120 if (decimal_point != '.') {
4121 for (size_t index = 0; index < length; index++) {
4122 if (buffer[index] == '.') buffer[index] = decimal_point;
4123 }
4124 }
4125
4126 // Next, handle underscores by removing them from the buffer.
4127 for (size_t index = 0; index < length; index++) {
4128 if (buffer[index] == '_') {
4129 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4130 length--;
4131 }
4132 }
4133
4134 // Null-terminate the buffer so that strtod cannot read off the end.
4135 buffer[length] = '\0';
4136
4137 // Now, call strtod to parse the value. Note that CRuby has their own
4138 // version of strtod which avoids locales. We're okay using the locale-aware
4139 // version because we've already validated through the parser that the token
4140 // is in a valid format.
4141 errno = 0;
4142 char *eptr;
4143 double value = strtod(buffer, &eptr);
4144
4145 // This should never happen, because we've already checked that the token
4146 // is in a valid format. However it's good to be safe.
4147 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4148 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4149 xfree((void *) buffer);
4150 return 0.0;
4151 }
4152
4153 // If errno is set, then it should only be ERANGE. At this point we need to
4154 // check if it's infinity (it should be).
4155 if (errno == ERANGE && PRISM_ISINF(value)) {
4156 int warn_width;
4157 const char *ellipsis;
4158
4159 if (length > 20) {
4160 warn_width = 20;
4161 ellipsis = "...";
4162 } else {
4163 warn_width = (int) length;
4164 ellipsis = "";
4165 }
4166
4167 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4168 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4169 }
4170
4171 // Finally we can free the buffer and return the value.
4172 xfree((void *) buffer);
4173 return value;
4174}
4175
4179static pm_float_node_t *
4180pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4181 assert(token->type == PM_TOKEN_FLOAT);
4182 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4183
4184 *node = (pm_float_node_t) {
4185 {
4186 .type = PM_FLOAT_NODE,
4187 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4188 .node_id = PM_NODE_IDENTIFY(parser),
4189 .location = PM_LOCATION_TOKEN_VALUE(token)
4190 },
4191 .value = pm_double_parse(parser, token)
4192 };
4193
4194 return node;
4195}
4196
4200static pm_imaginary_node_t *
4201pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4202 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4203
4204 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4205 *node = (pm_imaginary_node_t) {
4206 {
4207 .type = PM_IMAGINARY_NODE,
4208 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4209 .node_id = PM_NODE_IDENTIFY(parser),
4210 .location = PM_LOCATION_TOKEN_VALUE(token)
4211 },
4212 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4213 .type = PM_TOKEN_FLOAT,
4214 .start = token->start,
4215 .end = token->end - 1
4216 }))
4217 };
4218
4219 return node;
4220}
4221
4225static pm_rational_node_t *
4226pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4227 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4228
4229 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4230 *node = (pm_rational_node_t) {
4231 {
4232 .type = PM_RATIONAL_NODE,
4233 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4234 .node_id = PM_NODE_IDENTIFY(parser),
4235 .location = PM_LOCATION_TOKEN_VALUE(token)
4236 },
4237 .numerator = { 0 },
4238 .denominator = { 0 }
4239 };
4240
4241 const uint8_t *start = token->start;
4242 const uint8_t *end = token->end - 1; // r
4243
4244 while (start < end && *start == '0') start++; // 0.1 -> .1
4245 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4246
4247 size_t length = (size_t) (end - start);
4248 if (length == 1) {
4249 node->denominator.value = 1;
4250 return node;
4251 }
4252
4253 const uint8_t *point = memchr(start, '.', length);
4254 assert(point && "should have a decimal point");
4255
4256 uint8_t *digits = xmalloc(length);
4257 if (digits == NULL) {
4258 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4259 abort();
4260 }
4261
4262 memcpy(digits, start, (unsigned long) (point - start));
4263 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4264 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4265
4266 digits[0] = '1';
4267 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4268 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4269 xfree(digits);
4270
4271 pm_integers_reduce(&node->numerator, &node->denominator);
4272 return node;
4273}
4274
4279static pm_imaginary_node_t *
4280pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4281 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4282
4283 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4284 *node = (pm_imaginary_node_t) {
4285 {
4286 .type = PM_IMAGINARY_NODE,
4287 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4288 .node_id = PM_NODE_IDENTIFY(parser),
4289 .location = PM_LOCATION_TOKEN_VALUE(token)
4290 },
4291 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4292 .type = PM_TOKEN_FLOAT_RATIONAL,
4293 .start = token->start,
4294 .end = token->end - 1
4295 }))
4296 };
4297
4298 return node;
4299}
4300
4304static pm_for_node_t *
4305pm_for_node_create(
4306 pm_parser_t *parser,
4307 pm_node_t *index,
4308 pm_node_t *collection,
4309 pm_statements_node_t *statements,
4310 const pm_token_t *for_keyword,
4311 const pm_token_t *in_keyword,
4312 const pm_token_t *do_keyword,
4313 const pm_token_t *end_keyword
4314) {
4315 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4316
4317 *node = (pm_for_node_t) {
4318 {
4319 .type = PM_FOR_NODE,
4320 .node_id = PM_NODE_IDENTIFY(parser),
4321 .location = {
4322 .start = for_keyword->start,
4323 .end = end_keyword->end
4324 },
4325 },
4326 .index = index,
4327 .collection = collection,
4328 .statements = statements,
4329 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4330 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4331 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4332 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4333 };
4334
4335 return node;
4336}
4337
4341static pm_forwarding_arguments_node_t *
4342pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4343 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4344 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4345
4346 *node = (pm_forwarding_arguments_node_t) {{
4347 .type = PM_FORWARDING_ARGUMENTS_NODE,
4348 .node_id = PM_NODE_IDENTIFY(parser),
4349 .location = PM_LOCATION_TOKEN_VALUE(token)
4350 }};
4351
4352 return node;
4353}
4354
4358static pm_forwarding_parameter_node_t *
4359pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4360 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4361 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4362
4363 *node = (pm_forwarding_parameter_node_t) {{
4364 .type = PM_FORWARDING_PARAMETER_NODE,
4365 .node_id = PM_NODE_IDENTIFY(parser),
4366 .location = PM_LOCATION_TOKEN_VALUE(token)
4367 }};
4368
4369 return node;
4370}
4371
4375static pm_forwarding_super_node_t *
4376pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4377 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4378 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4379 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4380
4381 pm_block_node_t *block = NULL;
4382 if (arguments->block != NULL) {
4383 block = (pm_block_node_t *) arguments->block;
4384 }
4385
4386 *node = (pm_forwarding_super_node_t) {
4387 {
4388 .type = PM_FORWARDING_SUPER_NODE,
4389 .node_id = PM_NODE_IDENTIFY(parser),
4390 .location = {
4391 .start = token->start,
4392 .end = block != NULL ? block->base.location.end : token->end
4393 },
4394 },
4395 .block = block
4396 };
4397
4398 return node;
4399}
4400
4405static pm_hash_pattern_node_t *
4406pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4407 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4408
4409 *node = (pm_hash_pattern_node_t) {
4410 {
4411 .type = PM_HASH_PATTERN_NODE,
4412 .node_id = PM_NODE_IDENTIFY(parser),
4413 .location = {
4414 .start = opening->start,
4415 .end = closing->end
4416 },
4417 },
4418 .constant = NULL,
4419 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4420 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4421 .elements = { 0 },
4422 .rest = NULL
4423 };
4424
4425 return node;
4426}
4427
4431static pm_hash_pattern_node_t *
4432pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4433 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4434
4435 const uint8_t *start;
4436 const uint8_t *end;
4437
4438 if (elements->size > 0) {
4439 if (rest) {
4440 start = elements->nodes[0]->location.start;
4441 end = rest->location.end;
4442 } else {
4443 start = elements->nodes[0]->location.start;
4444 end = elements->nodes[elements->size - 1]->location.end;
4445 }
4446 } else {
4447 assert(rest != NULL);
4448 start = rest->location.start;
4449 end = rest->location.end;
4450 }
4451
4452 *node = (pm_hash_pattern_node_t) {
4453 {
4454 .type = PM_HASH_PATTERN_NODE,
4455 .node_id = PM_NODE_IDENTIFY(parser),
4456 .location = {
4457 .start = start,
4458 .end = end
4459 },
4460 },
4461 .constant = NULL,
4462 .elements = { 0 },
4463 .rest = rest,
4464 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4465 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4466 };
4467
4468 pm_node_t *element;
4469 PM_NODE_LIST_FOREACH(elements, index, element) {
4470 pm_node_list_append(&node->elements, element);
4471 }
4472
4473 return node;
4474}
4475
4479static pm_constant_id_t
4480pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4481 switch (PM_NODE_TYPE(target)) {
4482 case PM_GLOBAL_VARIABLE_READ_NODE:
4483 return ((pm_global_variable_read_node_t *) target)->name;
4484 case PM_BACK_REFERENCE_READ_NODE:
4485 return ((pm_back_reference_read_node_t *) target)->name;
4486 case PM_NUMBERED_REFERENCE_READ_NODE:
4487 // This will only ever happen in the event of a syntax error, but we
4488 // still need to provide something for the node.
4489 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4490 default:
4491 assert(false && "unreachable");
4492 return (pm_constant_id_t) -1;
4493 }
4494}
4495
4499static pm_global_variable_and_write_node_t *
4500pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4501 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4502 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4503
4504 *node = (pm_global_variable_and_write_node_t) {
4505 {
4506 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4507 .node_id = PM_NODE_IDENTIFY(parser),
4508 .location = {
4509 .start = target->location.start,
4510 .end = value->location.end
4511 }
4512 },
4513 .name = pm_global_variable_write_name(parser, target),
4514 .name_loc = target->location,
4515 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4516 .value = value
4517 };
4518
4519 return node;
4520}
4521
4525static pm_global_variable_operator_write_node_t *
4526pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4527 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4528
4529 *node = (pm_global_variable_operator_write_node_t) {
4530 {
4531 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4532 .node_id = PM_NODE_IDENTIFY(parser),
4533 .location = {
4534 .start = target->location.start,
4535 .end = value->location.end
4536 }
4537 },
4538 .name = pm_global_variable_write_name(parser, target),
4539 .name_loc = target->location,
4540 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4541 .value = value,
4542 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4543 };
4544
4545 return node;
4546}
4547
4551static pm_global_variable_or_write_node_t *
4552pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4553 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4554 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4555
4556 *node = (pm_global_variable_or_write_node_t) {
4557 {
4558 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4559 .node_id = PM_NODE_IDENTIFY(parser),
4560 .location = {
4561 .start = target->location.start,
4562 .end = value->location.end
4563 }
4564 },
4565 .name = pm_global_variable_write_name(parser, target),
4566 .name_loc = target->location,
4567 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4568 .value = value
4569 };
4570
4571 return node;
4572}
4573
4577static pm_global_variable_read_node_t *
4578pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4579 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4580
4581 *node = (pm_global_variable_read_node_t) {
4582 {
4583 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4584 .node_id = PM_NODE_IDENTIFY(parser),
4585 .location = PM_LOCATION_TOKEN_VALUE(name),
4586 },
4587 .name = pm_parser_constant_id_token(parser, name)
4588 };
4589
4590 return node;
4591}
4592
4596static pm_global_variable_read_node_t *
4597pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4598 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4599
4600 *node = (pm_global_variable_read_node_t) {
4601 {
4602 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4603 .node_id = PM_NODE_IDENTIFY(parser),
4604 .location = PM_LOCATION_NULL_VALUE(parser)
4605 },
4606 .name = name
4607 };
4608
4609 return node;
4610}
4611
4615static pm_global_variable_write_node_t *
4616pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4617 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4618
4619 *node = (pm_global_variable_write_node_t) {
4620 {
4621 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4622 .node_id = PM_NODE_IDENTIFY(parser),
4623 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4624 .location = {
4625 .start = target->location.start,
4626 .end = value->location.end
4627 },
4628 },
4629 .name = pm_global_variable_write_name(parser, target),
4630 .name_loc = PM_LOCATION_NODE_VALUE(target),
4631 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4632 .value = value
4633 };
4634
4635 return node;
4636}
4637
4641static pm_global_variable_write_node_t *
4642pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4643 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4644
4645 *node = (pm_global_variable_write_node_t) {
4646 {
4647 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4648 .node_id = PM_NODE_IDENTIFY(parser),
4649 .location = PM_LOCATION_NULL_VALUE(parser)
4650 },
4651 .name = name,
4652 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4653 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4654 .value = value
4655 };
4656
4657 return node;
4658}
4659
4663static pm_hash_node_t *
4664pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4665 assert(opening != NULL);
4666 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4667
4668 *node = (pm_hash_node_t) {
4669 {
4670 .type = PM_HASH_NODE,
4671 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4672 .node_id = PM_NODE_IDENTIFY(parser),
4673 .location = PM_LOCATION_TOKEN_VALUE(opening)
4674 },
4675 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4676 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4677 .elements = { 0 }
4678 };
4679
4680 return node;
4681}
4682
4686static inline void
4687pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4688 pm_node_list_append(&hash->elements, element);
4689
4690 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4691 if (static_literal) {
4692 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4693 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4694 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4695 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4696 }
4697
4698 if (!static_literal) {
4699 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4700 }
4701}
4702
4703static inline void
4704pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4705 hash->base.location.end = token->end;
4706 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4707}
4708
4712static pm_if_node_t *
4713pm_if_node_create(pm_parser_t *parser,
4714 const pm_token_t *if_keyword,
4715 pm_node_t *predicate,
4716 const pm_token_t *then_keyword,
4717 pm_statements_node_t *statements,
4718 pm_node_t *subsequent,
4719 const pm_token_t *end_keyword
4720) {
4721 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4722 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4723
4724 const uint8_t *end;
4725 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4726 end = end_keyword->end;
4727 } else if (subsequent != NULL) {
4728 end = subsequent->location.end;
4729 } else if (pm_statements_node_body_length(statements) != 0) {
4730 end = statements->base.location.end;
4731 } else {
4732 end = predicate->location.end;
4733 }
4734
4735 *node = (pm_if_node_t) {
4736 {
4737 .type = PM_IF_NODE,
4738 .flags = PM_NODE_FLAG_NEWLINE,
4739 .node_id = PM_NODE_IDENTIFY(parser),
4740 .location = {
4741 .start = if_keyword->start,
4742 .end = end
4743 },
4744 },
4745 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4746 .predicate = predicate,
4747 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4748 .statements = statements,
4749 .subsequent = subsequent,
4750 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4751 };
4752
4753 return node;
4754}
4755
4759static pm_if_node_t *
4760pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4761 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4762 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4763
4764 pm_statements_node_t *statements = pm_statements_node_create(parser);
4765 pm_statements_node_body_append(parser, statements, statement, true);
4766
4767 *node = (pm_if_node_t) {
4768 {
4769 .type = PM_IF_NODE,
4770 .flags = PM_NODE_FLAG_NEWLINE,
4771 .node_id = PM_NODE_IDENTIFY(parser),
4772 .location = {
4773 .start = statement->location.start,
4774 .end = predicate->location.end
4775 },
4776 },
4777 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4778 .predicate = predicate,
4779 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4780 .statements = statements,
4781 .subsequent = NULL,
4782 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4783 };
4784
4785 return node;
4786}
4787
4791static pm_if_node_t *
4792pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4793 pm_assert_value_expression(parser, predicate);
4794 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4795
4796 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4797 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4798
4799 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4800 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4801
4802 pm_token_t end_keyword = not_provided(parser);
4803 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4804
4805 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4806
4807 *node = (pm_if_node_t) {
4808 {
4809 .type = PM_IF_NODE,
4810 .flags = PM_NODE_FLAG_NEWLINE,
4811 .node_id = PM_NODE_IDENTIFY(parser),
4812 .location = {
4813 .start = predicate->location.start,
4814 .end = false_expression->location.end,
4815 },
4816 },
4817 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4818 .predicate = predicate,
4819 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4820 .statements = if_statements,
4821 .subsequent = (pm_node_t *) else_node,
4822 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4823 };
4824
4825 return node;
4826
4827}
4828
4829static inline void
4830pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4831 node->base.location.end = keyword->end;
4832 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4833}
4834
4835static inline void
4836pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4837 node->base.location.end = keyword->end;
4838 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4839}
4840
4844static pm_implicit_node_t *
4845pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4846 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4847
4848 *node = (pm_implicit_node_t) {
4849 {
4850 .type = PM_IMPLICIT_NODE,
4851 .node_id = PM_NODE_IDENTIFY(parser),
4852 .location = value->location
4853 },
4854 .value = value
4855 };
4856
4857 return node;
4858}
4859
4863static pm_implicit_rest_node_t *
4864pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4865 assert(token->type == PM_TOKEN_COMMA);
4866
4867 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4868
4869 *node = (pm_implicit_rest_node_t) {
4870 {
4871 .type = PM_IMPLICIT_REST_NODE,
4872 .node_id = PM_NODE_IDENTIFY(parser),
4873 .location = PM_LOCATION_TOKEN_VALUE(token)
4874 }
4875 };
4876
4877 return node;
4878}
4879
4883static pm_integer_node_t *
4884pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4885 assert(token->type == PM_TOKEN_INTEGER);
4886 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4887
4888 *node = (pm_integer_node_t) {
4889 {
4890 .type = PM_INTEGER_NODE,
4891 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4892 .node_id = PM_NODE_IDENTIFY(parser),
4893 .location = PM_LOCATION_TOKEN_VALUE(token)
4894 },
4895 .value = { 0 }
4896 };
4897
4898 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4899 switch (base) {
4900 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4901 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4902 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4903 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4904 default: assert(false && "unreachable"); break;
4905 }
4906
4907 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4908 return node;
4909}
4910
4915static pm_imaginary_node_t *
4916pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4917 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4918
4919 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4920 *node = (pm_imaginary_node_t) {
4921 {
4922 .type = PM_IMAGINARY_NODE,
4923 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4924 .node_id = PM_NODE_IDENTIFY(parser),
4925 .location = PM_LOCATION_TOKEN_VALUE(token)
4926 },
4927 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4928 .type = PM_TOKEN_INTEGER,
4929 .start = token->start,
4930 .end = token->end - 1
4931 }))
4932 };
4933
4934 return node;
4935}
4936
4941static pm_rational_node_t *
4942pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4943 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4944
4945 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4946 *node = (pm_rational_node_t) {
4947 {
4948 .type = PM_RATIONAL_NODE,
4949 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4950 .node_id = PM_NODE_IDENTIFY(parser),
4951 .location = PM_LOCATION_TOKEN_VALUE(token)
4952 },
4953 .numerator = { 0 },
4954 .denominator = { .value = 1, 0 }
4955 };
4956
4957 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4958 switch (base) {
4959 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4960 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4961 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4962 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4963 default: assert(false && "unreachable"); break;
4964 }
4965
4966 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4967
4968 return node;
4969}
4970
4975static pm_imaginary_node_t *
4976pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4977 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4978
4979 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4980 *node = (pm_imaginary_node_t) {
4981 {
4982 .type = PM_IMAGINARY_NODE,
4983 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4984 .node_id = PM_NODE_IDENTIFY(parser),
4985 .location = PM_LOCATION_TOKEN_VALUE(token)
4986 },
4987 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4988 .type = PM_TOKEN_INTEGER_RATIONAL,
4989 .start = token->start,
4990 .end = token->end - 1
4991 }))
4992 };
4993
4994 return node;
4995}
4996
5000static pm_in_node_t *
5001pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
5002 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
5003
5004 const uint8_t *end;
5005 if (statements != NULL) {
5006 end = statements->base.location.end;
5007 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
5008 end = then_keyword->end;
5009 } else {
5010 end = pattern->location.end;
5011 }
5012
5013 *node = (pm_in_node_t) {
5014 {
5015 .type = PM_IN_NODE,
5016 .node_id = PM_NODE_IDENTIFY(parser),
5017 .location = {
5018 .start = in_keyword->start,
5019 .end = end
5020 },
5021 },
5022 .pattern = pattern,
5023 .statements = statements,
5024 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5025 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5026 };
5027
5028 return node;
5029}
5030
5034static pm_instance_variable_and_write_node_t *
5035pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5036 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5037 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5038
5039 *node = (pm_instance_variable_and_write_node_t) {
5040 {
5041 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5042 .node_id = PM_NODE_IDENTIFY(parser),
5043 .location = {
5044 .start = target->base.location.start,
5045 .end = value->location.end
5046 }
5047 },
5048 .name = target->name,
5049 .name_loc = target->base.location,
5050 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5051 .value = value
5052 };
5053
5054 return node;
5055}
5056
5060static pm_instance_variable_operator_write_node_t *
5061pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5062 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5063
5064 *node = (pm_instance_variable_operator_write_node_t) {
5065 {
5066 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5067 .node_id = PM_NODE_IDENTIFY(parser),
5068 .location = {
5069 .start = target->base.location.start,
5070 .end = value->location.end
5071 }
5072 },
5073 .name = target->name,
5074 .name_loc = target->base.location,
5075 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5076 .value = value,
5077 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5078 };
5079
5080 return node;
5081}
5082
5086static pm_instance_variable_or_write_node_t *
5087pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5088 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5089 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5090
5091 *node = (pm_instance_variable_or_write_node_t) {
5092 {
5093 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5094 .node_id = PM_NODE_IDENTIFY(parser),
5095 .location = {
5096 .start = target->base.location.start,
5097 .end = value->location.end
5098 }
5099 },
5100 .name = target->name,
5101 .name_loc = target->base.location,
5102 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5103 .value = value
5104 };
5105
5106 return node;
5107}
5108
5112static pm_instance_variable_read_node_t *
5113pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5114 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5115 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5116
5117 *node = (pm_instance_variable_read_node_t) {
5118 {
5119 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5120 .node_id = PM_NODE_IDENTIFY(parser),
5121 .location = PM_LOCATION_TOKEN_VALUE(token)
5122 },
5123 .name = pm_parser_constant_id_token(parser, token)
5124 };
5125
5126 return node;
5127}
5128
5133static pm_instance_variable_write_node_t *
5134pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5135 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5136 *node = (pm_instance_variable_write_node_t) {
5137 {
5138 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5139 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5140 .node_id = PM_NODE_IDENTIFY(parser),
5141 .location = {
5142 .start = read_node->base.location.start,
5143 .end = value->location.end
5144 }
5145 },
5146 .name = read_node->name,
5147 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5148 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5149 .value = value
5150 };
5151
5152 return node;
5153}
5154
5160static void
5161pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5162 switch (PM_NODE_TYPE(part)) {
5163 case PM_STRING_NODE:
5164 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5165 break;
5166 case PM_EMBEDDED_STATEMENTS_NODE: {
5167 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5168 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5169
5170 if (embedded == NULL) {
5171 // If there are no statements or more than one statement, then
5172 // we lose the static literal flag.
5173 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5174 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5175 // If the embedded statement is a string, then we can keep the
5176 // static literal flag and mark the string as frozen.
5177 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5178 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5179 // If the embedded statement is an interpolated string and it's
5180 // a static literal, then we can keep the static literal flag.
5181 } else {
5182 // Otherwise we lose the static literal flag.
5183 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5184 }
5185
5186 break;
5187 }
5188 case PM_EMBEDDED_VARIABLE_NODE:
5189 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5190 break;
5191 default:
5192 assert(false && "unexpected node type");
5193 break;
5194 }
5195
5196 pm_node_list_append(parts, part);
5197}
5198
5202static pm_interpolated_regular_expression_node_t *
5203pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5204 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5205
5206 *node = (pm_interpolated_regular_expression_node_t) {
5207 {
5208 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5209 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5210 .node_id = PM_NODE_IDENTIFY(parser),
5211 .location = {
5212 .start = opening->start,
5213 .end = NULL,
5214 },
5215 },
5216 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5217 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5218 .parts = { 0 }
5219 };
5220
5221 return node;
5222}
5223
5224static inline void
5225pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5226 if (node->base.location.start > part->location.start) {
5227 node->base.location.start = part->location.start;
5228 }
5229 if (node->base.location.end < part->location.end) {
5230 node->base.location.end = part->location.end;
5231 }
5232
5233 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5234}
5235
5236static inline void
5237pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5238 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5239 node->base.location.end = closing->end;
5240 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5241}
5242
5266static inline void
5267pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5268#define CLEAR_FLAGS(node) \
5269 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5270
5271#define MUTABLE_FLAGS(node) \
5272 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5273
5274 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5275 node->base.location.start = part->location.start;
5276 }
5277
5278 node->base.location.end = MAX(node->base.location.end, part->location.end);
5279
5280 switch (PM_NODE_TYPE(part)) {
5281 case PM_STRING_NODE:
5282 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5283 break;
5284 case PM_INTERPOLATED_STRING_NODE:
5285 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5286 // If the string that we're concatenating is a static literal,
5287 // then we can keep the static literal flag for this string.
5288 } else {
5289 // Otherwise, we lose the static literal flag here and we should
5290 // also clear the mutability flags.
5291 CLEAR_FLAGS(node);
5292 }
5293 break;
5294 case PM_EMBEDDED_STATEMENTS_NODE: {
5295 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5296 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5297
5298 if (embedded == NULL) {
5299 // If we're embedding multiple statements or no statements, then
5300 // the string is not longer a static literal.
5301 CLEAR_FLAGS(node);
5302 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5303 // If the embedded statement is a string, then we can make that
5304 // string as frozen and static literal, and not touch the static
5305 // literal status of this string.
5306 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5307
5308 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5309 MUTABLE_FLAGS(node);
5310 }
5311 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5312 // If the embedded statement is an interpolated string, but that
5313 // string is marked as static literal, then we can keep our
5314 // static literal status for this string.
5315 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5316 MUTABLE_FLAGS(node);
5317 }
5318 } else {
5319 // In all other cases, we lose the static literal flag here and
5320 // become mutable.
5321 CLEAR_FLAGS(node);
5322 }
5323
5324 break;
5325 }
5326 case PM_EMBEDDED_VARIABLE_NODE:
5327 // Embedded variables clear static literal, which means we also
5328 // should clear the mutability flags.
5329 CLEAR_FLAGS(node);
5330 break;
5331 case PM_X_STRING_NODE:
5332 case PM_INTERPOLATED_X_STRING_NODE:
5333 // If this is an x string, then this is a syntax error. But we want
5334 // to handle it here so that we don't fail the assertion.
5335 CLEAR_FLAGS(node);
5336 break;
5337 default:
5338 assert(false && "unexpected node type");
5339 break;
5340 }
5341
5342 pm_node_list_append(&node->parts, part);
5343
5344#undef CLEAR_FLAGS
5345#undef MUTABLE_FLAGS
5346}
5347
5351static pm_interpolated_string_node_t *
5352pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5353 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5354 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5355
5356 switch (parser->frozen_string_literal) {
5357 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5358 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5359 break;
5360 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5361 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5362 break;
5363 }
5364
5365 *node = (pm_interpolated_string_node_t) {
5366 {
5367 .type = PM_INTERPOLATED_STRING_NODE,
5368 .flags = flags,
5369 .node_id = PM_NODE_IDENTIFY(parser),
5370 .location = {
5371 .start = opening->start,
5372 .end = closing->end,
5373 },
5374 },
5375 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5376 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5377 .parts = { 0 }
5378 };
5379
5380 if (parts != NULL) {
5381 pm_node_t *part;
5382 PM_NODE_LIST_FOREACH(parts, index, part) {
5383 pm_interpolated_string_node_append(node, part);
5384 }
5385 }
5386
5387 return node;
5388}
5389
5393static void
5394pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5395 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5396 node->base.location.end = closing->end;
5397}
5398
5399static void
5400pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5401 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5402 node->base.location.start = part->location.start;
5403 }
5404
5405 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5406 node->base.location.end = MAX(node->base.location.end, part->location.end);
5407}
5408
5409static void
5410pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5411 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5412 node->base.location.end = closing->end;
5413}
5414
5418static pm_interpolated_symbol_node_t *
5419pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5420 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5421
5422 *node = (pm_interpolated_symbol_node_t) {
5423 {
5424 .type = PM_INTERPOLATED_SYMBOL_NODE,
5425 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5426 .node_id = PM_NODE_IDENTIFY(parser),
5427 .location = {
5428 .start = opening->start,
5429 .end = closing->end,
5430 },
5431 },
5432 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5433 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5434 .parts = { 0 }
5435 };
5436
5437 if (parts != NULL) {
5438 pm_node_t *part;
5439 PM_NODE_LIST_FOREACH(parts, index, part) {
5440 pm_interpolated_symbol_node_append(node, part);
5441 }
5442 }
5443
5444 return node;
5445}
5446
5450static pm_interpolated_x_string_node_t *
5451pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5452 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5453
5454 *node = (pm_interpolated_x_string_node_t) {
5455 {
5456 .type = PM_INTERPOLATED_X_STRING_NODE,
5457 .node_id = PM_NODE_IDENTIFY(parser),
5458 .location = {
5459 .start = opening->start,
5460 .end = closing->end
5461 },
5462 },
5463 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5464 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5465 .parts = { 0 }
5466 };
5467
5468 return node;
5469}
5470
5471static inline void
5472pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5473 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5474 node->base.location.end = part->location.end;
5475}
5476
5477static inline void
5478pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5479 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5480 node->base.location.end = closing->end;
5481}
5482
5486static pm_it_local_variable_read_node_t *
5487pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5488 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5489
5490 *node = (pm_it_local_variable_read_node_t) {
5491 {
5492 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5493 .node_id = PM_NODE_IDENTIFY(parser),
5494 .location = PM_LOCATION_TOKEN_VALUE(name)
5495 }
5496 };
5497
5498 return node;
5499}
5500
5504static pm_it_parameters_node_t *
5505pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5506 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5507
5508 *node = (pm_it_parameters_node_t) {
5509 {
5510 .type = PM_IT_PARAMETERS_NODE,
5511 .node_id = PM_NODE_IDENTIFY(parser),
5512 .location = {
5513 .start = opening->start,
5514 .end = closing->end
5515 }
5516 }
5517 };
5518
5519 return node;
5520}
5521
5525static pm_keyword_hash_node_t *
5526pm_keyword_hash_node_create(pm_parser_t *parser) {
5527 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5528
5529 *node = (pm_keyword_hash_node_t) {
5530 .base = {
5531 .type = PM_KEYWORD_HASH_NODE,
5532 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5533 .node_id = PM_NODE_IDENTIFY(parser),
5534 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5535 },
5536 .elements = { 0 }
5537 };
5538
5539 return node;
5540}
5541
5545static void
5546pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5547 // If the element being added is not an AssocNode or does not have a symbol
5548 // key, then we want to turn the SYMBOL_KEYS flag off.
5549 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5550 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5551 }
5552
5553 pm_node_list_append(&hash->elements, element);
5554 if (hash->base.location.start == NULL) {
5555 hash->base.location.start = element->location.start;
5556 }
5557 hash->base.location.end = element->location.end;
5558}
5559
5563static pm_required_keyword_parameter_node_t *
5564pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5565 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5566
5567 *node = (pm_required_keyword_parameter_node_t) {
5568 {
5569 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5570 .node_id = PM_NODE_IDENTIFY(parser),
5571 .location = {
5572 .start = name->start,
5573 .end = name->end
5574 },
5575 },
5576 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5577 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5578 };
5579
5580 return node;
5581}
5582
5586static pm_optional_keyword_parameter_node_t *
5587pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5588 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5589
5590 *node = (pm_optional_keyword_parameter_node_t) {
5591 {
5592 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5593 .node_id = PM_NODE_IDENTIFY(parser),
5594 .location = {
5595 .start = name->start,
5596 .end = value->location.end
5597 },
5598 },
5599 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5600 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5601 .value = value
5602 };
5603
5604 return node;
5605}
5606
5610static pm_keyword_rest_parameter_node_t *
5611pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5612 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5613
5614 *node = (pm_keyword_rest_parameter_node_t) {
5615 {
5616 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5617 .node_id = PM_NODE_IDENTIFY(parser),
5618 .location = {
5619 .start = operator->start,
5620 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5621 },
5622 },
5623 .name = pm_parser_optional_constant_id_token(parser, name),
5624 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5625 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5626 };
5627
5628 return node;
5629}
5630
5634static pm_lambda_node_t *
5635pm_lambda_node_create(
5636 pm_parser_t *parser,
5637 pm_constant_id_list_t *locals,
5638 const pm_token_t *operator,
5639 const pm_token_t *opening,
5640 const pm_token_t *closing,
5641 pm_node_t *parameters,
5642 pm_node_t *body
5643) {
5644 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5645
5646 *node = (pm_lambda_node_t) {
5647 {
5648 .type = PM_LAMBDA_NODE,
5649 .node_id = PM_NODE_IDENTIFY(parser),
5650 .location = {
5651 .start = operator->start,
5652 .end = closing->end
5653 },
5654 },
5655 .locals = *locals,
5656 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5657 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5658 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5659 .parameters = parameters,
5660 .body = body
5661 };
5662
5663 return node;
5664}
5665
5669static pm_local_variable_and_write_node_t *
5670pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5671 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5672 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5673 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5674
5675 *node = (pm_local_variable_and_write_node_t) {
5676 {
5677 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5678 .node_id = PM_NODE_IDENTIFY(parser),
5679 .location = {
5680 .start = target->location.start,
5681 .end = value->location.end
5682 }
5683 },
5684 .name_loc = target->location,
5685 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5686 .value = value,
5687 .name = name,
5688 .depth = depth
5689 };
5690
5691 return node;
5692}
5693
5697static pm_local_variable_operator_write_node_t *
5698pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5699 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5700
5701 *node = (pm_local_variable_operator_write_node_t) {
5702 {
5703 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5704 .node_id = PM_NODE_IDENTIFY(parser),
5705 .location = {
5706 .start = target->location.start,
5707 .end = value->location.end
5708 }
5709 },
5710 .name_loc = target->location,
5711 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5712 .value = value,
5713 .name = name,
5714 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5715 .depth = depth
5716 };
5717
5718 return node;
5719}
5720
5724static pm_local_variable_or_write_node_t *
5725pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5726 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5727 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5728 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5729
5730 *node = (pm_local_variable_or_write_node_t) {
5731 {
5732 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5733 .node_id = PM_NODE_IDENTIFY(parser),
5734 .location = {
5735 .start = target->location.start,
5736 .end = value->location.end
5737 }
5738 },
5739 .name_loc = target->location,
5740 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5741 .value = value,
5742 .name = name,
5743 .depth = depth
5744 };
5745
5746 return node;
5747}
5748
5752static pm_local_variable_read_node_t *
5753pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5754 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5755
5756 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5757
5758 *node = (pm_local_variable_read_node_t) {
5759 {
5760 .type = PM_LOCAL_VARIABLE_READ_NODE,
5761 .node_id = PM_NODE_IDENTIFY(parser),
5762 .location = PM_LOCATION_TOKEN_VALUE(name)
5763 },
5764 .name = name_id,
5765 .depth = depth
5766 };
5767
5768 return node;
5769}
5770
5774static pm_local_variable_read_node_t *
5775pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5776 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5777 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5778}
5779
5784static pm_local_variable_read_node_t *
5785pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5786 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5787 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5788}
5789
5793static pm_local_variable_write_node_t *
5794pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5795 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5796
5797 *node = (pm_local_variable_write_node_t) {
5798 {
5799 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5800 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5801 .node_id = PM_NODE_IDENTIFY(parser),
5802 .location = {
5803 .start = name_loc->start,
5804 .end = value->location.end
5805 }
5806 },
5807 .name = name,
5808 .depth = depth,
5809 .value = value,
5810 .name_loc = *name_loc,
5811 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5812 };
5813
5814 return node;
5815}
5816
/**
 * Returns true if the bytes in the range [start, end) are exactly `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if ((end - start) != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
5824
/**
 * Returns true if the bytes in the range [start, end) are exactly two bytes
 * long and consist of an underscore followed by a decimal digit other than
 * zero.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if ((end - start) != 2) {
        return false;
    }

    if (start[0] != '_' || start[1] == '0') {
        return false;
    }

    return pm_char_is_decimal_digit(start[1]);
}
5833
5838static inline void
5839pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5840 if (pm_token_is_numbered_parameter(start, end)) {
5841 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5842 }
5843}
5844
5849static pm_local_variable_target_node_t *
5850pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5851 pm_refute_numbered_parameter(parser, location->start, location->end);
5852 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5853
5854 *node = (pm_local_variable_target_node_t) {
5855 {
5856 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5857 .node_id = PM_NODE_IDENTIFY(parser),
5858 .location = *location
5859 },
5860 .name = name,
5861 .depth = depth
5862 };
5863
5864 return node;
5865}
5866
5870static pm_match_predicate_node_t *
5871pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5872 pm_assert_value_expression(parser, value);
5873
5874 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5875
5876 *node = (pm_match_predicate_node_t) {
5877 {
5878 .type = PM_MATCH_PREDICATE_NODE,
5879 .node_id = PM_NODE_IDENTIFY(parser),
5880 .location = {
5881 .start = value->location.start,
5882 .end = pattern->location.end
5883 }
5884 },
5885 .value = value,
5886 .pattern = pattern,
5887 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5888 };
5889
5890 return node;
5891}
5892
5896static pm_match_required_node_t *
5897pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5898 pm_assert_value_expression(parser, value);
5899
5900 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5901
5902 *node = (pm_match_required_node_t) {
5903 {
5904 .type = PM_MATCH_REQUIRED_NODE,
5905 .node_id = PM_NODE_IDENTIFY(parser),
5906 .location = {
5907 .start = value->location.start,
5908 .end = pattern->location.end
5909 }
5910 },
5911 .value = value,
5912 .pattern = pattern,
5913 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5914 };
5915
5916 return node;
5917}
5918
5922static pm_match_write_node_t *
5923pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5924 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5925
5926 *node = (pm_match_write_node_t) {
5927 {
5928 .type = PM_MATCH_WRITE_NODE,
5929 .node_id = PM_NODE_IDENTIFY(parser),
5930 .location = call->base.location
5931 },
5932 .call = call,
5933 .targets = { 0 }
5934 };
5935
5936 return node;
5937}
5938
5942static pm_module_node_t *
5943pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5944 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5945
5946 *node = (pm_module_node_t) {
5947 {
5948 .type = PM_MODULE_NODE,
5949 .node_id = PM_NODE_IDENTIFY(parser),
5950 .location = {
5951 .start = module_keyword->start,
5952 .end = end_keyword->end
5953 }
5954 },
5955 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5956 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5957 .constant_path = constant_path,
5958 .body = body,
5959 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5960 .name = pm_parser_constant_id_token(parser, name)
5961 };
5962
5963 return node;
5964}
5965
5969static pm_multi_target_node_t *
5970pm_multi_target_node_create(pm_parser_t *parser) {
5971 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5972
5973 *node = (pm_multi_target_node_t) {
5974 {
5975 .type = PM_MULTI_TARGET_NODE,
5976 .node_id = PM_NODE_IDENTIFY(parser),
5977 .location = { .start = NULL, .end = NULL }
5978 },
5979 .lefts = { 0 },
5980 .rest = NULL,
5981 .rights = { 0 },
5982 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5983 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5984 };
5985
5986 return node;
5987}
5988
5992static void
5993pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5994 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5995 if (node->rest == NULL) {
5996 node->rest = target;
5997 } else {
5998 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5999 pm_node_list_append(&node->rights, target);
6000 }
6001 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
6002 if (node->rest == NULL) {
6003 node->rest = target;
6004 } else {
6005 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
6006 pm_node_list_append(&node->rights, target);
6007 }
6008 } else if (node->rest == NULL) {
6009 pm_node_list_append(&node->lefts, target);
6010 } else {
6011 pm_node_list_append(&node->rights, target);
6012 }
6013
6014 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
6015 node->base.location.start = target->location.start;
6016 }
6017
6018 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6019 node->base.location.end = target->location.end;
6020 }
6021}
6022
6026static void
6027pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6028 node->base.location.start = lparen->start;
6029 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6030}
6031
6035static void
6036pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6037 node->base.location.end = rparen->end;
6038 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6039}
6040
6044static pm_multi_write_node_t *
6045pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6046 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6047
6048 *node = (pm_multi_write_node_t) {
6049 {
6050 .type = PM_MULTI_WRITE_NODE,
6051 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6052 .node_id = PM_NODE_IDENTIFY(parser),
6053 .location = {
6054 .start = target->base.location.start,
6055 .end = value->location.end
6056 }
6057 },
6058 .lefts = target->lefts,
6059 .rest = target->rest,
6060 .rights = target->rights,
6061 .lparen_loc = target->lparen_loc,
6062 .rparen_loc = target->rparen_loc,
6063 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6064 .value = value
6065 };
6066
6067 // Explicitly do not call pm_node_destroy here because we want to keep
6068 // around all of the information within the MultiWriteNode node.
6069 xfree(target);
6070
6071 return node;
6072}
6073
6077static pm_next_node_t *
6078pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6079 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6080 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6081
6082 *node = (pm_next_node_t) {
6083 {
6084 .type = PM_NEXT_NODE,
6085 .node_id = PM_NODE_IDENTIFY(parser),
6086 .location = {
6087 .start = keyword->start,
6088 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6089 }
6090 },
6091 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6092 .arguments = arguments
6093 };
6094
6095 return node;
6096}
6097
6101static pm_nil_node_t *
6102pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6103 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6104 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6105
6106 *node = (pm_nil_node_t) {{
6107 .type = PM_NIL_NODE,
6108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6109 .node_id = PM_NODE_IDENTIFY(parser),
6110 .location = PM_LOCATION_TOKEN_VALUE(token)
6111 }};
6112
6113 return node;
6114}
6115
6119static pm_no_keywords_parameter_node_t *
6120pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6121 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6122 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6123 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6124
6125 *node = (pm_no_keywords_parameter_node_t) {
6126 {
6127 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6128 .node_id = PM_NODE_IDENTIFY(parser),
6129 .location = {
6130 .start = operator->start,
6131 .end = keyword->end
6132 }
6133 },
6134 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6135 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6136 };
6137
6138 return node;
6139}
6140
6144static pm_numbered_parameters_node_t *
6145pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6146 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6147
6148 *node = (pm_numbered_parameters_node_t) {
6149 {
6150 .type = PM_NUMBERED_PARAMETERS_NODE,
6151 .node_id = PM_NODE_IDENTIFY(parser),
6152 .location = *location
6153 },
6154 .maximum = maximum
6155 };
6156
6157 return node;
6158}
6159
6164#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6165
6172static uint32_t
6173pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6174 const uint8_t *start = token->start + 1;
6175 const uint8_t *end = token->end;
6176
6177 ptrdiff_t diff = end - start;
6178 assert(diff > 0);
6179#if PTRDIFF_MAX > SIZE_MAX
6180 assert(diff < (ptrdiff_t) SIZE_MAX);
6181#endif
6182 size_t length = (size_t) diff;
6183
6184 char *digits = xcalloc(length + 1, sizeof(char));
6185 memcpy(digits, start, length);
6186 digits[length] = '\0';
6187
6188 char *endptr;
6189 errno = 0;
6190 unsigned long value = strtoul(digits, &endptr, 10);
6191
6192 if ((digits == endptr) || (*endptr != '\0')) {
6193 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6194 value = 0;
6195 }
6196
6197 xfree(digits);
6198
6199 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6200 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6201 value = 0;
6202 }
6203
6204 return (uint32_t) value;
6205}
6206
6207#undef NTH_REF_MAX
6208
6212static pm_numbered_reference_read_node_t *
6213pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6214 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6215 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6216
6217 *node = (pm_numbered_reference_read_node_t) {
6218 {
6219 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6220 .node_id = PM_NODE_IDENTIFY(parser),
6221 .location = PM_LOCATION_TOKEN_VALUE(name),
6222 },
6223 .number = pm_numbered_reference_read_node_number(parser, name)
6224 };
6225
6226 return node;
6227}
6228
6232static pm_optional_parameter_node_t *
6233pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6234 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6235
6236 *node = (pm_optional_parameter_node_t) {
6237 {
6238 .type = PM_OPTIONAL_PARAMETER_NODE,
6239 .node_id = PM_NODE_IDENTIFY(parser),
6240 .location = {
6241 .start = name->start,
6242 .end = value->location.end
6243 }
6244 },
6245 .name = pm_parser_constant_id_token(parser, name),
6246 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6247 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6248 .value = value
6249 };
6250
6251 return node;
6252}
6253
6257static pm_or_node_t *
6258pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6259 pm_assert_value_expression(parser, left);
6260
6261 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6262
6263 *node = (pm_or_node_t) {
6264 {
6265 .type = PM_OR_NODE,
6266 .node_id = PM_NODE_IDENTIFY(parser),
6267 .location = {
6268 .start = left->location.start,
6269 .end = right->location.end
6270 }
6271 },
6272 .left = left,
6273 .right = right,
6274 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6275 };
6276
6277 return node;
6278}
6279
6283static pm_parameters_node_t *
6284pm_parameters_node_create(pm_parser_t *parser) {
6285 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6286
6287 *node = (pm_parameters_node_t) {
6288 {
6289 .type = PM_PARAMETERS_NODE,
6290 .node_id = PM_NODE_IDENTIFY(parser),
6291 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6292 },
6293 .rest = NULL,
6294 .keyword_rest = NULL,
6295 .block = NULL,
6296 .requireds = { 0 },
6297 .optionals = { 0 },
6298 .posts = { 0 },
6299 .keywords = { 0 }
6300 };
6301
6302 return node;
6303}
6304
6308static void
6309pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6310 if (params->base.location.start == NULL) {
6311 params->base.location.start = param->location.start;
6312 } else {
6313 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6314 }
6315
6316 if (params->base.location.end == NULL) {
6317 params->base.location.end = param->location.end;
6318 } else {
6319 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6320 }
6321}
6322
6326static void
6327pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6328 pm_parameters_node_location_set(params, param);
6329 pm_node_list_append(&params->requireds, param);
6330}
6331
6335static void
6336pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6337 pm_parameters_node_location_set(params, (pm_node_t *) param);
6338 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6339}
6340
6344static void
6345pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6346 pm_parameters_node_location_set(params, param);
6347 pm_node_list_append(&params->posts, param);
6348}
6349
6353static void
6354pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6355 pm_parameters_node_location_set(params, param);
6356 params->rest = param;
6357}
6358
6362static void
6363pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6364 pm_parameters_node_location_set(params, param);
6365 pm_node_list_append(&params->keywords, param);
6366}
6367
6371static void
6372pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6373 assert(params->keyword_rest == NULL);
6374 pm_parameters_node_location_set(params, param);
6375 params->keyword_rest = param;
6376}
6377
6381static void
6382pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6383 assert(params->block == NULL);
6384 pm_parameters_node_location_set(params, (pm_node_t *) param);
6385 params->block = param;
6386}
6387
6391static pm_program_node_t *
6392pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6393 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6394
6395 *node = (pm_program_node_t) {
6396 {
6397 .type = PM_PROGRAM_NODE,
6398 .node_id = PM_NODE_IDENTIFY(parser),
6399 .location = {
6400 .start = statements == NULL ? parser->start : statements->base.location.start,
6401 .end = statements == NULL ? parser->end : statements->base.location.end
6402 }
6403 },
6404 .locals = *locals,
6405 .statements = statements
6406 };
6407
6408 return node;
6409}
6410
6414static pm_parentheses_node_t *
6415pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6416 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6417
6418 *node = (pm_parentheses_node_t) {
6419 {
6420 .type = PM_PARENTHESES_NODE,
6421 .flags = flags,
6422 .node_id = PM_NODE_IDENTIFY(parser),
6423 .location = {
6424 .start = opening->start,
6425 .end = closing->end
6426 }
6427 },
6428 .body = body,
6429 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6430 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6431 };
6432
6433 return node;
6434}
6435
6439static pm_pinned_expression_node_t *
6440pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6441 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6442
6443 *node = (pm_pinned_expression_node_t) {
6444 {
6445 .type = PM_PINNED_EXPRESSION_NODE,
6446 .node_id = PM_NODE_IDENTIFY(parser),
6447 .location = {
6448 .start = operator->start,
6449 .end = rparen->end
6450 }
6451 },
6452 .expression = expression,
6453 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6454 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6455 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6456 };
6457
6458 return node;
6459}
6460
6464static pm_pinned_variable_node_t *
6465pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6466 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6467
6468 *node = (pm_pinned_variable_node_t) {
6469 {
6470 .type = PM_PINNED_VARIABLE_NODE,
6471 .node_id = PM_NODE_IDENTIFY(parser),
6472 .location = {
6473 .start = operator->start,
6474 .end = variable->location.end
6475 }
6476 },
6477 .variable = variable,
6478 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6479 };
6480
6481 return node;
6482}
6483
6487static pm_post_execution_node_t *
6488pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6489 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6490
6491 *node = (pm_post_execution_node_t) {
6492 {
6493 .type = PM_POST_EXECUTION_NODE,
6494 .node_id = PM_NODE_IDENTIFY(parser),
6495 .location = {
6496 .start = keyword->start,
6497 .end = closing->end
6498 }
6499 },
6500 .statements = statements,
6501 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6502 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6503 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6504 };
6505
6506 return node;
6507}
6508
6512static pm_pre_execution_node_t *
6513pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6514 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6515
6516 *node = (pm_pre_execution_node_t) {
6517 {
6518 .type = PM_PRE_EXECUTION_NODE,
6519 .node_id = PM_NODE_IDENTIFY(parser),
6520 .location = {
6521 .start = keyword->start,
6522 .end = closing->end
6523 }
6524 },
6525 .statements = statements,
6526 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6527 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6528 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6529 };
6530
6531 return node;
6532}
6533
6537static pm_range_node_t *
6538pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6539 pm_assert_value_expression(parser, left);
6540 pm_assert_value_expression(parser, right);
6541
6542 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6543 pm_node_flags_t flags = 0;
6544
6545 // Indicate that this node is an exclusive range if the operator is `...`.
6546 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6547 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6548 }
6549
6550 // Indicate that this node is a static literal (i.e., can be compiled with
6551 // a putobject in CRuby) if the left and right are implicit nil, explicit
6552 // nil, or integers.
6553 if (
6554 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6555 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6556 ) {
6557 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6558 }
6559
6560 *node = (pm_range_node_t) {
6561 {
6562 .type = PM_RANGE_NODE,
6563 .flags = flags,
6564 .node_id = PM_NODE_IDENTIFY(parser),
6565 .location = {
6566 .start = (left == NULL ? operator->start : left->location.start),
6567 .end = (right == NULL ? operator->end : right->location.end)
6568 }
6569 },
6570 .left = left,
6571 .right = right,
6572 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6573 };
6574
6575 return node;
6576}
6577
6581static pm_redo_node_t *
6582pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6583 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6584 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6585
6586 *node = (pm_redo_node_t) {{
6587 .type = PM_REDO_NODE,
6588 .node_id = PM_NODE_IDENTIFY(parser),
6589 .location = PM_LOCATION_TOKEN_VALUE(token)
6590 }};
6591
6592 return node;
6593}
6594
6599static pm_regular_expression_node_t *
6600pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6601 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6602
6603 *node = (pm_regular_expression_node_t) {
6604 {
6605 .type = PM_REGULAR_EXPRESSION_NODE,
6606 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6607 .node_id = PM_NODE_IDENTIFY(parser),
6608 .location = {
6609 .start = MIN(opening->start, closing->start),
6610 .end = MAX(opening->end, closing->end)
6611 }
6612 },
6613 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6614 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6615 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6616 .unescaped = *unescaped
6617 };
6618
6619 return node;
6620}
6621
6625static inline pm_regular_expression_node_t *
6626pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6627 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6628}
6629
6633static pm_required_parameter_node_t *
6634pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6635 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6636
6637 *node = (pm_required_parameter_node_t) {
6638 {
6639 .type = PM_REQUIRED_PARAMETER_NODE,
6640 .node_id = PM_NODE_IDENTIFY(parser),
6641 .location = PM_LOCATION_TOKEN_VALUE(token)
6642 },
6643 .name = pm_parser_constant_id_token(parser, token)
6644 };
6645
6646 return node;
6647}
6648
6652static pm_rescue_modifier_node_t *
6653pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6654 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6655
6656 *node = (pm_rescue_modifier_node_t) {
6657 {
6658 .type = PM_RESCUE_MODIFIER_NODE,
6659 .node_id = PM_NODE_IDENTIFY(parser),
6660 .location = {
6661 .start = expression->location.start,
6662 .end = rescue_expression->location.end
6663 }
6664 },
6665 .expression = expression,
6666 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6667 .rescue_expression = rescue_expression
6668 };
6669
6670 return node;
6671}
6672
6676static pm_rescue_node_t *
6677pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6678 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6679
6680 *node = (pm_rescue_node_t) {
6681 {
6682 .type = PM_RESCUE_NODE,
6683 .node_id = PM_NODE_IDENTIFY(parser),
6684 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6685 },
6686 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6687 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6688 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6689 .reference = NULL,
6690 .statements = NULL,
6691 .subsequent = NULL,
6692 .exceptions = { 0 }
6693 };
6694
6695 return node;
6696}
6697
6698static inline void
6699pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6700 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6701}
6702
6706static void
6707pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6708 node->reference = reference;
6709 node->base.location.end = reference->location.end;
6710}
6711
6715static void
6716pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6717 node->statements = statements;
6718 if (pm_statements_node_body_length(statements) > 0) {
6719 node->base.location.end = statements->base.location.end;
6720 }
6721}
6722
6726static void
6727pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6728 node->subsequent = subsequent;
6729 node->base.location.end = subsequent->base.location.end;
6730}
6731
6735static void
6736pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6737 pm_node_list_append(&node->exceptions, exception);
6738 node->base.location.end = exception->location.end;
6739}
6740
6744static pm_rest_parameter_node_t *
6745pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6746 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6747
6748 *node = (pm_rest_parameter_node_t) {
6749 {
6750 .type = PM_REST_PARAMETER_NODE,
6751 .node_id = PM_NODE_IDENTIFY(parser),
6752 .location = {
6753 .start = operator->start,
6754 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6755 }
6756 },
6757 .name = pm_parser_optional_constant_id_token(parser, name),
6758 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6759 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6760 };
6761
6762 return node;
6763}
6764
6768static pm_retry_node_t *
6769pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6770 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6771 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6772
6773 *node = (pm_retry_node_t) {{
6774 .type = PM_RETRY_NODE,
6775 .node_id = PM_NODE_IDENTIFY(parser),
6776 .location = PM_LOCATION_TOKEN_VALUE(token)
6777 }};
6778
6779 return node;
6780}
6781
6785static pm_return_node_t *
6786pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6787 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6788
6789 *node = (pm_return_node_t) {
6790 {
6791 .type = PM_RETURN_NODE,
6792 .node_id = PM_NODE_IDENTIFY(parser),
6793 .location = {
6794 .start = keyword->start,
6795 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6796 }
6797 },
6798 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6799 .arguments = arguments
6800 };
6801
6802 return node;
6803}
6804
6808static pm_self_node_t *
6809pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6810 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6811 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6812
6813 *node = (pm_self_node_t) {{
6814 .type = PM_SELF_NODE,
6815 .node_id = PM_NODE_IDENTIFY(parser),
6816 .location = PM_LOCATION_TOKEN_VALUE(token)
6817 }};
6818
6819 return node;
6820}
6821
6825static pm_shareable_constant_node_t *
6826pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6827 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6828
6829 *node = (pm_shareable_constant_node_t) {
6830 {
6831 .type = PM_SHAREABLE_CONSTANT_NODE,
6832 .flags = (pm_node_flags_t) value,
6833 .node_id = PM_NODE_IDENTIFY(parser),
6834 .location = PM_LOCATION_NODE_VALUE(write)
6835 },
6836 .write = write
6837 };
6838
6839 return node;
6840}
6841
6845static pm_singleton_class_node_t *
6846pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6847 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6848
6849 *node = (pm_singleton_class_node_t) {
6850 {
6851 .type = PM_SINGLETON_CLASS_NODE,
6852 .node_id = PM_NODE_IDENTIFY(parser),
6853 .location = {
6854 .start = class_keyword->start,
6855 .end = end_keyword->end
6856 }
6857 },
6858 .locals = *locals,
6859 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6860 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6861 .expression = expression,
6862 .body = body,
6863 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6864 };
6865
6866 return node;
6867}
6868
6872static pm_source_encoding_node_t *
6873pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6874 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6875 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6876
6877 *node = (pm_source_encoding_node_t) {{
6878 .type = PM_SOURCE_ENCODING_NODE,
6879 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6880 .node_id = PM_NODE_IDENTIFY(parser),
6881 .location = PM_LOCATION_TOKEN_VALUE(token)
6882 }};
6883
6884 return node;
6885}
6886
6890static pm_source_file_node_t*
6891pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6892 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6893 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6894
6895 pm_node_flags_t flags = 0;
6896
6897 switch (parser->frozen_string_literal) {
6898 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6899 flags |= PM_STRING_FLAGS_MUTABLE;
6900 break;
6901 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6902 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6903 break;
6904 }
6905
6906 *node = (pm_source_file_node_t) {
6907 {
6908 .type = PM_SOURCE_FILE_NODE,
6909 .flags = flags,
6910 .node_id = PM_NODE_IDENTIFY(parser),
6911 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6912 },
6913 .filepath = parser->filepath
6914 };
6915
6916 return node;
6917}
6918
6922static pm_source_line_node_t *
6923pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6924 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6925 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6926
6927 *node = (pm_source_line_node_t) {{
6928 .type = PM_SOURCE_LINE_NODE,
6929 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6930 .node_id = PM_NODE_IDENTIFY(parser),
6931 .location = PM_LOCATION_TOKEN_VALUE(token)
6932 }};
6933
6934 return node;
6935}
6936
6940static pm_splat_node_t *
6941pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6942 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6943
6944 *node = (pm_splat_node_t) {
6945 {
6946 .type = PM_SPLAT_NODE,
6947 .node_id = PM_NODE_IDENTIFY(parser),
6948 .location = {
6949 .start = operator->start,
6950 .end = (expression == NULL ? operator->end : expression->location.end)
6951 }
6952 },
6953 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6954 .expression = expression
6955 };
6956
6957 return node;
6958}
6959
6963static pm_statements_node_t *
6964pm_statements_node_create(pm_parser_t *parser) {
6965 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6966
6967 *node = (pm_statements_node_t) {
6968 {
6969 .type = PM_STATEMENTS_NODE,
6970 .node_id = PM_NODE_IDENTIFY(parser),
6971 .location = PM_LOCATION_NULL_VALUE(parser)
6972 },
6973 .body = { 0 }
6974 };
6975
6976 return node;
6977}
6978
6982static size_t
6983pm_statements_node_body_length(pm_statements_node_t *node) {
6984 return node && node->body.size;
6985}
6986
6990static void
6991pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6992 node->base.location = (pm_location_t) { .start = start, .end = end };
6993}
6994
6999static inline void
7000pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
7001 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
7002 node->base.location.start = statement->location.start;
7003 }
7004
7005 if (statement->location.end > node->base.location.end) {
7006 node->base.location.end = statement->location.end;
7007 }
7008}
7009
7013static void
7014pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
7015 pm_statements_node_body_update(node, statement);
7016
7017 if (node->body.size > 0) {
7018 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
7019
7020 switch (PM_NODE_TYPE(previous)) {
7021 case PM_BREAK_NODE:
7022 case PM_NEXT_NODE:
7023 case PM_REDO_NODE:
7024 case PM_RETRY_NODE:
7025 case PM_RETURN_NODE:
7026 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7027 break;
7028 default:
7029 break;
7030 }
7031 }
7032
7033 pm_node_list_append(&node->body, statement);
7034 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7035}
7036
7040static void
7041pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7042 pm_statements_node_body_update(node, statement);
7043 pm_node_list_prepend(&node->body, statement);
7044 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7045}
7046
7050static inline pm_string_node_t *
7051pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7052 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7053 pm_node_flags_t flags = 0;
7054
7055 switch (parser->frozen_string_literal) {
7056 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7057 flags = PM_STRING_FLAGS_MUTABLE;
7058 break;
7059 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7060 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7061 break;
7062 }
7063
7064 *node = (pm_string_node_t) {
7065 {
7066 .type = PM_STRING_NODE,
7067 .flags = flags,
7068 .node_id = PM_NODE_IDENTIFY(parser),
7069 .location = {
7070 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7071 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7072 }
7073 },
7074 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7075 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7076 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7077 .unescaped = *string
7078 };
7079
7080 return node;
7081}
7082
7086static pm_string_node_t *
7087pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7088 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7089}
7090
7095static pm_string_node_t *
7096pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7097 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7098 parser->current_string = PM_STRING_EMPTY;
7099 return node;
7100}
7101
7105static pm_super_node_t *
7106pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7107 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7108 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7109
7110 const uint8_t *end = pm_arguments_end(arguments);
7111 if (end == NULL) {
7112 assert(false && "unreachable");
7113 }
7114
7115 *node = (pm_super_node_t) {
7116 {
7117 .type = PM_SUPER_NODE,
7118 .node_id = PM_NODE_IDENTIFY(parser),
7119 .location = {
7120 .start = keyword->start,
7121 .end = end,
7122 }
7123 },
7124 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7125 .lparen_loc = arguments->opening_loc,
7126 .arguments = arguments->arguments,
7127 .rparen_loc = arguments->closing_loc,
7128 .block = arguments->block
7129 };
7130
7131 return node;
7132}
7133
7138static bool
7139pm_ascii_only_p(const pm_string_t *contents) {
7140 const size_t length = pm_string_length(contents);
7141 const uint8_t *source = pm_string_source(contents);
7142
7143 for (size_t index = 0; index < length; index++) {
7144 if (source[index] & 0x80) return false;
7145 }
7146
7147 return true;
7148}
7149
7153static void
7154parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7155 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7156 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7157
7158 if (width == 0) {
7159 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7160 break;
7161 }
7162
7163 cursor += width;
7164 }
7165}
7166
7171static void
7172parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7173 const pm_encoding_t *encoding = parser->encoding;
7174
7175 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7176 size_t width = encoding->char_width(cursor, end - cursor);
7177
7178 if (width == 0) {
7179 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7180 break;
7181 }
7182
7183 cursor += width;
7184 }
7185}
7186
7196static inline pm_node_flags_t
7197parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7198 if (parser->explicit_encoding != NULL) {
7199 // A Symbol may optionally have its encoding explicitly set. This will
7200 // happen if an escape sequence results in a non-ASCII code point.
7201 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7202 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7203 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7204 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7205 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7206 } else if (validate) {
7207 parse_symbol_encoding_validate_other(parser, location, contents);
7208 }
7209 } else if (pm_ascii_only_p(contents)) {
7210 // Ruby stipulates that all source files must use an ASCII-compatible
7211 // encoding. Thus, all symbols appearing in source are eligible for
7212 // "downgrading" to US-ASCII.
7213 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7214 } else if (validate) {
7215 parse_symbol_encoding_validate_other(parser, location, contents);
7216 }
7217
7218 return 0;
7219}
7220
7221static pm_node_flags_t
7222parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7223 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7224 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7225 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7226 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7227
7228 // There's special validation logic used if a string does not contain any character escape sequences.
7229 if (parser->explicit_encoding == NULL) {
7230 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7231 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7232 // the US-ASCII encoding.
7233 if (ascii_only) {
7234 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7235 }
7236
7237 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7238 if (!ascii_only) {
7239 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7240 }
7241 } else if (parser->encoding != modifier_encoding) {
7242 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7243
7244 if (modifier == 'n' && !ascii_only) {
7245 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7246 }
7247 }
7248
7249 return flags;
7250 }
7251
7252 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7253 bool mixed_encoding = false;
7254
7255 if (mixed_encoding) {
7256 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7257 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7258 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7259 bool valid_string_in_modifier_encoding = true;
7260
7261 if (!valid_string_in_modifier_encoding) {
7262 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7263 }
7264 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7265 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7266 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7267 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7268 }
7269 }
7270
7271 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7272 return flags;
7273}
7274
7281static pm_node_flags_t
7282parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7283 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7284 bool valid_unicode_range = true;
7285 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7286 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7287 return flags;
7288 }
7289
7290 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7291 // to multi-byte characters are allowed.
7292 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7293 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7294 // following error message appearing twice. We do the same for compatibility.
7295 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7296 }
7297
7306 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7307 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7308 }
7309
7310 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7311 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7312 }
7313
7314 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7315 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7316 }
7317
7318 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7319 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7320 }
7321
7322 // At this point no encoding modifiers will be present on the regular expression as they would have already
7323 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7324 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7325 if (ascii_only) {
7326 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7327 }
7328
7329 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7330 // or by specifying a modifier.
7331 //
7332 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7333 if (parser->explicit_encoding != NULL) {
7334 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7335 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7336 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7337 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7338 }
7339 }
7340
7341 return 0;
7342}
7343
7348static pm_symbol_node_t *
7349pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7350 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7351
7352 *node = (pm_symbol_node_t) {
7353 {
7354 .type = PM_SYMBOL_NODE,
7355 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7356 .node_id = PM_NODE_IDENTIFY(parser),
7357 .location = {
7358 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7359 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7360 }
7361 },
7362 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7363 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7364 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7365 .unescaped = *unescaped
7366 };
7367
7368 return node;
7369}
7370
7374static inline pm_symbol_node_t *
7375pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7376 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7377}
7378
7382static pm_symbol_node_t *
7383pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7384 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7385 parser->current_string = PM_STRING_EMPTY;
7386 return node;
7387}
7388
7392static pm_symbol_node_t *
7393pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7394 pm_symbol_node_t *node;
7395
7396 switch (token->type) {
7397 case PM_TOKEN_LABEL: {
7398 pm_token_t opening = not_provided(parser);
7399 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7400
7401 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7402 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7403
7404 assert((label.end - label.start) >= 0);
7405 pm_string_shared_init(&node->unescaped, label.start, label.end);
7406 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7407
7408 break;
7409 }
7410 case PM_TOKEN_MISSING: {
7411 pm_token_t opening = not_provided(parser);
7412 pm_token_t closing = not_provided(parser);
7413
7414 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7415 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7416 break;
7417 }
7418 default:
7419 assert(false && "unreachable");
7420 node = NULL;
7421 break;
7422 }
7423
7424 return node;
7425}
7426
7430static pm_symbol_node_t *
7431pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7432 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7433
7434 *node = (pm_symbol_node_t) {
7435 {
7436 .type = PM_SYMBOL_NODE,
7437 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7438 .node_id = PM_NODE_IDENTIFY(parser),
7439 .location = PM_LOCATION_NULL_VALUE(parser)
7440 },
7441 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7442 .unescaped = { 0 }
7443 };
7444
7445 pm_string_constant_init(&node->unescaped, content, strlen(content));
7446 return node;
7447}
7448
7452static bool
7453pm_symbol_node_label_p(pm_node_t *node) {
7454 const uint8_t *end = NULL;
7455
7456 switch (PM_NODE_TYPE(node)) {
7457 case PM_SYMBOL_NODE:
7458 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7459 break;
7460 case PM_INTERPOLATED_SYMBOL_NODE:
7461 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7462 break;
7463 default:
7464 return false;
7465 }
7466
7467 return (end != NULL) && (end[-1] == ':');
7468}
7469
7473static pm_symbol_node_t *
7474pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7475 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7476
7477 *new_node = (pm_symbol_node_t) {
7478 {
7479 .type = PM_SYMBOL_NODE,
7480 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7481 .node_id = PM_NODE_IDENTIFY(parser),
7482 .location = {
7483 .start = opening->start,
7484 .end = closing->end
7485 }
7486 },
7487 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7488 .value_loc = node->content_loc,
7489 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7490 .unescaped = node->unescaped
7491 };
7492
7493 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7494 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7495
7496 // We are explicitly _not_ using pm_node_destroy here because we don't want
7497 // to trash the unescaped string. We could instead copy the string if we
7498 // know that it is owned, but we're taking the fast path for now.
7499 xfree(node);
7500
7501 return new_node;
7502}
7503
7507static pm_string_node_t *
7508pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7509 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7510 pm_node_flags_t flags = 0;
7511
7512 switch (parser->frozen_string_literal) {
7513 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7514 flags = PM_STRING_FLAGS_MUTABLE;
7515 break;
7516 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7517 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7518 break;
7519 }
7520
7521 *new_node = (pm_string_node_t) {
7522 {
7523 .type = PM_STRING_NODE,
7524 .flags = flags,
7525 .node_id = PM_NODE_IDENTIFY(parser),
7526 .location = node->base.location
7527 },
7528 .opening_loc = node->opening_loc,
7529 .content_loc = node->value_loc,
7530 .closing_loc = node->closing_loc,
7531 .unescaped = node->unescaped
7532 };
7533
7534 // We are explicitly _not_ using pm_node_destroy here because we don't want
7535 // to trash the unescaped string. We could instead copy the string if we
7536 // know that it is owned, but we're taking the fast path for now.
7537 xfree(node);
7538
7539 return new_node;
7540}
7541
7545static pm_true_node_t *
7546pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7547 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7548 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7549
7550 *node = (pm_true_node_t) {{
7551 .type = PM_TRUE_NODE,
7552 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7553 .node_id = PM_NODE_IDENTIFY(parser),
7554 .location = PM_LOCATION_TOKEN_VALUE(token)
7555 }};
7556
7557 return node;
7558}
7559
7563static pm_true_node_t *
7564pm_true_node_synthesized_create(pm_parser_t *parser) {
7565 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7566
7567 *node = (pm_true_node_t) {{
7568 .type = PM_TRUE_NODE,
7569 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7570 .node_id = PM_NODE_IDENTIFY(parser),
7571 .location = { .start = parser->start, .end = parser->end }
7572 }};
7573
7574 return node;
7575}
7576
7580static pm_undef_node_t *
7581pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7582 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7583 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7584
7585 *node = (pm_undef_node_t) {
7586 {
7587 .type = PM_UNDEF_NODE,
7588 .node_id = PM_NODE_IDENTIFY(parser),
7589 .location = PM_LOCATION_TOKEN_VALUE(token),
7590 },
7591 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7592 .names = { 0 }
7593 };
7594
7595 return node;
7596}
7597
7601static void
7602pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7603 node->base.location.end = name->location.end;
7604 pm_node_list_append(&node->names, name);
7605}
7606
7610static pm_unless_node_t *
7611pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7612 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7613 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7614
7615 const uint8_t *end;
7616 if (statements != NULL) {
7617 end = statements->base.location.end;
7618 } else {
7619 end = predicate->location.end;
7620 }
7621
7622 *node = (pm_unless_node_t) {
7623 {
7624 .type = PM_UNLESS_NODE,
7625 .flags = PM_NODE_FLAG_NEWLINE,
7626 .node_id = PM_NODE_IDENTIFY(parser),
7627 .location = {
7628 .start = keyword->start,
7629 .end = end
7630 },
7631 },
7632 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7633 .predicate = predicate,
7634 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7635 .statements = statements,
7636 .else_clause = NULL,
7637 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7638 };
7639
7640 return node;
7641}
7642
7646static pm_unless_node_t *
7647pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7648 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7649 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7650
7651 pm_statements_node_t *statements = pm_statements_node_create(parser);
7652 pm_statements_node_body_append(parser, statements, statement, true);
7653
7654 *node = (pm_unless_node_t) {
7655 {
7656 .type = PM_UNLESS_NODE,
7657 .flags = PM_NODE_FLAG_NEWLINE,
7658 .node_id = PM_NODE_IDENTIFY(parser),
7659 .location = {
7660 .start = statement->location.start,
7661 .end = predicate->location.end
7662 },
7663 },
7664 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7665 .predicate = predicate,
7666 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7667 .statements = statements,
7668 .else_clause = NULL,
7669 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7670 };
7671
7672 return node;
7673}
7674
7675static inline void
7676pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7677 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7678 node->base.location.end = end_keyword->end;
7679}
7680
7686static void
7687pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7688 assert(parser->current_block_exits != NULL);
7689
7690 // All of the block exits that we want to remove should be within the
7691 // statements, and since we are modifying the statements, we shouldn't have
7692 // to check the end location.
7693 const uint8_t *start = statements->base.location.start;
7694
7695 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7696 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7697 if (block_exit->location.start < start) break;
7698
7699 // Implicitly remove from the list by lowering the size.
7700 parser->current_block_exits->size--;
7701 }
7702}
7703
7707static pm_until_node_t *
7708pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7709 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7710 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7711
7712 *node = (pm_until_node_t) {
7713 {
7714 .type = PM_UNTIL_NODE,
7715 .flags = flags,
7716 .node_id = PM_NODE_IDENTIFY(parser),
7717 .location = {
7718 .start = keyword->start,
7719 .end = closing->end,
7720 },
7721 },
7722 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7723 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7724 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7725 .predicate = predicate,
7726 .statements = statements
7727 };
7728
7729 return node;
7730}
7731
7735static pm_until_node_t *
7736pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7737 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7738 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7739 pm_loop_modifier_block_exits(parser, statements);
7740
7741 *node = (pm_until_node_t) {
7742 {
7743 .type = PM_UNTIL_NODE,
7744 .flags = flags,
7745 .node_id = PM_NODE_IDENTIFY(parser),
7746 .location = {
7747 .start = statements->base.location.start,
7748 .end = predicate->location.end,
7749 },
7750 },
7751 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7752 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7753 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7754 .predicate = predicate,
7755 .statements = statements
7756 };
7757
7758 return node;
7759}
7760
7764static pm_when_node_t *
7765pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7766 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7767
7768 *node = (pm_when_node_t) {
7769 {
7770 .type = PM_WHEN_NODE,
7771 .node_id = PM_NODE_IDENTIFY(parser),
7772 .location = {
7773 .start = keyword->start,
7774 .end = NULL
7775 }
7776 },
7777 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7778 .statements = NULL,
7779 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7780 .conditions = { 0 }
7781 };
7782
7783 return node;
7784}
7785
7789static void
7790pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7791 node->base.location.end = condition->location.end;
7792 pm_node_list_append(&node->conditions, condition);
7793}
7794
7798static inline void
7799pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7800 node->base.location.end = then_keyword->end;
7801 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7802}
7803
7807static void
7808pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7809 if (statements->base.location.end > node->base.location.end) {
7810 node->base.location.end = statements->base.location.end;
7811 }
7812
7813 node->statements = statements;
7814}
7815
7819static pm_while_node_t *
7820pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7821 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7822 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7823
7824 *node = (pm_while_node_t) {
7825 {
7826 .type = PM_WHILE_NODE,
7827 .flags = flags,
7828 .node_id = PM_NODE_IDENTIFY(parser),
7829 .location = {
7830 .start = keyword->start,
7831 .end = closing->end
7832 },
7833 },
7834 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7835 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7836 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7837 .predicate = predicate,
7838 .statements = statements
7839 };
7840
7841 return node;
7842}
7843
7847static pm_while_node_t *
7848pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7849 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7850 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7851 pm_loop_modifier_block_exits(parser, statements);
7852
7853 *node = (pm_while_node_t) {
7854 {
7855 .type = PM_WHILE_NODE,
7856 .flags = flags,
7857 .node_id = PM_NODE_IDENTIFY(parser),
7858 .location = {
7859 .start = statements->base.location.start,
7860 .end = predicate->location.end
7861 },
7862 },
7863 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7864 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7865 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7866 .predicate = predicate,
7867 .statements = statements
7868 };
7869
7870 return node;
7871}
7872
7876static pm_while_node_t *
7877pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7878 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7879
7880 *node = (pm_while_node_t) {
7881 {
7882 .type = PM_WHILE_NODE,
7883 .node_id = PM_NODE_IDENTIFY(parser),
7884 .location = PM_LOCATION_NULL_VALUE(parser)
7885 },
7886 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7887 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7888 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7889 .predicate = predicate,
7890 .statements = statements
7891 };
7892
7893 return node;
7894}
7895
7900static pm_x_string_node_t *
7901pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7902 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7903
7904 *node = (pm_x_string_node_t) {
7905 {
7906 .type = PM_X_STRING_NODE,
7907 .flags = PM_STRING_FLAGS_FROZEN,
7908 .node_id = PM_NODE_IDENTIFY(parser),
7909 .location = {
7910 .start = opening->start,
7911 .end = closing->end
7912 },
7913 },
7914 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7915 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7916 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7917 .unescaped = *unescaped
7918 };
7919
7920 return node;
7921}
7922
7926static inline pm_x_string_node_t *
7927pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7928 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7929}
7930
7934static pm_yield_node_t *
7935pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7936 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7937
7938 const uint8_t *end;
7939 if (rparen_loc->start != NULL) {
7940 end = rparen_loc->end;
7941 } else if (arguments != NULL) {
7942 end = arguments->base.location.end;
7943 } else if (lparen_loc->start != NULL) {
7944 end = lparen_loc->end;
7945 } else {
7946 end = keyword->end;
7947 }
7948
7949 *node = (pm_yield_node_t) {
7950 {
7951 .type = PM_YIELD_NODE,
7952 .node_id = PM_NODE_IDENTIFY(parser),
7953 .location = {
7954 .start = keyword->start,
7955 .end = end
7956 },
7957 },
7958 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7959 .lparen_loc = *lparen_loc,
7960 .arguments = arguments,
7961 .rparen_loc = *rparen_loc
7962 };
7963
7964 return node;
7965}
7966
7967#undef PM_NODE_ALLOC
7968#undef PM_NODE_IDENTIFY
7969
7974static int
7975pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7976 pm_scope_t *scope = parser->current_scope;
7977 int depth = 0;
7978
7979 while (scope != NULL) {
7980 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7981 if (scope->closed) break;
7982
7983 scope = scope->previous;
7984 depth++;
7985 }
7986
7987 return -1;
7988}
7989
7995static inline int
7996pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7997 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7998}
7999
8003static inline void
8004pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8005 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
8006}
8007
8011static pm_constant_id_t
8012pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8013 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
8014 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
8015 return constant_id;
8016}
8017
8021static inline pm_constant_id_t
8022pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8023 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8024}
8025
8029static pm_constant_id_t
8030pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8031 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8032 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8033 return constant_id;
8034}
8035
8039static pm_constant_id_t
8040pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8041 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8042 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8043 return constant_id;
8044}
8045
8053static bool
8054pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8055 // We want to check whether the parameter name is a numbered parameter or
8056 // not.
8057 pm_refute_numbered_parameter(parser, name->start, name->end);
8058
8059 // Otherwise we'll fetch the constant id for the parameter name and check
8060 // whether it's already in the current scope.
8061 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8062
8063 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8064 // Add an error if the parameter doesn't start with _ and has been seen before
8065 if ((name->start < name->end) && (*name->start != '_')) {
8066 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8067 }
8068 return true;
8069 }
8070 return false;
8071}
8072
8076static void
8077pm_parser_scope_pop(pm_parser_t *parser) {
8078 pm_scope_t *scope = parser->current_scope;
8079 parser->current_scope = scope->previous;
8080 pm_locals_free(&scope->locals);
8081 pm_node_list_free(&scope->implicit_parameters);
8082 xfree(scope);
8083}
8084
8085/******************************************************************************/
8086/* Stack helpers */
8087/******************************************************************************/
8088
8092static inline void
8093pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8094 *stack = (*stack << 1) | (value & 1);
8095}
8096
8100static inline void
8101pm_state_stack_pop(pm_state_stack_t *stack) {
8102 *stack >>= 1;
8103}
8104
8108static inline bool
8109pm_state_stack_p(const pm_state_stack_t *stack) {
8110 return *stack & 1;
8111}
8112
8113static inline void
8114pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8115 // Use the negation of the value to prevent stack overflow.
8116 pm_state_stack_push(&parser->accepts_block_stack, !value);
8117}
8118
8119static inline void
8120pm_accepts_block_stack_pop(pm_parser_t *parser) {
8121 pm_state_stack_pop(&parser->accepts_block_stack);
8122}
8123
8124static inline bool
8125pm_accepts_block_stack_p(pm_parser_t *parser) {
8126 return !pm_state_stack_p(&parser->accepts_block_stack);
8127}
8128
8129static inline void
8130pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8131 pm_state_stack_push(&parser->do_loop_stack, value);
8132}
8133
8134static inline void
8135pm_do_loop_stack_pop(pm_parser_t *parser) {
8136 pm_state_stack_pop(&parser->do_loop_stack);
8137}
8138
8139static inline bool
8140pm_do_loop_stack_p(pm_parser_t *parser) {
8141 return pm_state_stack_p(&parser->do_loop_stack);
8142}
8143
8144/******************************************************************************/
8145/* Lexer check helpers */
8146/******************************************************************************/
8147
8152static inline uint8_t
8153peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8154 if (cursor < parser->end) {
8155 return *cursor;
8156 } else {
8157 return '\0';
8158 }
8159}
8160
8166static inline uint8_t
8167peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8168 return peek_at(parser, parser->current.end + offset);
8169}
8170
8175static inline uint8_t
8176peek(const pm_parser_t *parser) {
8177 return peek_at(parser, parser->current.end);
8178}
8179
8184static inline bool
8185match(pm_parser_t *parser, uint8_t value) {
8186 if (peek(parser) == value) {
8187 parser->current.end++;
8188 return true;
8189 }
8190 return false;
8191}
8192
8197static inline size_t
8198match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8199 if (peek_at(parser, cursor) == '\n') {
8200 return 1;
8201 }
8202 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8203 return 2;
8204 }
8205 return 0;
8206}
8207
8213static inline size_t
8214match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8215 return match_eol_at(parser, parser->current.end + offset);
8216}
8217
8223static inline size_t
8224match_eol(pm_parser_t *parser) {
8225 return match_eol_at(parser, parser->current.end);
8226}
8227
/**
 * Find the first '\n' byte within the `length` bytes starting at `cursor`.
 * Returns a pointer to it, or NULL when there is none. `length` must not be
 * negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // A plain byte search is sufficient here: none of the supported encodings
    // use \n as part of a multi-byte character.
    const void *found = memchr(cursor, '\n', (size_t) length);
    return found;
}
8240
8244static inline bool
8245ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8246 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8247}
8248
8253static bool
8254parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8255 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8256
8257 if (encoding != NULL) {
8258 if (parser->encoding != encoding) {
8259 parser->encoding = encoding;
8260 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8261 }
8262
8263 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8264 return true;
8265 }
8266
8267 return false;
8268}
8269
8274static void
8275parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8276 const uint8_t *cursor = parser->current.start + 1;
8277 const uint8_t *end = parser->current.end;
8278
8279 bool separator = false;
8280 while (true) {
8281 if (end - cursor <= 6) return;
8282 switch (cursor[6]) {
8283 case 'C': case 'c': cursor += 6; continue;
8284 case 'O': case 'o': cursor += 5; continue;
8285 case 'D': case 'd': cursor += 4; continue;
8286 case 'I': case 'i': cursor += 3; continue;
8287 case 'N': case 'n': cursor += 2; continue;
8288 case 'G': case 'g': cursor += 1; continue;
8289 case '=': case ':':
8290 separator = true;
8291 cursor += 6;
8292 break;
8293 default:
8294 cursor += 6;
8295 if (pm_char_is_whitespace(*cursor)) break;
8296 continue;
8297 }
8298 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8299 separator = false;
8300 }
8301
8302 while (true) {
8303 do {
8304 if (++cursor >= end) return;
8305 } while (pm_char_is_whitespace(*cursor));
8306
8307 if (separator) break;
8308 if (*cursor != '=' && *cursor != ':') return;
8309
8310 separator = true;
8311 cursor++;
8312 }
8313
8314 const uint8_t *value_start = cursor;
8315 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8316
8317 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8318 // If we were unable to parse the encoding value, then we've got an
8319 // issue because we didn't understand the encoding that the user was
8320 // trying to use. In this case we'll keep using the default encoding but
8321 // add an error to the parser to indicate an unsuccessful parse.
8322 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8323 }
8324}
8325
/**
 * The possible outcomes of interpreting a magic comment value as a boolean.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
8331
8336static pm_magic_comment_boolean_value_t
8337parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8338 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8339 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8340 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8341 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8342 } else {
8343 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8344 }
8345}
8346
/**
 * Return true when the byte is one of the characters that delimit a magic
 * comment key: a single quote, a double quote, a colon or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8351
8357static inline const uint8_t *
8358parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8359 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8360 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8361 return cursor;
8362 }
8363 cursor++;
8364 }
8365 return NULL;
8366}
8367
8378static inline bool
8379parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8380 bool result = true;
8381
8382 const uint8_t *start = parser->current.start + 1;
8383 const uint8_t *end = parser->current.end;
8384 if (end - start <= 7) return false;
8385
8386 const uint8_t *cursor;
8387 bool indicator = false;
8388
8389 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8390 start = cursor + 3;
8391
8392 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8393 end = cursor;
8394 indicator = true;
8395 } else {
8396 // If we have a start marker but not an end marker, then we cannot
8397 // have a magic comment.
8398 return false;
8399 }
8400 }
8401
8402 cursor = start;
8403 while (cursor < end) {
8404 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8405
8406 const uint8_t *key_start = cursor;
8407 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8408
8409 const uint8_t *key_end = cursor;
8410 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8411 if (cursor == end) break;
8412
8413 if (*cursor == ':') {
8414 cursor++;
8415 } else {
8416 if (!indicator) return false;
8417 continue;
8418 }
8419
8420 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8421 if (cursor == end) break;
8422
8423 const uint8_t *value_start;
8424 const uint8_t *value_end;
8425
8426 if (*cursor == '"') {
8427 value_start = ++cursor;
8428 for (; cursor < end && *cursor != '"'; cursor++) {
8429 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8430 }
8431 value_end = cursor;
8432 if (*cursor == '"') cursor++;
8433 } else {
8434 value_start = cursor;
8435 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8436 value_end = cursor;
8437 }
8438
8439 if (indicator) {
8440 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8441 } else {
8442 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8443 if (cursor != end) return false;
8444 }
8445
8446 // Here, we need to do some processing on the key to swap out dashes for
8447 // underscores. We only need to do this if there _is_ a dash in the key.
8448 pm_string_t key;
8449 const size_t key_length = (size_t) (key_end - key_start);
8450 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8451
8452 if (dash == NULL) {
8453 pm_string_shared_init(&key, key_start, key_end);
8454 } else {
8455 uint8_t *buffer = xmalloc(key_length);
8456 if (buffer == NULL) break;
8457
8458 memcpy(buffer, key_start, key_length);
8459 buffer[dash - key_start] = '_';
8460
8461 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8462 buffer[dash - key_start] = '_';
8463 }
8464
8465 pm_string_owned_init(&key, buffer, key_length);
8466 }
8467
8468 // Finally, we can start checking the key against the list of known
8469 // magic comment keys, and potentially change state based on that.
8470 const uint8_t *key_source = pm_string_source(&key);
8471 uint32_t value_length = (uint32_t) (value_end - value_start);
8472
8473 // We only want to attempt to compare against encoding comments if it's
8474 // the first line in the file (or the second in the case of a shebang).
8475 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8476 if (
8477 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8478 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8479 ) {
8480 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8481 }
8482 }
8483
8484 if (key_length == 11) {
8485 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8486 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8487 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8488 PM_PARSER_WARN_TOKEN_FORMAT(
8489 parser,
8490 parser->current,
8491 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8492 (int) key_length,
8493 (const char *) key_source,
8494 (int) value_length,
8495 (const char *) value_start
8496 );
8497 break;
8498 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8499 parser->warn_mismatched_indentation = false;
8500 break;
8501 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8502 parser->warn_mismatched_indentation = true;
8503 break;
8504 }
8505 }
8506 } else if (key_length == 21) {
8507 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8508 // We only want to handle frozen string literal comments if it's
8509 // before any semantic tokens have been seen.
8510 if (semantic_token_seen) {
8511 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8512 } else {
8513 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8514 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8515 PM_PARSER_WARN_TOKEN_FORMAT(
8516 parser,
8517 parser->current,
8518 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8519 (int) key_length,
8520 (const char *) key_source,
8521 (int) value_length,
8522 (const char *) value_start
8523 );
8524 break;
8525 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8527 break;
8528 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8530 break;
8531 }
8532 }
8533 }
8534 } else if (key_length == 24) {
8535 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8536 const uint8_t *cursor = parser->current.start;
8537 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8538
8539 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8540 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8541 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8542 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8543 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8544 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8545 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8546 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8547 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8548 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8549 } else {
8550 PM_PARSER_WARN_TOKEN_FORMAT(
8551 parser,
8552 parser->current,
8553 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8554 (int) key_length,
8555 (const char *) key_source,
8556 (int) value_length,
8557 (const char *) value_start
8558 );
8559 }
8560 }
8561 }
8562
8563 // When we're done, we want to free the string in case we had to
8564 // allocate memory for it.
8565 pm_string_free(&key);
8566
8567 // Allocate a new magic comment node to append to the parser's list.
8569 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8570 magic_comment->key_start = key_start;
8571 magic_comment->value_start = value_start;
8572 magic_comment->key_length = (uint32_t) key_length;
8573 magic_comment->value_length = value_length;
8574 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8575 }
8576 }
8577
8578 return result;
8579}
8580
8581/******************************************************************************/
8582/* Context manipulations */
8583/******************************************************************************/
8584
/**
 * Report whether the given token can terminate the given context. The
 * function switches on the context and compares token->type against the token
 * types accepted for that context; anything that matches no case yields false.
 *
 * NOTE(review): in this text several `return` statements directly follow
 * another `return` with no `case` label in between, which would leave them
 * unreachable. That looks like case labels were lost from the text rather than
 * intended code -- confirm against the canonical file before relying on the
 * per-context mapping shown here.
 */
static bool
context_terminator(pm_context_t context, pm_token_t *token) {
    switch (context) {
        case PM_CONTEXT_MAIN:
        case PM_CONTEXT_DEFINED:
        case PM_CONTEXT_TERNARY:
            // These contexts only end at the end of the input.
            return token->type == PM_TOKEN_EOF;
            return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
        case PM_CONTEXT_PREEXE:
        case PM_CONTEXT_POSTEXE:
            return token->type == PM_TOKEN_BRACE_RIGHT;
        case PM_CONTEXT_MODULE:
        case PM_CONTEXT_CLASS:
        case PM_CONTEXT_SCLASS:
        case PM_CONTEXT_DEF:
            // Definition bodies end at `end`, or hand over to a `rescue` or
            // `ensure` clause.
            return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
        case PM_CONTEXT_WHILE:
        case PM_CONTEXT_UNTIL:
        case PM_CONTEXT_ELSE:
        case PM_CONTEXT_FOR:
            return token->type == PM_TOKEN_KEYWORD_END;
            return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
            return token->type == PM_TOKEN_KEYWORD_IN;
            return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
        case PM_CONTEXT_CASE_IN:
            return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
        case PM_CONTEXT_IF:
        case PM_CONTEXT_ELSIF:
            return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
        case PM_CONTEXT_UNLESS:
            return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
        case PM_CONTEXT_EMBEXPR:
            return token->type == PM_TOKEN_EMBEXPR_END;
            return token->type == PM_TOKEN_BRACE_RIGHT;
        case PM_CONTEXT_PARENS:
            return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
        case PM_CONTEXT_BEGIN:
            return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
            return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
            return token->type == PM_TOKEN_BRACE_RIGHT;
            return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
        case PM_CONTEXT_NONE:
            return false;
    }

    // Reached only when the context value matched none of the cases above.
    return false;
}
8665
8670static pm_context_t
8671context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8672 pm_context_node_t *context_node = parser->current_context;
8673
8674 while (context_node != NULL) {
8675 if (context_terminator(context_node->context, token)) return context_node->context;
8676 context_node = context_node->prev;
8677 }
8678
8679 return PM_CONTEXT_NONE;
8680}
8681
8682static bool
8683context_push(pm_parser_t *parser, pm_context_t context) {
8684 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8685 if (context_node == NULL) return false;
8686
8687 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8688
8689 if (parser->current_context == NULL) {
8690 parser->current_context = context_node;
8691 } else {
8692 context_node->prev = parser->current_context;
8693 parser->current_context = context_node;
8694 }
8695
8696 return true;
8697}
8698
8699static void
8700context_pop(pm_parser_t *parser) {
8701 pm_context_node_t *prev = parser->current_context->prev;
8702 xfree(parser->current_context);
8703 parser->current_context = prev;
8704}
8705
8706static bool
8707context_p(const pm_parser_t *parser, pm_context_t context) {
8708 pm_context_node_t *context_node = parser->current_context;
8709
8710 while (context_node != NULL) {
8711 if (context_node->context == context) return true;
8712 context_node = context_node->prev;
8713 }
8714
8715 return false;
8716}
8717
/**
 * Walk the context stack from the innermost context outward and report
 * whether a method-definition context is reached before a class, module or
 * singleton-class context. Returns false when the stack is exhausted without
 * meeting either kind.
 *
 * NOTE(review): the embedded listing numbers skip between the case labels
 * below, so additional case labels may be absent from this text -- confirm the
 * full case list against the canonical file.
 */
static bool
context_def_p(const pm_parser_t *parser) {
    pm_context_node_t *context_node = parser->current_context;

    while (context_node != NULL) {
        switch (context_node->context) {
            case PM_CONTEXT_DEF:
                return true;
            case PM_CONTEXT_CLASS:
            case PM_CONTEXT_MODULE:
            case PM_CONTEXT_SCLASS:
                // These contexts stop the search with a negative answer.
                return false;
            default:
                // Any other context is skipped; keep looking outward.
                context_node = context_node->prev;
        }
    }

    return false;
}
8750
/**
 * Return a human-readable description of the given context as a string
 * literal (the caller must not free it). PM_CONTEXT_NONE is not expected
 * here: it trips an assertion and, when assertions are compiled out, yields
 * the empty string.
 *
 * NOTE(review): the embedded listing numbers skip before the "'else' clause",
 * "'ensure' clause" and "'rescue' clause" entries, so further case labels
 * sharing those strings may be absent from this text -- confirm against the
 * canonical file.
 */
static const char *
context_human(pm_context_t context) {
    switch (context) {
        case PM_CONTEXT_NONE:
            assert(false && "unreachable");
            return "";
        case PM_CONTEXT_BEGIN: return "begin statement";
        case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
        case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
        case PM_CONTEXT_CASE_WHEN: return "'when' clause";
        case PM_CONTEXT_CASE_IN: return "'in' clause";
        case PM_CONTEXT_CLASS: return "class definition";
        case PM_CONTEXT_DEF: return "method definition";
        case PM_CONTEXT_DEF_PARAMS: return "method parameters";
        case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
        case PM_CONTEXT_DEFINED: return "'defined?' expression";
        case PM_CONTEXT_ELSE:
        case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
        case PM_CONTEXT_ELSIF: return "'elsif' clause";
        case PM_CONTEXT_EMBEXPR: return "embedded expression";
        case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
        case PM_CONTEXT_FOR: return "for loop";
        case PM_CONTEXT_FOR_INDEX: return "for loop index";
        case PM_CONTEXT_IF: return "if statement";
        case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
        case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
        case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
        case PM_CONTEXT_MAIN: return "top level context";
        case PM_CONTEXT_MODULE: return "module definition";
        case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
        case PM_CONTEXT_PARENS: return "parentheses";
        case PM_CONTEXT_POSTEXE: return "'END' block";
        case PM_CONTEXT_PREDICATE: return "predicate";
        case PM_CONTEXT_PREEXE: return "'BEGIN' block";
        case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
        case PM_CONTEXT_SCLASS: return "singleton class definition";
        case PM_CONTEXT_TERNARY: return "ternary expression";
        case PM_CONTEXT_UNLESS: return "unless statement";
        case PM_CONTEXT_UNTIL: return "until statement";
        case PM_CONTEXT_WHILE: return "while statement";
    }

    // Reached only when the context value matched none of the cases above.
    assert(false && "unreachable");
    return "";
}
8819
8820/******************************************************************************/
8821/* Specific token lexers */
8822/******************************************************************************/
8823
8824static inline void
8825pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8826 if (invalid != NULL) {
8827 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8828 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8829 }
8830}
8831
8832static size_t
8833pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8834 const uint8_t *invalid = NULL;
8835 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8836 pm_strspn_number_validate(parser, string, length, invalid);
8837 return length;
8838}
8839
8840static size_t
8841pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8842 const uint8_t *invalid = NULL;
8843 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8844 pm_strspn_number_validate(parser, string, length, invalid);
8845 return length;
8846}
8847
8848static size_t
8849pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8850 const uint8_t *invalid = NULL;
8851 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8852 pm_strspn_number_validate(parser, string, length, invalid);
8853 return length;
8854}
8855
8856static size_t
8857pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8858 const uint8_t *invalid = NULL;
8859 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8860 pm_strspn_number_validate(parser, string, length, invalid);
8861 return length;
8862}
8863
/**
 * Try to extend the current token over an optional fractional part (a `.`
 * followed by decimal digits) and an optional exponent part (`e` or `E`, an
 * optional sign, then decimal digits), advancing parser->current.end over
 * whatever is consumed. *seen_e is set to true once an exponent marker has
 * been consumed.
 *
 * NOTE(review): `type` is returned on every path, but no declaration of it
 * and no assignment to it is visible in this text. Lines appear to be missing
 * from this function -- confirm against the canonical file.
 */
static pm_token_type_t
lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {

    // Here we're going to attempt to parse the optional decimal portion of a
    // float. If it's not there, then it's okay and we'll just continue on.
    if (peek(parser) == '.') {
        if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
            // Skip the `.` and the first digit, then the rest of the digits.
            parser->current.end += 2;
            parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
        } else {
            // If we had a . and then something else, then it's not a float
            // suffix on a number it's a method call or something else.
            return type;
        }
    }

    // Here we're going to attempt to parse the optional exponent portion of a
    // float. If it's not there, it's okay and we'll just continue on.
    if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
        if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
            // Skip the exponent marker and its sign.
            parser->current.end += 2;

            if (pm_char_is_decimal_digit(peek(parser))) {
                parser->current.end++;
                parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
            } else {
                // A signed exponent with no digits is reported, but lexing
                // still continues as though an exponent had been seen.
                pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
            }
        } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
            // Unsigned exponent: skip the marker, then the digits.
            parser->current.end++;
            parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
        } else {
            // An `e` not followed by a sign or a digit is not an exponent, so
            // it is left unconsumed.
            return type;
        }

        *seen_e = true;
    }

    return type;
}
8907
/**
 * Lex the body of a numeric literal whose first digit has already been
 * consumed. When that digit was `0`, the following byte selects the form:
 * `d`/`D` decimal, `b`/`B` binary, `o`/`O` or a bare digit/underscore octal,
 * `x`/`X` hexadecimal, and `.` or `e`/`E` a float. Otherwise the number is
 * lexed as decimal digits followed by an optional float suffix. *seen_e is
 * reset to false on entry and may be set by lex_optional_float_suffix.
 *
 * The switch reads *parser->current.end directly, so the caller must ensure
 * that current.end is inside the input (lex_numeric checks this).
 *
 * NOTE(review): `type` is assigned and returned, but no declaration of it is
 * visible in this text, and the embedded listing numbers skip one line just
 * before the `break` of the binary, octal and hexadecimal branches. Lines
 * appear to be missing -- confirm against the canonical file.
 */
static pm_token_type_t
lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
    *seen_e = false;

    if (peek_offset(parser, -1) == '0') {
        switch (*parser->current.end) {
            // 0d1111 is a decimal number
            case 'd':
            case 'D':
                parser->current.end++;
                if (pm_char_is_decimal_digit(peek(parser))) {
                    parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
                } else {
                    // No digit after the prefix: report an invalid decimal
                    // number at the current token.
                    match(parser, '_');
                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
                }

                break;

            // 0b1111 is a binary number
            case 'b':
            case 'B':
                parser->current.end++;
                if (pm_char_is_binary_digit(peek(parser))) {
                    parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
                } else {
                    match(parser, '_');
                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
                }

                break;

            // 0o1111 is an octal number
            case 'o':
            case 'O':
                parser->current.end++;
                if (pm_char_is_octal_digit(peek(parser))) {
                    parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
                } else {
                    match(parser, '_');
                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
                }

                break;

            // 01111 is an octal number
            case '_':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
                break;

            // 0x1111 is a hexadecimal number
            case 'x':
            case 'X':
                parser->current.end++;
                if (pm_char_is_hexadecimal_digit(peek(parser))) {
                    parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
                } else {
                    match(parser, '_');
                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
                }

                break;

            // 0.xxx is a float
            case '.': {
                type = lex_optional_float_suffix(parser, seen_e);
                break;
            }

            // 0exxx is a float
            case 'e':
            case 'E': {
                type = lex_optional_float_suffix(parser, seen_e);
                break;
            }
        }
    } else {
        // If it didn't start with a 0, then we'll lex as far as we can into a
        // decimal number.
        parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);

        // Afterward, we'll lex as far as we can into an optional float suffix.
        type = lex_optional_float_suffix(parser, seen_e);
    }

    // At this point we have a completed number, but we want to provide the user
    // with a good experience if they put an additional .xxx fractional
    // component on the end, so we'll check for that here. The extra fraction
    // is only measured for the error range; current.end is not moved over it.
    if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
        const uint8_t *fraction_start = parser->current.end;
        const uint8_t *fraction_end = parser->current.end + 2;
        fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
        pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
    }

    return type;
}
9018
/**
 * Lex a numeric literal and return its token type. The number itself is lexed
 * by lex_numeric_prefix; afterwards an optional `r`, `i` or `ri` suffix is
 * tried. The suffix is only kept when the byte that follows it cannot
 * continue an identifier; otherwise current.end is rewound to the end of the
 * number and the unsuffixed type is returned.
 *
 * NOTE(review): `type` is used throughout, but no declaration of it is
 * visible in this text, and both `if (match(parser, 'i'))` blocks nested
 * under the `r` suffix have empty bodies here. Lines appear to be missing --
 * confirm against the canonical file.
 */
static pm_token_type_t
lex_numeric(pm_parser_t *parser) {

    if (parser->current.end < parser->end) {
        bool seen_e = false;
        type = lex_numeric_prefix(parser, &seen_e);

        // Remember where the number ends so that a rejected suffix can be
        // undone below.
        const uint8_t *end = parser->current.end;
        pm_token_type_t suffix_type = type;

        if (type == PM_TOKEN_INTEGER) {
            if (match(parser, 'r')) {
                suffix_type = PM_TOKEN_INTEGER_RATIONAL;

                if (match(parser, 'i')) {
                }
            } else if (match(parser, 'i')) {
                suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
            }
        } else {
            // The rational suffix is not tried on a number that had an
            // exponent.
            if (!seen_e && match(parser, 'r')) {
                suffix_type = PM_TOKEN_FLOAT_RATIONAL;

                if (match(parser, 'i')) {
                }
            } else if (match(parser, 'i')) {
                suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
            }
        }

        // If the next byte is a non-ASCII byte, an ASCII letter or an
        // underscore, the suffix is discarded and the token ends at the
        // number.
        const uint8_t b = peek(parser);
        if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
            parser->current.end = end;
        } else {
            type = suffix_type;
        }
    }

    return type;
}
9063
9064static pm_token_type_t
9065lex_global_variable(pm_parser_t *parser) {
9066 if (parser->current.end >= parser->end) {
9067 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9069 }
9070
9071 // True if multiple characters are allowed after the declaration of the
9072 // global variable. Not true when it starts with "$-".
9073 bool allow_multiple = true;
9074
9075 switch (*parser->current.end) {
9076 case '~': // $~: match-data
9077 case '*': // $*: argv
9078 case '$': // $$: pid
9079 case '?': // $?: last status
9080 case '!': // $!: error string
9081 case '@': // $@: error position
9082 case '/': // $/: input record separator
9083 case '\\': // $\: output record separator
9084 case ';': // $;: field separator
9085 case ',': // $,: output field separator
9086 case '.': // $.: last read line number
9087 case '=': // $=: ignorecase
9088 case ':': // $:: load path
9089 case '<': // $<: reading filename
9090 case '>': // $>: default output handle
9091 case '\"': // $": already loaded files
9092 parser->current.end++;
9094
9095 case '&': // $&: last match
9096 case '`': // $`: string before last match
9097 case '\'': // $': string after last match
9098 case '+': // $+: string matches last paren.
9099 parser->current.end++;
9100 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9101
9102 case '0': {
9103 parser->current.end++;
9104 size_t width;
9105
9106 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9107 do {
9108 parser->current.end += width;
9109 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9110
9111 // $0 isn't allowed to be followed by anything.
9112 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9113 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9114 }
9115
9117 }
9118
9119 case '1':
9120 case '2':
9121 case '3':
9122 case '4':
9123 case '5':
9124 case '6':
9125 case '7':
9126 case '8':
9127 case '9':
9128 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9129 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9130
9131 case '-':
9132 parser->current.end++;
9133 allow_multiple = false;
9135 default: {
9136 size_t width;
9137
9138 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9139 do {
9140 parser->current.end += width;
9141 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9142 } else if (pm_char_is_whitespace(peek(parser))) {
9143 // If we get here, then we have a $ followed by whitespace,
9144 // which is not allowed.
9145 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9146 } else {
9147 // If we get here, then we have a $ followed by something that
9148 // isn't recognized as a global variable.
9149 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9150 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9151 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9152 }
9153
9155 }
9156 }
9157}
9158
9171static inline pm_token_type_t
9172lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9173 if (memcmp(current_start, value, vlen) == 0) {
9174 pm_lex_state_t last_state = parser->lex_state;
9175
9176 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9177 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9178 } else {
9179 lex_state_set(parser, state);
9180 if (state == PM_LEX_STATE_BEG) {
9181 parser->command_start = true;
9182 }
9183
9184 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9185 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9186 return modifier_type;
9187 }
9188 }
9189
9190 return type;
9191 }
9192
9193 return PM_TOKEN_EOF;
9194}
9195
/**
 * Lex an identifier-like token starting at parser->current.start and return
 * its token type: a label, a method name ending in `!` or `?`, a keyword, a
 * constant or a plain identifier. parser->current.end is advanced over the
 * bytes that are accepted, and the lexer state is updated when a label or a
 * keyword is recognized.
 *
 * `previous_command_start` suppresses the label form when the lexer state is
 * LABEL or ENDFN (it has no effect on the lex_state_arg_p alternative).
 *
 * NOTE(review): in this text `type` is assigned in the keyword table without
 * a visible declaration, and the bodies of the `defined?` check and of the
 * `pm_do_loop_stack_p` check are empty. Lines appear to be missing -- confirm
 * against the canonical file.
 */
static pm_token_type_t
lex_identifier(pm_parser_t *parser, bool previous_command_start) {
    // Lex as far as we can into the current identifier.
    size_t width;
    const uint8_t *end = parser->end;
    const uint8_t *current_start = parser->current.start;
    const uint8_t *current_end = parser->current.end;
    bool encoding_changed = parser->encoding_changed;

    // When the encoding has not been changed, the UTF-8 specific identifier
    // check is used instead of the general one that takes the parser.
    if (encoding_changed) {
        while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
            current_end += width;
        }
    } else {
        while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
            current_end += width;
        }
    }
    parser->current.end = current_end;

    // Now cache the length of the identifier so that we can quickly compare it
    // against known keywords.
    width = (size_t) (current_end - current_start);

    if (current_end < end) {
        // A trailing ! or ? is only taken when it is not immediately followed
        // by `=` (so `!=` is left alone for the next token).
        if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
            // First we'll attempt to extend the identifier by a ! or ?. Then we'll
            // check if we're returning the defined? keyword or just an identifier.
            width++;

            if (
                ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
                (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
            ) {
                // If we're in a position where we can accept a : at the end of an
                // identifier, then we'll optionally accept it.
                lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
                (void) match(parser, ':');
                return PM_TOKEN_LABEL;
            }

            if (parser->lex_state != PM_LEX_STATE_DOT) {
                if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
                }
            }

            return PM_TOKEN_METHOD_NAME;
        }

        if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
            // If we're in a position where we can accept a = at the end of an
            // identifier, then we'll optionally accept it.
            return PM_TOKEN_IDENTIFIER;
        }

        if (
            ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
            peek(parser) == ':' && peek_offset(parser, 1) != ':'
        ) {
            // If we're in a position where we can accept a : at the end of an
            // identifier, then we'll optionally accept it.
            lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
            (void) match(parser, ':');
            return PM_TOKEN_LABEL;
        }
    }

    // Keywords are not recognized when the lexer state is exactly DOT. The
    // candidates are grouped by length so that only keywords of the same
    // length as the identifier are compared.
    if (parser->lex_state != PM_LEX_STATE_DOT) {
        switch (width) {
            case 2:
                if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
                    if (pm_do_loop_stack_p(parser)) {
                    }
                    return PM_TOKEN_KEYWORD_DO;
                }

                if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 3:
                if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 4:
                if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 5:
                if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 6:
                if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
                break;
            case 8:
                if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 12:
                if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
        }
    }

    // Not a keyword: an identifier whose first character is uppercase is a
    // constant, anything else is a plain identifier.
    if (encoding_changed) {
        return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
    }
    return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
}
9335
9340static bool
9341current_token_starts_line(pm_parser_t *parser) {
9342 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9343}
9344
/**
 * Handle a `#` found while lexing string-like content. `pound` points at the
 * `#`. Depending on the byte that follows, this recognizes an embedded
 * instance or class variable (`#@`, `#@@`), an embedded global variable
 * (`#$`) or the start of an embedded expression (`#{`).
 *
 * For an embedded variable with no content pending, the EMBVAR lex mode is
 * pushed, current.end is set just past the `#` and PM_TOKEN_EMBVAR is
 * returned. When the `#` does not start interpolation, current.end is set
 * just past the `#` and PM_TOKEN_NOT_PROVIDED is returned so that the caller
 * keeps lexing.
 *
 * NOTE(review): several branches below set parser->current.end and then have
 * no `return` in this text, although their comments say that content should
 * be returned first, and `case '{'` runs straight into `default`. Lines appear
 * to be missing -- confirm against the canonical file.
 */
static pm_token_type_t
lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
    // If there is no content following this #, then we're at the end of
    // the string and we can safely return string content.
    if (pound + 1 >= parser->end) {
        parser->current.end = pound + 1;
    }

    // Now we'll check against the character that follows the #. If it constitutes
    // valid interpolation, we'll handle that, otherwise we'll return
    // PM_TOKEN_NOT_PROVIDED.
    switch (pound[1]) {
        case '@': {
            // In this case we may have hit an embedded instance or class variable.
            if (pound + 2 >= parser->end) {
                parser->current.end = pound + 1;
            }

            // If we're looking at a @ and there's another @, then we'll skip past the
            // second @.
            const uint8_t *variable = pound + 2;
            if (*variable == '@' && pound + 3 < parser->end) variable++;

            if (char_is_identifier_start(parser, variable, parser->end - variable)) {
                // At this point we're sure that we've either hit an embedded instance
                // or class variable. In this case we'll first need to check if we've
                // already consumed content.
                if (pound > parser->current.start) {
                    parser->current.end = pound;
                }

                // Otherwise we need to return the embedded variable token
                // and then switch to the embedded variable lex mode.
                lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
                parser->current.end = pound + 1;
                return PM_TOKEN_EMBVAR;
            }

            // If we didn't get a valid interpolation, then this is just regular
            // string content. This is like if we get "#@-". In this case the caller
            // should keep lexing.
            parser->current.end = pound + 1;
            return PM_TOKEN_NOT_PROVIDED;
        }
        case '$':
            // In this case we may have hit an embedded global variable. If there's
            // not enough room, then we'll just return string content.
            if (pound + 2 >= parser->end) {
                parser->current.end = pound + 1;
            }

            // This is the character that we're going to check to see if it is the
            // start of an identifier that would indicate that this is a global
            // variable.
            const uint8_t *check = pound + 2;

            // For "#$-" the character to check is the one after the dash.
            if (pound[2] == '-') {
                if (pound + 3 >= parser->end) {
                    parser->current.end = pound + 2;
                }

                check++;
            }

            // If the character that we're going to check is the start of an
            // identifier, or we don't have a - and the character is a decimal number
            // or a global name punctuation character, then we've hit an embedded
            // global variable.
            if (
                char_is_identifier_start(parser, check, parser->end - check) ||
                (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
            ) {
                // In this case we've hit an embedded global variable. First check to
                // see if we've already consumed content. If we have, then we need to
                // return that content as string content first.
                if (pound > parser->current.start) {
                    parser->current.end = pound;
                }

                // Otherwise, we need to return the embedded variable token and switch
                // to the embedded variable lex mode.
                lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
                parser->current.end = pound + 1;
                return PM_TOKEN_EMBVAR;
            }

            // In this case we've hit a #$ that does not indicate a global variable.
            // In this case we'll continue lexing past it.
            parser->current.end = pound + 1;
            return PM_TOKEN_NOT_PROVIDED;
        case '{':
            // In this case it's the start of an embedded expression. If we have
            // already consumed content, then we need to return that content as string
            // content first.
            if (pound > parser->current.start) {
                parser->current.end = pound;
            }

            parser->enclosure_nesting++;

            // Otherwise we'll skip past the #{ and begin lexing the embedded
            // expression.
            lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
            parser->current.end = pound + 2;
            parser->command_start = true;
            pm_do_loop_stack_push(parser, false);
        default:
            // In this case we've hit a # that doesn't constitute interpolation. We'll
            // mark that by returning the not provided token type. This tells the
            // consumer to keep lexing forward.
            parser->current.end = pound + 1;
            return PM_TOKEN_NOT_PROVIDED;
    }
}
9481
/*
 * Bit flags describing the context in which an escape sequence is being read.
 * They are OR-ed together and threaded through the escape_* helpers.
 */

/** No modifier is active. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

/** A control modifier (\c or \C-) applies to the byte being read. */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1 << 0;

/** A meta modifier (\M-) applies to the byte being read. */
static const uint8_t PM_ESCAPE_FLAG_META = 1 << 1;

/** The escape is being read as part of a character literal (?\x). */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1 << 2;

/** The escape is inside a regular expression; its source is also recorded. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1 << 3;
9487
/**
 * Lookup table indexed by ASCII byte value. An entry is 1 when the byte is
 * accepted as the target of a control/meta escape: tab through carriage
 * return, and space through '~' with the exception of the backslash.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the given byte is flagged in ascii_printable_chars. Bytes
 * with the high bit set are outside the table and always yield false.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
9506
/**
 * Convert a single hexadecimal digit character into its numeric value. The
 * caller is expected to have already validated that the byte is a hexadecimal
 * digit; no validation happens here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For letters, the low three bits are 1..6 for both 'a'..'f' and
    // 'A'..'F', so adding 9 yields 10..15 regardless of case.
    return (uint8_t) ((value & 0x7) + 9);
}
9515
9521static inline uint32_t
9522escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9523 uint32_t value = 0;
9524 for (size_t index = 0; index < length; index++) {
9525 if (index != 0) value <<= 4;
9526 value |= escape_hexadecimal_digit(string[index]);
9527 }
9528
9529 // Here we're going to verify that the value is actually a valid Unicode
9530 // codepoint and not a surrogate pair.
9531 if (value >= 0xD800 && value <= 0xDFFF) {
9532 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9533 return 0xFFFD;
9534 }
9535
9536 return value;
9537}
9538
9542static inline uint8_t
9543escape_byte(uint8_t value, const uint8_t flags) {
9544 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9545 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9546 return value;
9547}
9548
9552static inline void
9553escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9554 // \u escape sequences in string-like structures implicitly change the
9555 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9556 // literal.
9557 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9558 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9559 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9560 }
9561
9563 }
9564
9565 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
9566 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9567 pm_buffer_append_byte(buffer, 0xEF);
9568 pm_buffer_append_byte(buffer, 0xBF);
9569 pm_buffer_append_byte(buffer, 0xBD);
9570 }
9571}
9572
9577static inline void
9578escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9579 if (byte >= 0x80) {
9580 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9581 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9582 }
9583
9584 parser->explicit_encoding = parser->encoding;
9585 }
9586
9587 pm_buffer_append_byte(buffer, byte);
9588}
9589
9605static inline void
9606escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9607 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9608 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9609 }
9610
9611 escape_write_byte_encoded(parser, buffer, byte);
9612}
9613
9617static inline void
9618escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9619 size_t width;
9620 if (parser->encoding_changed) {
9621 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9622 } else {
9623 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9624 }
9625
9626 if (width == 1) {
9627 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9628 } else if (width > 1) {
9629 // Valid multibyte character. Just ignore escape.
9630 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
9631 pm_buffer_append_bytes(b, parser->current.end, width);
9632 parser->current.end += width;
9633 } else {
9634 // Assume the next character wasn't meant to be part of this escape
9635 // sequence since it is invalid. Add an error and move on.
9636 parser->current.end++;
9637 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9638 }
9639}
9640
9646static void
9647escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9648#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9649
9650 PM_PARSER_WARN_TOKEN_FORMAT(
9651 parser,
9652 parser->current,
9653 PM_WARN_INVALID_CHARACTER,
9654 FLAG(flags),
9655 FLAG(flag),
9656 type
9657 );
9658
9659#undef FLAG
9660}
9661
9665static void
9666escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9667 uint8_t peeked = peek(parser);
9668 switch (peeked) {
9669 case '\\': {
9670 parser->current.end++;
9671 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9672 return;
9673 }
9674 case '\'': {
9675 parser->current.end++;
9676 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9677 return;
9678 }
9679 case 'a': {
9680 parser->current.end++;
9681 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9682 return;
9683 }
9684 case 'b': {
9685 parser->current.end++;
9686 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9687 return;
9688 }
9689 case 'e': {
9690 parser->current.end++;
9691 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9692 return;
9693 }
9694 case 'f': {
9695 parser->current.end++;
9696 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9697 return;
9698 }
9699 case 'n': {
9700 parser->current.end++;
9701 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9702 return;
9703 }
9704 case 'r': {
9705 parser->current.end++;
9706 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9707 return;
9708 }
9709 case 's': {
9710 parser->current.end++;
9711 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9712 return;
9713 }
9714 case 't': {
9715 parser->current.end++;
9716 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9717 return;
9718 }
9719 case 'v': {
9720 parser->current.end++;
9721 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9722 return;
9723 }
9724 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9725 uint8_t value = (uint8_t) (*parser->current.end - '0');
9726 parser->current.end++;
9727
9728 if (pm_char_is_octal_digit(peek(parser))) {
9729 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9730 parser->current.end++;
9731
9732 if (pm_char_is_octal_digit(peek(parser))) {
9733 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9734 parser->current.end++;
9735 }
9736 }
9737
9738 value = escape_byte(value, flags);
9739 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9740 return;
9741 }
9742 case 'x': {
9743 const uint8_t *start = parser->current.end - 1;
9744
9745 parser->current.end++;
9746 uint8_t byte = peek(parser);
9747
9748 if (pm_char_is_hexadecimal_digit(byte)) {
9749 uint8_t value = escape_hexadecimal_digit(byte);
9750 parser->current.end++;
9751
9752 byte = peek(parser);
9753 if (pm_char_is_hexadecimal_digit(byte)) {
9754 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9755 parser->current.end++;
9756 }
9757
9758 value = escape_byte(value, flags);
9759 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9760 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9761 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9762 } else {
9763 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9764 }
9765 }
9766
9767 escape_write_byte_encoded(parser, buffer, value);
9768 } else {
9769 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9770 }
9771
9772 return;
9773 }
9774 case 'u': {
9775 const uint8_t *start = parser->current.end - 1;
9776 parser->current.end++;
9777
9778 if (parser->current.end == parser->end) {
9779 const uint8_t *start = parser->current.end - 2;
9780 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9781 } else if (peek(parser) == '{') {
9782 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9783 parser->current.end++;
9784
9785 size_t whitespace;
9786 while (true) {
9787 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9788 parser->current.end += whitespace;
9789 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9790 // This is super hacky, but it gets us nicer error
9791 // messages because we can still pass it off to the
9792 // regular expression engine even if we hit an
9793 // unterminated regular expression.
9794 parser->current.end += 2;
9795 } else {
9796 break;
9797 }
9798 }
9799
9800 const uint8_t *extra_codepoints_start = NULL;
9801 int codepoints_count = 0;
9802
9803 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9804 const uint8_t *unicode_start = parser->current.end;
9805 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9806
9807 if (hexadecimal_length > 6) {
9808 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9809 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9810 } else if (hexadecimal_length == 0) {
9811 // there are not hexadecimal characters
9812
9813 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9814 // If this is a regular expression, we are going to
9815 // let the regular expression engine handle this
9816 // error instead of us.
9817 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9818 } else {
9819 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9820 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9821 }
9822
9823 return;
9824 }
9825
9826 parser->current.end += hexadecimal_length;
9827 codepoints_count++;
9828 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9829 extra_codepoints_start = unicode_start;
9830 }
9831
9832 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9833 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9834
9835 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9836 }
9837
9838 // ?\u{nnnn} character literal should contain only one codepoint
9839 // and cannot be like ?\u{nnnn mmmm}.
9840 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9841 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9842 }
9843
9844 if (parser->current.end == parser->end) {
9845 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9846 } else if (peek(parser) == '}') {
9847 parser->current.end++;
9848 } else {
9849 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9850 // If this is a regular expression, we are going to let
9851 // the regular expression engine handle this error
9852 // instead of us.
9853 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9854 } else {
9855 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9856 }
9857 }
9858
9859 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9860 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9861 }
9862 } else {
9863 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9864
9865 if (length == 0) {
9866 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9867 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9868 } else {
9869 const uint8_t *start = parser->current.end - 2;
9870 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9871 }
9872 } else if (length == 4) {
9873 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9874
9875 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9876 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9877 }
9878
9879 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9880 parser->current.end += 4;
9881 } else {
9882 parser->current.end += length;
9883
9884 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9885 // If this is a regular expression, we are going to let
9886 // the regular expression engine handle this error
9887 // instead of us.
9888 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9889 } else {
9890 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9891 }
9892 }
9893 }
9894
9895 return;
9896 }
9897 case 'c': {
9898 parser->current.end++;
9899 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9900 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9901 }
9902
9903 if (parser->current.end == parser->end) {
9904 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9905 return;
9906 }
9907
9908 uint8_t peeked = peek(parser);
9909 switch (peeked) {
9910 case '?': {
9911 parser->current.end++;
9912 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9913 return;
9914 }
9915 case '\\':
9916 parser->current.end++;
9917
9918 if (match(parser, 'u') || match(parser, 'U')) {
9919 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9920 return;
9921 }
9922
9923 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9924 return;
9925 case ' ':
9926 parser->current.end++;
9927 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9928 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9929 return;
9930 case '\t':
9931 parser->current.end++;
9932 escape_read_warn(parser, flags, 0, "\\t");
9933 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9934 return;
9935 default: {
9936 if (!char_is_ascii_printable(peeked)) {
9937 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9938 return;
9939 }
9940
9941 parser->current.end++;
9942 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9943 return;
9944 }
9945 }
9946 }
9947 case 'C': {
9948 parser->current.end++;
9949 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9950 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9951 }
9952
9953 if (peek(parser) != '-') {
9954 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9955 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9956 return;
9957 }
9958
9959 parser->current.end++;
9960 if (parser->current.end == parser->end) {
9961 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9962 return;
9963 }
9964
9965 uint8_t peeked = peek(parser);
9966 switch (peeked) {
9967 case '?': {
9968 parser->current.end++;
9969 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9970 return;
9971 }
9972 case '\\':
9973 parser->current.end++;
9974
9975 if (match(parser, 'u') || match(parser, 'U')) {
9976 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9977 return;
9978 }
9979
9980 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9981 return;
9982 case ' ':
9983 parser->current.end++;
9984 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9985 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9986 return;
9987 case '\t':
9988 parser->current.end++;
9989 escape_read_warn(parser, flags, 0, "\\t");
9990 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9991 return;
9992 default: {
9993 if (!char_is_ascii_printable(peeked)) {
9994 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9995 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9996 return;
9997 }
9998
9999 parser->current.end++;
10000 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
10001 return;
10002 }
10003 }
10004 }
10005 case 'M': {
10006 parser->current.end++;
10007 if (flags & PM_ESCAPE_FLAG_META) {
10008 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
10009 }
10010
10011 if (peek(parser) != '-') {
10012 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10013 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10014 return;
10015 }
10016
10017 parser->current.end++;
10018 if (parser->current.end == parser->end) {
10019 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10020 return;
10021 }
10022
10023 uint8_t peeked = peek(parser);
10024 switch (peeked) {
10025 case '\\':
10026 parser->current.end++;
10027
10028 if (match(parser, 'u') || match(parser, 'U')) {
10029 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10030 return;
10031 }
10032
10033 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10034 return;
10035 case ' ':
10036 parser->current.end++;
10037 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10038 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10039 return;
10040 case '\t':
10041 parser->current.end++;
10042 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10043 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10044 return;
10045 default:
10046 if (!char_is_ascii_printable(peeked)) {
10047 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10048 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10049 return;
10050 }
10051
10052 parser->current.end++;
10053 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10054 return;
10055 }
10056 }
10057 case '\r': {
10058 if (peek_offset(parser, 1) == '\n') {
10059 parser->current.end += 2;
10060 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10061 return;
10062 }
10064 }
10065 default: {
10066 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
10067 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10068 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10069 return;
10070 }
10071 if (parser->current.end < parser->end) {
10072 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
10073 } else {
10074 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10075 }
10076 return;
10077 }
10078 }
10079}
10080
10106static pm_token_type_t
10107lex_question_mark(pm_parser_t *parser) {
10108 if (lex_state_end_p(parser)) {
10109 lex_state_set(parser, PM_LEX_STATE_BEG);
10111 }
10112
10113 if (parser->current.end >= parser->end) {
10114 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10115 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10117 }
10118
10119 if (pm_char_is_whitespace(*parser->current.end)) {
10120 lex_state_set(parser, PM_LEX_STATE_BEG);
10122 }
10123
10124 lex_state_set(parser, PM_LEX_STATE_BEG);
10125
10126 if (match(parser, '\\')) {
10127 lex_state_set(parser, PM_LEX_STATE_END);
10128
10129 pm_buffer_t buffer;
10130 pm_buffer_init_capacity(&buffer, 3);
10131
10132 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10133 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10134
10136 } else {
10137 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10138
10139 // Ternary operators can have a ? immediately followed by an identifier
10140 // which starts with an underscore. We check for this case here.
10141 if (
10142 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10143 (
10144 (parser->current.end + encoding_width >= parser->end) ||
10145 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
10146 )
10147 ) {
10148 lex_state_set(parser, PM_LEX_STATE_END);
10149 parser->current.end += encoding_width;
10150 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10152 }
10153 }
10154
10156}
10157
10162static pm_token_type_t
10163lex_at_variable(pm_parser_t *parser) {
10165 const uint8_t *end = parser->end;
10166
10167 size_t width;
10168 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
10169 parser->current.end += width;
10170
10171 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
10172 parser->current.end += width;
10173 }
10174 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10175 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10176 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10177 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10178 }
10179
10180 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
10181 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10182 } else {
10183 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10184 pm_parser_err_token(parser, &parser->current, diag_id);
10185 }
10186
10187 // If we're lexing an embedded variable, then we need to pop back into the
10188 // parent lex context.
10189 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10190 lex_mode_pop(parser);
10191 }
10192
10193 return type;
10194}
10195
10199static inline void
10200parser_lex_callback(pm_parser_t *parser) {
10201 if (parser->lex_callback) {
10202 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10203 }
10204}
10205
10209static inline pm_comment_t *
10210parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10211 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10212 if (comment == NULL) return NULL;
10213
10214 *comment = (pm_comment_t) {
10215 .type = type,
10216 .location = { parser->current.start, parser->current.end }
10217 };
10218
10219 return comment;
10220}
10221
10227static pm_token_type_t
10228lex_embdoc(pm_parser_t *parser) {
10229 // First, lex out the EMBDOC_BEGIN token.
10230 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10231
10232 if (newline == NULL) {
10233 parser->current.end = parser->end;
10234 } else {
10235 pm_newline_list_append(&parser->newline_list, newline);
10236 parser->current.end = newline + 1;
10237 }
10238
10239 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10240 parser_lex_callback(parser);
10241
10242 // Now, create a comment that is going to be attached to the parser.
10243 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10244 if (comment == NULL) return PM_TOKEN_EOF;
10245
10246 // Now, loop until we find the end of the embedded documentation or the end
10247 // of the file.
10248 while (parser->current.end + 4 <= parser->end) {
10249 parser->current.start = parser->current.end;
10250
10251 // If we've hit the end of the embedded documentation then we'll return
10252 // that token here.
10253 if (
10254 (memcmp(parser->current.end, "=end", 4) == 0) &&
10255 (
10256 (parser->current.end + 4 == parser->end) || // end of file
10257 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10258 (parser->current.end[4] == '\0') || // NUL or end of script
10259 (parser->current.end[4] == '\004') || // ^D
10260 (parser->current.end[4] == '\032') // ^Z
10261 )
10262 ) {
10263 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10264
10265 if (newline == NULL) {
10266 parser->current.end = parser->end;
10267 } else {
10268 pm_newline_list_append(&parser->newline_list, newline);
10269 parser->current.end = newline + 1;
10270 }
10271
10272 parser->current.type = PM_TOKEN_EMBDOC_END;
10273 parser_lex_callback(parser);
10274
10275 comment->location.end = parser->current.end;
10276 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10277
10278 return PM_TOKEN_EMBDOC_END;
10279 }
10280
10281 // Otherwise, we'll parse until the end of the line and return a line of
10282 // embedded documentation.
10283 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10284
10285 if (newline == NULL) {
10286 parser->current.end = parser->end;
10287 } else {
10288 pm_newline_list_append(&parser->newline_list, newline);
10289 parser->current.end = newline + 1;
10290 }
10291
10292 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10293 parser_lex_callback(parser);
10294 }
10295
10296 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10297
10298 comment->location.end = parser->current.end;
10299 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10300
10301 return PM_TOKEN_EOF;
10302}
10303
10309static inline void
10310parser_lex_ignored_newline(pm_parser_t *parser) {
10311 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10312 parser_lex_callback(parser);
10313}
10314
10324static inline void
10325parser_flush_heredoc_end(pm_parser_t *parser) {
10326 assert(parser->heredoc_end <= parser->end);
10327 parser->next_start = parser->heredoc_end;
10328 parser->heredoc_end = NULL;
10329}
10330
10334static bool
10335parser_end_of_line_p(const pm_parser_t *parser) {
10336 const uint8_t *cursor = parser->current.end;
10337
10338 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10339 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10340 }
10341
10342 return true;
10343}
10344
10363typedef struct {
10369
10374 const uint8_t *cursor;
10376
10396
10400static inline void
10401pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10402 pm_buffer_append_byte(&token_buffer->buffer, byte);
10403}
10404
10405static inline void
10406pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10407 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10408}
10409
10413static inline size_t
10414parser_char_width(const pm_parser_t *parser) {
10415 size_t width;
10416 if (parser->encoding_changed) {
10417 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10418 } else {
10419 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10420 }
10421
10422 // TODO: If the character is invalid in the given encoding, then we'll just
10423 // push one byte into the buffer. This should actually be an error.
10424 return (width == 0 ? 1 : width);
10425}
10426
10430static void
10431pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10432 size_t width = parser_char_width(parser);
10433 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10434 parser->current.end += width;
10435}
10436
10437static void
10438pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10439 size_t width = parser_char_width(parser);
10440 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10441 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10442 parser->current.end += width;
10443}
10444
/**
 * Returns true if none of the first length bytes of value has its high bit
 * set. An empty slice is considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    const uint8_t *limit = value + length;

    for (const uint8_t *cursor = value; cursor != limit; cursor++) {
        if (*cursor >= 0x80) {
            return false;
        }
    }

    return true;
}
10453
10460static inline void
10461pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10462 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10463}
10464
10465static inline void
10466pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10467 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10468 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10469 pm_buffer_free(&token_buffer->regexp_buffer);
10470}
10471
10481static void
10482pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10483 if (token_buffer->cursor == NULL) {
10484 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10485 } else {
10486 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10487 pm_token_buffer_copy(parser, token_buffer);
10488 }
10489}
10490
10491static void
10492pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10493 if (token_buffer->base.cursor == NULL) {
10494 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10495 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10496 } else {
10497 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10498 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10499 pm_regexp_token_buffer_copy(parser, token_buffer);
10500 }
10501}
10502
// The initial capacity, in bytes, passed to pm_buffer_init_capacity when
// pm_token_buffer_escape or pm_regexp_token_buffer_escape first sets up a
// token buffer. It is #undef'd again right after those two functions.
10503#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10504
10513static void
10514pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10515 const uint8_t *start;
10516 if (token_buffer->cursor == NULL) {
10517 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10518 start = parser->current.start;
10519 } else {
10520 start = token_buffer->cursor;
10521 }
10522
10523 const uint8_t *end = parser->current.end - 1;
10524 assert(end >= start);
10525 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10526
10527 token_buffer->cursor = end;
10528}
10529
10530static void
10531pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10532 const uint8_t *start;
10533 if (token_buffer->base.cursor == NULL) {
10534 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10535 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10536 start = parser->current.start;
10537 } else {
10538 start = token_buffer->base.cursor;
10539 }
10540
10541 const uint8_t *end = parser->current.end - 1;
10542 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10543 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10544
10545 token_buffer->base.cursor = end;
10546}
10547
// Drop the macro here so it is not visible to the rest of the file.
10548#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10549
10554static inline size_t
10555pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10556 size_t whitespace = 0;
10557
10558 switch (indent) {
10559 case PM_HEREDOC_INDENT_NONE:
10560 // Do nothing, we can't match a terminator with
10561 // indentation and there's no need to calculate common
10562 // whitespace.
10563 break;
10564 case PM_HEREDOC_INDENT_DASH:
10565 // Skip past inline whitespace.
10566 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10567 break;
10568 case PM_HEREDOC_INDENT_TILDE:
10569 // Skip past inline whitespace and calculate common
10570 // whitespace.
10571 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10572 if (**cursor == '\t') {
10573 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10574 } else {
10575 whitespace++;
10576 }
10577 (*cursor)++;
10578 }
10579
10580 break;
10581 }
10582
10583 return whitespace;
10584}
10585
10590static uint8_t
10591pm_lex_percent_delimiter(pm_parser_t *parser) {
10592 size_t eol_length = match_eol(parser);
10593
10594 if (eol_length) {
10595 if (parser->heredoc_end) {
10596 // If we have already lexed a heredoc, then the newline has already
10597 // been added to the list. In this case we want to just flush the
10598 // heredoc end.
10599 parser_flush_heredoc_end(parser);
10600 } else {
10601 // Otherwise, we'll add the newline to the list of newlines.
10602 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10603 }
10604
10605 uint8_t delimiter = *parser->current.end;
10606
10607 // If our delimiter is \r\n, we want to treat it as if it's \n.
10608 // For example, %\r\nfoo\r\n should be "foo"
10609 if (eol_length == 2) {
10610 delimiter = *(parser->current.end + 1);
10611 }
10612
10613 parser->current.end += eol_length;
10614 return delimiter;
10615 }
10616
10617 return *parser->current.end++;
10618}
10619
// Finish lexing the current token: set its type to token_type, invoke
// parser_lex_callback on the parser, and return from the enclosing function.
// It relies on a variable named `parser` being in scope at the point of use.
//
// The expansion is three separate statements (not wrapped in
// do { ... } while (0)) ending in a bare `return`, so the invocation must
// supply the trailing semicolon, and it must not be used as the unbraced body
// of an if/else or loop: only the first statement would be guarded.
10624#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
10625
10632static void
10633parser_lex(pm_parser_t *parser) {
10634 assert(parser->current.end <= parser->end);
10635 parser->previous = parser->current;
10636
10637 // This value mirrors cmd_state from CRuby.
10638 bool previous_command_start = parser->command_start;
10639 parser->command_start = false;
10640
10641 // This is used to communicate to the newline lexing function that we've
10642 // already seen a comment.
10643 bool lexed_comment = false;
10644
10645 // Here we cache the current value of the semantic token seen flag. This is
10646 // used to reset it in case we find a token that shouldn't flip this flag.
10647 unsigned int semantic_token_seen = parser->semantic_token_seen;
10648 parser->semantic_token_seen = true;
10649
10650 switch (parser->lex_modes.current->mode) {
10651 case PM_LEX_DEFAULT:
10652 case PM_LEX_EMBEXPR:
10653 case PM_LEX_EMBVAR:
10654
10655 // We have a specific named label here because we are going to jump back to
10656 // this location in the event that we have lexed a token that should not be
10657 // returned to the parser. This includes comments, ignored newlines, and
10658 // invalid tokens of some form.
10659 lex_next_token: {
10660 // If we have the special next_start pointer set, then we're going to jump
10661 // to that location and start lexing from there.
10662 if (parser->next_start != NULL) {
10663 parser->current.end = parser->next_start;
10664 parser->next_start = NULL;
10665 }
10666
10667 // This value mirrors space_seen from CRuby. It tracks whether or not
10668 // space has been eaten before the start of the next token.
10669 bool space_seen = false;
10670
10671 // First, we're going to skip past any whitespace at the front of the next
10672 // token.
10673 bool chomping = true;
10674 while (parser->current.end < parser->end && chomping) {
10675 switch (*parser->current.end) {
10676 case ' ':
10677 case '\t':
10678 case '\f':
10679 case '\v':
10680 parser->current.end++;
10681 space_seen = true;
10682 break;
10683 case '\r':
10684 if (match_eol_offset(parser, 1)) {
10685 chomping = false;
10686 } else {
10687 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10688 parser->current.end++;
10689 space_seen = true;
10690 }
10691 break;
10692 case '\\': {
10693 size_t eol_length = match_eol_offset(parser, 1);
10694 if (eol_length) {
10695 if (parser->heredoc_end) {
10696 parser->current.end = parser->heredoc_end;
10697 parser->heredoc_end = NULL;
10698 } else {
10699 parser->current.end += eol_length + 1;
10700 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10701 space_seen = true;
10702 }
10703 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10704 parser->current.end += 2;
10705 } else {
10706 chomping = false;
10707 }
10708
10709 break;
10710 }
10711 default:
10712 chomping = false;
10713 break;
10714 }
10715 }
10716
10717 // Next, we'll set to start of this token to be the current end.
10718 parser->current.start = parser->current.end;
10719
10720 // We'll check if we're at the end of the file. If we are, then we
10721 // need to return the EOF token.
10722 if (parser->current.end >= parser->end) {
10723 // If we hit EOF, but the EOF came immediately after a newline,
10724 // set the start of the token to the newline. This way any EOF
10725 // errors will be reported as happening on that line rather than
10726 // a line after. For example "foo(\n" should report an error
10727 // on line 1 even though EOF technically occurs on line 2.
10728 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10729 parser->current.start -= 1;
10730 }
10731 LEX(PM_TOKEN_EOF);
10732 }
10733
10734 // Finally, we'll check the current character to determine the next
10735 // token.
10736 switch (*parser->current.end++) {
10737 case '\0': // NUL or end of script
10738 case '\004': // ^D
10739 case '\032': // ^Z
10740 parser->current.end--;
10741 LEX(PM_TOKEN_EOF);
10742
10743 case '#': { // comments
10744 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10745 parser->current.end = ending == NULL ? parser->end : ending;
10746
10747 // If we found a comment while lexing, then we're going to
10748 // add it to the list of comments in the file and keep
10749 // lexing.
10750 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10751 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10752
10753 if (ending) parser->current.end++;
10754 parser->current.type = PM_TOKEN_COMMENT;
10755 parser_lex_callback(parser);
10756
10757 // Here, parse the comment to see if it's a magic comment
10758 // and potentially change state on the parser.
10759 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10760 ptrdiff_t length = parser->current.end - parser->current.start;
10761
10762 // If we didn't find a magic comment within the first
10763 // pass and we're at the start of the file, then we need
10764 // to do another pass to potentially find other patterns
10765 // for encoding comments.
10766 if (length >= 10 && !parser->encoding_locked) {
10767 parser_lex_magic_comment_encoding(parser);
10768 }
10769 }
10770
10771 lexed_comment = true;
10772 }
10774 case '\r':
10775 case '\n': {
10776 parser->semantic_token_seen = semantic_token_seen & 0x1;
10777 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10778
10779 if (eol_length) {
10780 // The only way you can have carriage returns in this
10781 // particular loop is if you have a carriage return
10782 // followed by a newline. In that case we'll just skip
10783 // over the carriage return and continue lexing, in
10784 // order to make it so that the newline token
10785 // encapsulates both the carriage return and the
10786 // newline. Note that we need to check that we haven't
10787 // already lexed a comment here because that falls
10788 // through into here as well.
10789 if (!lexed_comment) {
10790 parser->current.end += eol_length - 1; // skip CR
10791 }
10792
10793 if (parser->heredoc_end == NULL) {
10794 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10795 }
10796 }
10797
10798 if (parser->heredoc_end) {
10799 parser_flush_heredoc_end(parser);
10800 }
10801
10802 // If this is an ignored newline, then we can continue lexing after
10803 // calling the callback with the ignored newline token.
10804 switch (lex_state_ignored_p(parser)) {
10805 case PM_IGNORED_NEWLINE_NONE:
10806 break;
10807 case PM_IGNORED_NEWLINE_PATTERN:
10808 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10809 if (!lexed_comment) parser_lex_ignored_newline(parser);
10810 lex_state_set(parser, PM_LEX_STATE_BEG);
10811 parser->command_start = true;
10812 parser->current.type = PM_TOKEN_NEWLINE;
10813 return;
10814 }
10816 case PM_IGNORED_NEWLINE_ALL:
10817 if (!lexed_comment) parser_lex_ignored_newline(parser);
10818 lexed_comment = false;
10819 goto lex_next_token;
10820 }
10821
10822 // Here we need to look ahead and see if there is a call operator
10823 // (either . or &.) that starts the next line. If there is, then this
10824 // is going to become an ignored newline and we're going to instead
10825 // return the call operator.
10826 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10827 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10828
10829 if (next_content < parser->end) {
10830 // If we hit a comment after a newline, then we're going to check
10831 // if it's ignored or if it's followed by a method call ('.').
10832 // If it is, then we're going to call the
10833 // callback with an ignored newline and then continue lexing.
10834 // Otherwise we'll return a regular newline.
10835 if (next_content[0] == '#') {
10836 // Here we look for a "." or "&." following a "\n".
10837 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10838
10839 while (following && (following + 1 < parser->end)) {
10840 following++;
10841 following += pm_strspn_inline_whitespace(following, parser->end - following);
10842
10843 // If this is not followed by a comment, then we can break out
10844 // of this loop.
10845 if (peek_at(parser, following) != '#') break;
10846
10847 // If there is a comment, then we need to find the end of the
10848 // comment and continue searching from there.
10849 following = next_newline(following, parser->end - following);
10850 }
10851
10852 // If the lex state was ignored, or we hit a '.' or a '&.',
10853 // we will lex the ignored newline
10854 if (
10855 lex_state_ignored_p(parser) ||
10856 (following && (
10857 (peek_at(parser, following) == '.') ||
10858 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10859 ))
10860 ) {
10861 if (!lexed_comment) parser_lex_ignored_newline(parser);
10862 lexed_comment = false;
10863 goto lex_next_token;
10864 }
10865 }
10866
10867 // If we hit a . after a newline, then we're in a call chain and
10868 // we need to return the call operator.
10869 if (next_content[0] == '.') {
10870 // To match ripper, we need to emit an ignored newline even though
10871 // it's a real newline in the case that we have a beginless range
10872 // on a subsequent line.
10873 if (peek_at(parser, next_content + 1) == '.') {
10874 if (!lexed_comment) parser_lex_ignored_newline(parser);
10875 lex_state_set(parser, PM_LEX_STATE_BEG);
10876 parser->command_start = true;
10877 parser->current.type = PM_TOKEN_NEWLINE;
10878 return;
10879 }
10880
10881 if (!lexed_comment) parser_lex_ignored_newline(parser);
10882 lex_state_set(parser, PM_LEX_STATE_DOT);
10883 parser->current.start = next_content;
10884 parser->current.end = next_content + 1;
10885 parser->next_start = NULL;
10886 LEX(PM_TOKEN_DOT);
10887 }
10888
10889 // If we hit a &. after a newline, then we're in a call chain and
10890 // we need to return the call operator.
10891 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10892 if (!lexed_comment) parser_lex_ignored_newline(parser);
10893 lex_state_set(parser, PM_LEX_STATE_DOT);
10894 parser->current.start = next_content;
10895 parser->current.end = next_content + 2;
10896 parser->next_start = NULL;
10898 }
10899 }
10900
10901 // At this point we know this is a regular newline, and we can set the
10902 // necessary state and return the token.
10903 lex_state_set(parser, PM_LEX_STATE_BEG);
10904 parser->command_start = true;
10905 parser->current.type = PM_TOKEN_NEWLINE;
10906 if (!lexed_comment) parser_lex_callback(parser);
10907 return;
10908 }
10909
10910 // ,
10911 case ',':
10912 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10913 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10914 }
10915
10916 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10917 LEX(PM_TOKEN_COMMA);
10918
10919 // (
10920 case '(': {
10922
10923 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10925 }
10926
10927 parser->enclosure_nesting++;
10928 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10929 pm_do_loop_stack_push(parser, false);
10930 LEX(type);
10931 }
10932
10933 // )
10934 case ')':
10935 parser->enclosure_nesting--;
10936 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10937 pm_do_loop_stack_pop(parser);
10939
10940 // ;
10941 case ';':
10942 lex_state_set(parser, PM_LEX_STATE_BEG);
10943 parser->command_start = true;
10944 LEX(PM_TOKEN_SEMICOLON);
10945
10946 // [ [] []=
10947 case '[':
10948 parser->enclosure_nesting++;
10950
10951 if (lex_state_operator_p(parser)) {
10952 if (match(parser, ']')) {
10953 parser->enclosure_nesting--;
10954 lex_state_set(parser, PM_LEX_STATE_ARG);
10956 }
10957
10958 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10959 LEX(type);
10960 }
10961
10962 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10964 }
10965
10966 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10967 pm_do_loop_stack_push(parser, false);
10968 LEX(type);
10969
10970 // ]
10971 case ']':
10972 parser->enclosure_nesting--;
10973 lex_state_set(parser, PM_LEX_STATE_END);
10974 pm_do_loop_stack_pop(parser);
10976
10977 // {
10978 case '{': {
10980
10981 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10982 // This { begins a lambda
10983 parser->command_start = true;
10984 lex_state_set(parser, PM_LEX_STATE_BEG);
10986 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10987 // This { begins a hash literal
10988 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10989 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10990 // This { begins a block
10991 parser->command_start = true;
10992 lex_state_set(parser, PM_LEX_STATE_BEG);
10993 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10994 // This { begins a block on a command
10995 parser->command_start = true;
10996 lex_state_set(parser, PM_LEX_STATE_BEG);
10997 } else {
10998 // This { begins a hash literal
10999 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11000 }
11001
11002 parser->enclosure_nesting++;
11003 parser->brace_nesting++;
11004 pm_do_loop_stack_push(parser, false);
11005
11006 LEX(type);
11007 }
11008
11009 // }
11010 case '}':
11011 parser->enclosure_nesting--;
11012 pm_do_loop_stack_pop(parser);
11013
11014 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
11015 lex_mode_pop(parser);
11017 }
11018
11019 parser->brace_nesting--;
11020 lex_state_set(parser, PM_LEX_STATE_END);
11022
11023 // * ** **= *=
11024 case '*': {
11025 if (match(parser, '*')) {
11026 if (match(parser, '=')) {
11027 lex_state_set(parser, PM_LEX_STATE_BEG);
11029 }
11030
11032
11033 if (lex_state_spcarg_p(parser, space_seen)) {
11034 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11036 } else if (lex_state_beg_p(parser)) {
11038 } else if (ambiguous_operator_p(parser, space_seen)) {
11039 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11040 }
11041
11042 if (lex_state_operator_p(parser)) {
11043 lex_state_set(parser, PM_LEX_STATE_ARG);
11044 } else {
11045 lex_state_set(parser, PM_LEX_STATE_BEG);
11046 }
11047
11048 LEX(type);
11049 }
11050
11051 if (match(parser, '=')) {
11052 lex_state_set(parser, PM_LEX_STATE_BEG);
11054 }
11055
11057
11058 if (lex_state_spcarg_p(parser, space_seen)) {
11059 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11061 } else if (lex_state_beg_p(parser)) {
11063 } else if (ambiguous_operator_p(parser, space_seen)) {
11064 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11065 }
11066
11067 if (lex_state_operator_p(parser)) {
11068 lex_state_set(parser, PM_LEX_STATE_ARG);
11069 } else {
11070 lex_state_set(parser, PM_LEX_STATE_BEG);
11071 }
11072
11073 LEX(type);
11074 }
11075
11076 // ! != !~ !@
11077 case '!':
11078 if (lex_state_operator_p(parser)) {
11079 lex_state_set(parser, PM_LEX_STATE_ARG);
11080 if (match(parser, '@')) {
11081 LEX(PM_TOKEN_BANG);
11082 }
11083 } else {
11084 lex_state_set(parser, PM_LEX_STATE_BEG);
11085 }
11086
11087 if (match(parser, '=')) {
11089 }
11090
11091 if (match(parser, '~')) {
11093 }
11094
11095 LEX(PM_TOKEN_BANG);
11096
11097 // = => =~ == === =begin
11098 case '=':
11099 if (
11100 current_token_starts_line(parser) &&
11101 (parser->current.end + 5 <= parser->end) &&
11102 memcmp(parser->current.end, "begin", 5) == 0 &&
11103 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11104 ) {
11105 pm_token_type_t type = lex_embdoc(parser);
11106 if (type == PM_TOKEN_EOF) {
11107 LEX(type);
11108 }
11109
11110 goto lex_next_token;
11111 }
11112
11113 if (lex_state_operator_p(parser)) {
11114 lex_state_set(parser, PM_LEX_STATE_ARG);
11115 } else {
11116 lex_state_set(parser, PM_LEX_STATE_BEG);
11117 }
11118
11119 if (match(parser, '>')) {
11121 }
11122
11123 if (match(parser, '~')) {
11125 }
11126
11127 if (match(parser, '=')) {
11128 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11129 }
11130
11131 LEX(PM_TOKEN_EQUAL);
11132
11133 // < << <<= <= <=>
11134 case '<':
11135 if (match(parser, '<')) {
11136 if (
11137 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11138 !lex_state_end_p(parser) &&
11139 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11140 ) {
11141 const uint8_t *end = parser->current.end;
11142
11143 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11144 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11145
11146 if (match(parser, '-')) {
11147 indent = PM_HEREDOC_INDENT_DASH;
11148 }
11149 else if (match(parser, '~')) {
11150 indent = PM_HEREDOC_INDENT_TILDE;
11151 }
11152
11153 if (match(parser, '`')) {
11154 quote = PM_HEREDOC_QUOTE_BACKTICK;
11155 }
11156 else if (match(parser, '"')) {
11157 quote = PM_HEREDOC_QUOTE_DOUBLE;
11158 }
11159 else if (match(parser, '\'')) {
11160 quote = PM_HEREDOC_QUOTE_SINGLE;
11161 }
11162
11163 const uint8_t *ident_start = parser->current.end;
11164 size_t width = 0;
11165
11166 if (parser->current.end >= parser->end) {
11167 parser->current.end = end;
11168 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
11169 parser->current.end = end;
11170 } else {
11171 if (quote == PM_HEREDOC_QUOTE_NONE) {
11172 parser->current.end += width;
11173
11174 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
11175 parser->current.end += width;
11176 }
11177 } else {
11178 // If we have quotes, then we're going to go until we find the
11179 // end quote.
11180 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11181 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11182 parser->current.end++;
11183 }
11184 }
11185
11186 size_t ident_length = (size_t) (parser->current.end - ident_start);
11187 bool ident_error = false;
11188
11189 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11190 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11191 ident_error = true;
11192 }
11193
11194 parser->explicit_encoding = NULL;
11195 lex_mode_push(parser, (pm_lex_mode_t) {
11196 .mode = PM_LEX_HEREDOC,
11197 .as.heredoc = {
11198 .base = {
11199 .ident_start = ident_start,
11200 .ident_length = ident_length,
11201 .quote = quote,
11202 .indent = indent
11203 },
11204 .next_start = parser->current.end,
11205 .common_whitespace = NULL,
11206 .line_continuation = false
11207 }
11208 });
11209
11210 if (parser->heredoc_end == NULL) {
11211 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11212
11213 if (body_start == NULL) {
11214 // If there is no newline after the heredoc identifier, then
11215 // this is not a valid heredoc declaration. In this case we
11216 // will add an error, but we will still return a heredoc
11217 // start.
11218 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11219 body_start = parser->end;
11220 } else {
11221 // Otherwise, we want to indicate that the body of the
11222 // heredoc starts on the character after the next newline.
11223 pm_newline_list_append(&parser->newline_list, body_start);
11224 body_start++;
11225 }
11226
11227 parser->next_start = body_start;
11228 } else {
11229 parser->next_start = parser->heredoc_end;
11230 }
11231
11233 }
11234 }
11235
11236 if (match(parser, '=')) {
11237 lex_state_set(parser, PM_LEX_STATE_BEG);
11239 }
11240
11241 if (ambiguous_operator_p(parser, space_seen)) {
11242 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11243 }
11244
11245 if (lex_state_operator_p(parser)) {
11246 lex_state_set(parser, PM_LEX_STATE_ARG);
11247 } else {
11248 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11249 lex_state_set(parser, PM_LEX_STATE_BEG);
11250 }
11251
11252 LEX(PM_TOKEN_LESS_LESS);
11253 }
11254
11255 if (lex_state_operator_p(parser)) {
11256 lex_state_set(parser, PM_LEX_STATE_ARG);
11257 } else {
11258 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11259 lex_state_set(parser, PM_LEX_STATE_BEG);
11260 }
11261
11262 if (match(parser, '=')) {
11263 if (match(parser, '>')) {
11265 }
11266
11268 }
11269
11270 LEX(PM_TOKEN_LESS);
11271
11272 // > >> >>= >=
11273 case '>':
11274 if (match(parser, '>')) {
11275 if (lex_state_operator_p(parser)) {
11276 lex_state_set(parser, PM_LEX_STATE_ARG);
11277 } else {
11278 lex_state_set(parser, PM_LEX_STATE_BEG);
11279 }
11280 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11281 }
11282
11283 if (lex_state_operator_p(parser)) {
11284 lex_state_set(parser, PM_LEX_STATE_ARG);
11285 } else {
11286 lex_state_set(parser, PM_LEX_STATE_BEG);
11287 }
11288
11289 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11290
11291 // double-quoted string literal
11292 case '"': {
11293 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11294 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11296 }
11297
11298 // xstring literal
11299 case '`': {
11300 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11301 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11302 LEX(PM_TOKEN_BACKTICK);
11303 }
11304
11305 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11306 if (previous_command_start) {
11307 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11308 } else {
11309 lex_state_set(parser, PM_LEX_STATE_ARG);
11310 }
11311
11312 LEX(PM_TOKEN_BACKTICK);
11313 }
11314
11315 lex_mode_push_string(parser, true, false, '\0', '`');
11316 LEX(PM_TOKEN_BACKTICK);
11317 }
11318
11319 // single-quoted string literal
11320 case '\'': {
11321 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11322 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11324 }
11325
11326 // ? character literal
11327 case '?':
11328 LEX(lex_question_mark(parser));
11329
11330 // & && &&= &=
11331 case '&': {
11332 if (match(parser, '&')) {
11333 lex_state_set(parser, PM_LEX_STATE_BEG);
11334
11335 if (match(parser, '=')) {
11337 }
11338
11340 }
11341
11342 if (match(parser, '=')) {
11343 lex_state_set(parser, PM_LEX_STATE_BEG);
11345 }
11346
11347 if (match(parser, '.')) {
11348 lex_state_set(parser, PM_LEX_STATE_DOT);
11350 }
11351
11353 if (lex_state_spcarg_p(parser, space_seen)) {
11354 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11355 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11356 } else {
11357 const uint8_t delim = peek_offset(parser, 1);
11358
11359 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
11360 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11361 }
11362 }
11363
11365 } else if (lex_state_beg_p(parser)) {
11367 } else if (ambiguous_operator_p(parser, space_seen)) {
11368 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11369 }
11370
11371 if (lex_state_operator_p(parser)) {
11372 lex_state_set(parser, PM_LEX_STATE_ARG);
11373 } else {
11374 lex_state_set(parser, PM_LEX_STATE_BEG);
11375 }
11376
11377 LEX(type);
11378 }
11379
11380 // | || ||= |=
11381 case '|':
11382 if (match(parser, '|')) {
11383 if (match(parser, '=')) {
11384 lex_state_set(parser, PM_LEX_STATE_BEG);
11386 }
11387
11388 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11389 parser->current.end--;
11390 LEX(PM_TOKEN_PIPE);
11391 }
11392
11393 lex_state_set(parser, PM_LEX_STATE_BEG);
11394 LEX(PM_TOKEN_PIPE_PIPE);
11395 }
11396
11397 if (match(parser, '=')) {
11398 lex_state_set(parser, PM_LEX_STATE_BEG);
11400 }
11401
11402 if (lex_state_operator_p(parser)) {
11403 lex_state_set(parser, PM_LEX_STATE_ARG);
11404 } else {
11405 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11406 }
11407
11408 LEX(PM_TOKEN_PIPE);
11409
11410 // + += +@
11411 case '+': {
11412 if (lex_state_operator_p(parser)) {
11413 lex_state_set(parser, PM_LEX_STATE_ARG);
11414
11415 if (match(parser, '@')) {
11416 LEX(PM_TOKEN_UPLUS);
11417 }
11418
11419 LEX(PM_TOKEN_PLUS);
11420 }
11421
11422 if (match(parser, '=')) {
11423 lex_state_set(parser, PM_LEX_STATE_BEG);
11425 }
11426
11427 if (
11428 lex_state_beg_p(parser) ||
11429 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11430 ) {
11431 lex_state_set(parser, PM_LEX_STATE_BEG);
11432
11433 if (pm_char_is_decimal_digit(peek(parser))) {
11434 parser->current.end++;
11435 pm_token_type_t type = lex_numeric(parser);
11436 lex_state_set(parser, PM_LEX_STATE_END);
11437 LEX(type);
11438 }
11439
11440 LEX(PM_TOKEN_UPLUS);
11441 }
11442
11443 if (ambiguous_operator_p(parser, space_seen)) {
11444 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11445 }
11446
11447 lex_state_set(parser, PM_LEX_STATE_BEG);
11448 LEX(PM_TOKEN_PLUS);
11449 }
11450
11451 // - -= -@
11452 case '-': {
11453 if (lex_state_operator_p(parser)) {
11454 lex_state_set(parser, PM_LEX_STATE_ARG);
11455
11456 if (match(parser, '@')) {
11457 LEX(PM_TOKEN_UMINUS);
11458 }
11459
11460 LEX(PM_TOKEN_MINUS);
11461 }
11462
11463 if (match(parser, '=')) {
11464 lex_state_set(parser, PM_LEX_STATE_BEG);
11466 }
11467
11468 if (match(parser, '>')) {
11469 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11471 }
11472
11473 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11474 bool is_beg = lex_state_beg_p(parser);
11475 if (!is_beg && spcarg) {
11476 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11477 }
11478
11479 if (is_beg || spcarg) {
11480 lex_state_set(parser, PM_LEX_STATE_BEG);
11481 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11482 }
11483
11484 if (ambiguous_operator_p(parser, space_seen)) {
11485 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11486 }
11487
11488 lex_state_set(parser, PM_LEX_STATE_BEG);
11489 LEX(PM_TOKEN_MINUS);
11490 }
11491
11492 // . .. ...
11493 case '.': {
11494 bool beg_p = lex_state_beg_p(parser);
11495
11496 if (match(parser, '.')) {
11497 if (match(parser, '.')) {
11498 // If we're _not_ inside a range within default parameters
11499 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11500 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11501 lex_state_set(parser, PM_LEX_STATE_BEG);
11502 } else {
11503 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11504 }
11506 }
11507
11508 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11509 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11510 }
11511
11512 lex_state_set(parser, PM_LEX_STATE_BEG);
11514 }
11515
11516 lex_state_set(parser, PM_LEX_STATE_BEG);
11517 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11518 }
11519
11520 lex_state_set(parser, PM_LEX_STATE_DOT);
11521 LEX(PM_TOKEN_DOT);
11522 }
11523
11524 // integer
11525 case '0':
11526 case '1':
11527 case '2':
11528 case '3':
11529 case '4':
11530 case '5':
11531 case '6':
11532 case '7':
11533 case '8':
11534 case '9': {
11535 pm_token_type_t type = lex_numeric(parser);
11536 lex_state_set(parser, PM_LEX_STATE_END);
11537 LEX(type);
11538 }
11539
11540 // :: symbol
11541 case ':':
11542 if (match(parser, ':')) {
11543 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11544 lex_state_set(parser, PM_LEX_STATE_BEG);
11546 }
11547
11548 lex_state_set(parser, PM_LEX_STATE_DOT);
11550 }
11551
11552 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11553 lex_state_set(parser, PM_LEX_STATE_BEG);
11554 LEX(PM_TOKEN_COLON);
11555 }
11556
11557 if (peek(parser) == '"' || peek(parser) == '\'') {
11558 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11559 parser->current.end++;
11560 }
11561
11562 lex_state_set(parser, PM_LEX_STATE_FNAME);
11564
11565 // / /=
11566 case '/':
11567 if (lex_state_beg_p(parser)) {
11568 lex_mode_push_regexp(parser, '\0', '/');
11570 }
11571
11572 if (match(parser, '=')) {
11573 lex_state_set(parser, PM_LEX_STATE_BEG);
11575 }
11576
11577 if (lex_state_spcarg_p(parser, space_seen)) {
11578 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11579 lex_mode_push_regexp(parser, '\0', '/');
11581 }
11582
11583 if (ambiguous_operator_p(parser, space_seen)) {
11584 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11585 }
11586
11587 if (lex_state_operator_p(parser)) {
11588 lex_state_set(parser, PM_LEX_STATE_ARG);
11589 } else {
11590 lex_state_set(parser, PM_LEX_STATE_BEG);
11591 }
11592
11593 LEX(PM_TOKEN_SLASH);
11594
11595 // ^ ^=
11596 case '^':
11597 if (lex_state_operator_p(parser)) {
11598 lex_state_set(parser, PM_LEX_STATE_ARG);
11599 } else {
11600 lex_state_set(parser, PM_LEX_STATE_BEG);
11601 }
11602 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11603
11604 // ~ ~@
11605 case '~':
11606 if (lex_state_operator_p(parser)) {
11607 (void) match(parser, '@');
11608 lex_state_set(parser, PM_LEX_STATE_ARG);
11609 } else {
11610 lex_state_set(parser, PM_LEX_STATE_BEG);
11611 }
11612
11613 LEX(PM_TOKEN_TILDE);
11614
11615 // % %= %i %I %q %Q %w %W
11616 case '%': {
11617 // If there is no subsequent character then we have an
11618 // invalid token. We're going to say it's the percent
11619 // operator because we don't want to move into the string
11620 // lex mode unnecessarily.
11621 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11622 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11623 LEX(PM_TOKEN_PERCENT);
11624 }
11625
11626 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11627 lex_state_set(parser, PM_LEX_STATE_BEG);
11629 } else if (
11630 lex_state_beg_p(parser) ||
11631 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11632 lex_state_spcarg_p(parser, space_seen)
11633 ) {
11634 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11635 if (*parser->current.end >= 0x80) {
11636 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11637 }
11638
11639 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11640 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11642 }
11643
11644 // Delimiters for %-literals cannot be alphanumeric. We
11645 // validate that here.
11646 uint8_t delimiter = peek_offset(parser, 1);
11647 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11648 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11649 goto lex_next_token;
11650 }
11651
11652 switch (peek(parser)) {
11653 case 'i': {
11654 parser->current.end++;
11655
11656 if (parser->current.end < parser->end) {
11657 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11658 } else {
11659 lex_mode_push_list_eof(parser);
11660 }
11661
11663 }
11664 case 'I': {
11665 parser->current.end++;
11666
11667 if (parser->current.end < parser->end) {
11668 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11669 } else {
11670 lex_mode_push_list_eof(parser);
11671 }
11672
11674 }
11675 case 'r': {
11676 parser->current.end++;
11677
11678 if (parser->current.end < parser->end) {
11679 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11680 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11681 } else {
11682 lex_mode_push_regexp(parser, '\0', '\0');
11683 }
11684
11686 }
11687 case 'q': {
11688 parser->current.end++;
11689
11690 if (parser->current.end < parser->end) {
11691 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11692 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11693 } else {
11694 lex_mode_push_string_eof(parser);
11695 }
11696
11698 }
11699 case 'Q': {
11700 parser->current.end++;
11701
11702 if (parser->current.end < parser->end) {
11703 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11704 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11705 } else {
11706 lex_mode_push_string_eof(parser);
11707 }
11708
11710 }
11711 case 's': {
11712 parser->current.end++;
11713
11714 if (parser->current.end < parser->end) {
11715 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11716 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11717 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11718 } else {
11719 lex_mode_push_string_eof(parser);
11720 }
11721
11723 }
11724 case 'w': {
11725 parser->current.end++;
11726
11727 if (parser->current.end < parser->end) {
11728 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11729 } else {
11730 lex_mode_push_list_eof(parser);
11731 }
11732
11734 }
11735 case 'W': {
11736 parser->current.end++;
11737
11738 if (parser->current.end < parser->end) {
11739 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11740 } else {
11741 lex_mode_push_list_eof(parser);
11742 }
11743
11745 }
11746 case 'x': {
11747 parser->current.end++;
11748
11749 if (parser->current.end < parser->end) {
11750 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11751 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11752 } else {
11753 lex_mode_push_string_eof(parser);
11754 }
11755
11757 }
11758 default:
11759 // If we get to this point, then we have a % that is completely
11760 // unparsable. In this case we'll just drop it from the parser
11761 // and skip past it and hope that the next token is something
11762 // that we can parse.
11763 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11764 goto lex_next_token;
11765 }
11766 }
11767
11768 if (ambiguous_operator_p(parser, space_seen)) {
11769 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11770 }
11771
11772 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11773 LEX(PM_TOKEN_PERCENT);
11774 }
11775
11776 // global variable
11777 case '$': {
11778 pm_token_type_t type = lex_global_variable(parser);
11779
11780 // If we're lexing an embedded variable, then we need to pop back into
11781 // the parent lex context.
11782 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11783 lex_mode_pop(parser);
11784 }
11785
11786 lex_state_set(parser, PM_LEX_STATE_END);
11787 LEX(type);
11788 }
11789
11790 // instance variable, class variable
11791 case '@':
11792 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11793 LEX(lex_at_variable(parser));
11794
11795 default: {
11796 if (*parser->current.start != '_') {
11797 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11798
11799 // If this isn't the beginning of an identifier, then
11800 // it's an invalid token as we've exhausted all of the
11801 // other options. We'll skip past it and return the next
11802 // token after adding an appropriate error message.
11803 if (!width) {
11804 if (*parser->current.start >= 0x80) {
11805 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11806 } else if (*parser->current.start == '\\') {
11807 switch (peek_at(parser, parser->current.start + 1)) {
11808 case ' ':
11809 parser->current.end++;
11810 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11811 break;
11812 case '\f':
11813 parser->current.end++;
11814 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11815 break;
11816 case '\t':
11817 parser->current.end++;
11818 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11819 break;
11820 case '\v':
11821 parser->current.end++;
11822 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11823 break;
11824 case '\r':
11825 if (peek_at(parser, parser->current.start + 2) != '\n') {
11826 parser->current.end++;
11827 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11828 break;
11829 }
11831 default:
11832 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11833 break;
11834 }
11835 } else if (char_is_ascii_printable(*parser->current.start)) {
11836 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11837 } else {
11838 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11839 }
11840
11841 goto lex_next_token;
11842 }
11843
11844 parser->current.end = parser->current.start + width;
11845 }
11846
11847 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11848
11849 // If we've hit a __END__ and it was at the start of the
11850 // line or the start of the file and it is followed by
11851 // either a \n or a \r\n, then this is the last token of the
11852 // file.
11853 if (
11854 ((parser->current.end - parser->current.start) == 7) &&
11855 current_token_starts_line(parser) &&
11856 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11857 (parser->current.end == parser->end || match_eol(parser))
11858 ) {
11859 // Since we know we're about to add an __END__ comment,
11860 // we know we need to add all of the newlines to get the
11861 // correct column information for it.
11862 const uint8_t *cursor = parser->current.end;
11863 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11864 pm_newline_list_append(&parser->newline_list, cursor++);
11865 }
11866
11867 parser->current.end = parser->end;
11868 parser->current.type = PM_TOKEN___END__;
11869 parser_lex_callback(parser);
11870
11871 parser->data_loc.start = parser->current.start;
11872 parser->data_loc.end = parser->current.end;
11873
11874 LEX(PM_TOKEN_EOF);
11875 }
11876
11877 pm_lex_state_t last_state = parser->lex_state;
11878
11880 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11881 if (previous_command_start) {
11882 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11883 } else {
11884 lex_state_set(parser, PM_LEX_STATE_ARG);
11885 }
11886 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11887 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11888 } else {
11889 lex_state_set(parser, PM_LEX_STATE_END);
11890 }
11891 }
11892
11893 if (
11894 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11896 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11897 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11898 ) {
11899 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11900 }
11901
11902 LEX(type);
11903 }
11904 }
11905 }
11906 case PM_LEX_LIST: {
11907 if (parser->next_start != NULL) {
11908 parser->current.end = parser->next_start;
11909 parser->next_start = NULL;
11910 }
11911
11912 // First we'll set the beginning of the token.
11913 parser->current.start = parser->current.end;
11914
11915 // If there's any whitespace at the start of the list, then we're
11916 // going to trim it off the beginning and create a new token.
11917 size_t whitespace;
11918
11919 if (parser->heredoc_end) {
11920 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11921 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11922 whitespace += 1;
11923 }
11924 } else {
11925 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11926 }
11927
11928 if (whitespace > 0) {
11929 parser->current.end += whitespace;
11930 if (peek_offset(parser, -1) == '\n') {
11931 // mutates next_start
11932 parser_flush_heredoc_end(parser);
11933 }
11934 LEX(PM_TOKEN_WORDS_SEP);
11935 }
11936
11937 // We'll check if we're at the end of the file. If we are, then we
11938 // need to return the EOF token.
11939 if (parser->current.end >= parser->end) {
11940 LEX(PM_TOKEN_EOF);
11941 }
11942
11943 // Here we'll get a list of the places where strpbrk should break,
11944 // and then find the first one.
11945 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11946 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11947 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11948
11949 // If we haven't found an escape yet, then this buffer will be
11950 // unallocated since we can refer directly to the source string.
11951 pm_token_buffer_t token_buffer = { 0 };
11952
11953 while (breakpoint != NULL) {
11954 // If we hit whitespace, then we must have received content by
11955 // now, so we can return an element of the list.
11956 if (pm_char_is_whitespace(*breakpoint)) {
11957 parser->current.end = breakpoint;
11958 pm_token_buffer_flush(parser, &token_buffer);
11960 }
11961
11962 // If we hit the terminator, we need to check which token to
11963 // return.
11964 if (*breakpoint == lex_mode->as.list.terminator) {
11965 // If this terminator doesn't actually close the list, then
11966 // we need to continue on past it.
11967 if (lex_mode->as.list.nesting > 0) {
11968 parser->current.end = breakpoint + 1;
11969 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11970 lex_mode->as.list.nesting--;
11971 continue;
11972 }
11973
11974 // If we've hit the terminator and we've already skipped
11975 // past content, then we can return a list node.
11976 if (breakpoint > parser->current.start) {
11977 parser->current.end = breakpoint;
11978 pm_token_buffer_flush(parser, &token_buffer);
11980 }
11981
11982 // Otherwise, switch back to the default state and return
11983 // the end of the list.
11984 parser->current.end = breakpoint + 1;
11985 lex_mode_pop(parser);
11986 lex_state_set(parser, PM_LEX_STATE_END);
11988 }
11989
11990 // If we hit a null byte, skip directly past it.
11991 if (*breakpoint == '\0') {
11992 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11993 continue;
11994 }
11995
11996 // If we hit escapes, then we need to treat the next token
11997 // literally. In this case we'll skip past the next character
11998 // and find the next breakpoint.
11999 if (*breakpoint == '\\') {
12000 parser->current.end = breakpoint + 1;
12001
12002 // If we've hit the end of the file, then break out of the
12003 // loop by setting the breakpoint to NULL.
12004 if (parser->current.end == parser->end) {
12005 breakpoint = NULL;
12006 continue;
12007 }
12008
12009 pm_token_buffer_escape(parser, &token_buffer);
12010 uint8_t peeked = peek(parser);
12011
12012 switch (peeked) {
12013 case ' ':
12014 case '\f':
12015 case '\t':
12016 case '\v':
12017 case '\\':
12018 pm_token_buffer_push_byte(&token_buffer, peeked);
12019 parser->current.end++;
12020 break;
12021 case '\r':
12022 parser->current.end++;
12023 if (peek(parser) != '\n') {
12024 pm_token_buffer_push_byte(&token_buffer, '\r');
12025 break;
12026 }
12028 case '\n':
12029 pm_token_buffer_push_byte(&token_buffer, '\n');
12030
12031 if (parser->heredoc_end) {
12032 // ... if we are on the same line as a heredoc,
12033 // flush the heredoc and continue parsing after
12034 // heredoc_end.
12035 parser_flush_heredoc_end(parser);
12036 pm_token_buffer_copy(parser, &token_buffer);
12038 } else {
12039 // ... else track the newline.
12040 pm_newline_list_append(&parser->newline_list, parser->current.end);
12041 }
12042
12043 parser->current.end++;
12044 break;
12045 default:
12046 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12047 pm_token_buffer_push_byte(&token_buffer, peeked);
12048 parser->current.end++;
12049 } else if (lex_mode->as.list.interpolation) {
12050 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12051 } else {
12052 pm_token_buffer_push_byte(&token_buffer, '\\');
12053 pm_token_buffer_push_escaped(&token_buffer, parser);
12054 }
12055
12056 break;
12057 }
12058
12059 token_buffer.cursor = parser->current.end;
12060 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12061 continue;
12062 }
12063
12064 // If we hit a #, then we will attempt to lex interpolation.
12065 if (*breakpoint == '#') {
12066 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12067
12068 if (type == PM_TOKEN_NOT_PROVIDED) {
12069 // If we haven't returned at this point then we had something
12070 // that looked like an interpolated class or instance variable
12071 // like "#@" but wasn't actually. In this case we'll just skip
12072 // to the next breakpoint.
12073 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12074 continue;
12075 }
12076
12078 pm_token_buffer_flush(parser, &token_buffer);
12079 }
12080
12081 LEX(type);
12082 }
12083
12084 // If we've hit the incrementor, then we need to skip past it
12085 // and find the next breakpoint.
12086 assert(*breakpoint == lex_mode->as.list.incrementor);
12087 parser->current.end = breakpoint + 1;
12088 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12089 lex_mode->as.list.nesting++;
12090 continue;
12091 }
12092
12093 if (parser->current.end > parser->current.start) {
12094 pm_token_buffer_flush(parser, &token_buffer);
12096 }
12097
12098 // If we were unable to find a breakpoint, then this token hits the
12099 // end of the file.
12100 parser->current.end = parser->end;
12101 pm_token_buffer_flush(parser, &token_buffer);
12103 }
12104 case PM_LEX_REGEXP: {
12105 // First, we'll set the start of this token to be the current end.
12106 if (parser->next_start == NULL) {
12107 parser->current.start = parser->current.end;
12108 } else {
12109 parser->current.start = parser->next_start;
12110 parser->current.end = parser->next_start;
12111 parser->next_start = NULL;
12112 }
12113
12114 // We'll check if we're at the end of the file. If we are, then we
12115 // need to return the EOF token.
12116 if (parser->current.end >= parser->end) {
12117 LEX(PM_TOKEN_EOF);
12118 }
12119
12120 // Get a reference to the current mode.
12121 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12122
12123 // These are the places where we need to split up the content of the
12124 // regular expression. We'll use strpbrk to find the first of these
12125 // characters.
12126 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12127 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12128 pm_regexp_token_buffer_t token_buffer = { 0 };
12129
12130 while (breakpoint != NULL) {
12131 uint8_t term = lex_mode->as.regexp.terminator;
12132 bool is_terminator = (*breakpoint == term);
12133
12134 // If the terminator is newline, we need to consider \r\n _also_ a newline
12135 // For example: `%r\nfoo\r\n`
12136 // The regexp should be /foo/, not /foo\r/
12137 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12138 if (term == '\n') {
12139 is_terminator = true;
12140 }
12141
12142 // If the terminator is a CR, but we see a CRLF, we need to
12143 // treat the CRLF as a newline, meaning this is _not_ the
12144 // terminator
12145 if (term == '\r') {
12146 is_terminator = false;
12147 }
12148 }
12149
12150 // If we hit the terminator, we need to determine what kind of
12151 // token to return.
12152 if (is_terminator) {
12153 if (lex_mode->as.regexp.nesting > 0) {
12154 parser->current.end = breakpoint + 1;
12155 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12156 lex_mode->as.regexp.nesting--;
12157 continue;
12158 }
12159
12160 // Here we've hit the terminator. If we have already consumed
12161 // content then we need to return that content as string content
12162 // first.
12163 if (breakpoint > parser->current.start) {
12164 parser->current.end = breakpoint;
12165 pm_regexp_token_buffer_flush(parser, &token_buffer);
12167 }
12168
12169 // Check here if we need to track the newline.
12170 size_t eol_length = match_eol_at(parser, breakpoint);
12171 if (eol_length) {
12172 parser->current.end = breakpoint + eol_length;
12173 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12174 } else {
12175 parser->current.end = breakpoint + 1;
12176 }
12177
12178 // Since we've hit the terminator of the regular expression,
12179 // we now need to parse the options.
12180 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12181
12182 lex_mode_pop(parser);
12183 lex_state_set(parser, PM_LEX_STATE_END);
12185 }
12186
12187 // If we've hit the incrementor, then we need to skip past it
12188 // and find the next breakpoint.
12189 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12190 parser->current.end = breakpoint + 1;
12191 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12192 lex_mode->as.regexp.nesting++;
12193 continue;
12194 }
12195
12196 switch (*breakpoint) {
12197 case '\0':
12198 // If we hit a null byte, skip directly past it.
12199 parser->current.end = breakpoint + 1;
12200 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12201 break;
12202 case '\r':
12203 if (peek_at(parser, breakpoint + 1) != '\n') {
12204 parser->current.end = breakpoint + 1;
12205 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12206 break;
12207 }
12208
12209 breakpoint++;
12210 parser->current.end = breakpoint;
12211 pm_regexp_token_buffer_escape(parser, &token_buffer);
12212 token_buffer.base.cursor = breakpoint;
12213
12215 case '\n':
12216 // If we've hit a newline, then we need to track that in
12217 // the list of newlines.
12218 if (parser->heredoc_end == NULL) {
12219 pm_newline_list_append(&parser->newline_list, breakpoint);
12220 parser->current.end = breakpoint + 1;
12221 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12222 break;
12223 }
12224
12225 parser->current.end = breakpoint + 1;
12226 parser_flush_heredoc_end(parser);
12227 pm_regexp_token_buffer_flush(parser, &token_buffer);
12229 case '\\': {
12230 // If we hit escapes, then we need to treat the next
12231 // token literally. In this case we'll skip past the
12232 // next character and find the next breakpoint.
12233 parser->current.end = breakpoint + 1;
12234
12235 // If we've hit the end of the file, then break out of
12236 // the loop by setting the breakpoint to NULL.
12237 if (parser->current.end == parser->end) {
12238 breakpoint = NULL;
12239 break;
12240 }
12241
12242 pm_regexp_token_buffer_escape(parser, &token_buffer);
12243 uint8_t peeked = peek(parser);
12244
12245 switch (peeked) {
12246 case '\r':
12247 parser->current.end++;
12248 if (peek(parser) != '\n') {
12249 if (lex_mode->as.regexp.terminator != '\r') {
12250 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12251 }
12252 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12253 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12254 break;
12255 }
12257 case '\n':
12258 if (parser->heredoc_end) {
12259 // ... if we are on the same line as a heredoc,
12260 // flush the heredoc and continue parsing after
12261 // heredoc_end.
12262 parser_flush_heredoc_end(parser);
12263 pm_regexp_token_buffer_copy(parser, &token_buffer);
12265 } else {
12266 // ... else track the newline.
12267 pm_newline_list_append(&parser->newline_list, parser->current.end);
12268 }
12269
12270 parser->current.end++;
12271 break;
12272 case 'c':
12273 case 'C':
12274 case 'M':
12275 case 'u':
12276 case 'x':
12277 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12278 break;
12279 default:
12280 if (lex_mode->as.regexp.terminator == peeked) {
12281 // Some characters when they are used as the
12282 // terminator also receive an escape. They are
12283 // enumerated here.
12284 switch (peeked) {
12285 case '$': case ')': case '*': case '+':
12286 case '.': case '>': case '?': case ']':
12287 case '^': case '|': case '}':
12288 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12289 break;
12290 default:
12291 break;
12292 }
12293
12294 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12295 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12296 parser->current.end++;
12297 break;
12298 }
12299
12300 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12301 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12302 break;
12303 }
12304
12305 token_buffer.base.cursor = parser->current.end;
12306 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12307 break;
12308 }
12309 case '#': {
12310 // If we hit a #, then we will attempt to lex
12311 // interpolation.
12312 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12313
12314 if (type == PM_TOKEN_NOT_PROVIDED) {
12315 // If we haven't returned at this point then we had
12316 // something that looked like an interpolated class or
12317 // instance variable like "#@" but wasn't actually. In
12318 // this case we'll just skip to the next breakpoint.
12319 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12320 break;
12321 }
12322
12324 pm_regexp_token_buffer_flush(parser, &token_buffer);
12325 }
12326
12327 LEX(type);
12328 }
12329 default:
12330 assert(false && "unreachable");
12331 break;
12332 }
12333 }
12334
12335 if (parser->current.end > parser->current.start) {
12336 pm_regexp_token_buffer_flush(parser, &token_buffer);
12338 }
12339
12340 // If we were unable to find a breakpoint, then this token hits the
12341 // end of the file.
12342 parser->current.end = parser->end;
12343 pm_regexp_token_buffer_flush(parser, &token_buffer);
12345 }
12346 case PM_LEX_STRING: {
12347 // First, we'll set the start of this token to be the current end.
12348 if (parser->next_start == NULL) {
12349 parser->current.start = parser->current.end;
12350 } else {
12351 parser->current.start = parser->next_start;
12352 parser->current.end = parser->next_start;
12353 parser->next_start = NULL;
12354 }
12355
12356 // We'll check if we're at the end of the file. If we are, then we need to
12357 // return the EOF token.
12358 if (parser->current.end >= parser->end) {
12359 LEX(PM_TOKEN_EOF);
12360 }
12361
12362 // These are the places where we need to split up the content of the
12363 // string. We'll use strpbrk to find the first of these characters.
12364 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12365 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12366 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12367
12368 // If we haven't found an escape yet, then this buffer will be
12369 // unallocated since we can refer directly to the source string.
12370 pm_token_buffer_t token_buffer = { 0 };
12371
12372 while (breakpoint != NULL) {
12373 // If we hit the incrementor, then we'll increment the nesting and
12374 // continue lexing.
12375 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12376 lex_mode->as.string.nesting++;
12377 parser->current.end = breakpoint + 1;
12378 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12379 continue;
12380 }
12381
12382 uint8_t term = lex_mode->as.string.terminator;
12383 bool is_terminator = (*breakpoint == term);
12384
12385 // If the terminator is newline, we need to consider \r\n _also_ a newline
12386 // For example: `%\nfoo\r\n`
12387 // The string should be "foo", not "foo\r"
12388 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12389 if (term == '\n') {
12390 is_terminator = true;
12391 }
12392
12393 // If the terminator is a CR, but we see a CRLF, we need to
12394 // treat the CRLF as a newline, meaning this is _not_ the
12395 // terminator
12396 if (term == '\r') {
12397 is_terminator = false;
12398 }
12399 }
12400
12401 // Note that we have to check the terminator here first because we could
12402 // potentially be parsing a % string that has a # character as the
12403 // terminator.
12404 if (is_terminator) {
12405 // If this terminator doesn't actually close the string, then we need
12406 // to continue on past it.
12407 if (lex_mode->as.string.nesting > 0) {
12408 parser->current.end = breakpoint + 1;
12409 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12410 lex_mode->as.string.nesting--;
12411 continue;
12412 }
12413
12414 // Here we've hit the terminator. If we have already consumed content
12415 // then we need to return that content as string content first.
12416 if (breakpoint > parser->current.start) {
12417 parser->current.end = breakpoint;
12418 pm_token_buffer_flush(parser, &token_buffer);
12420 }
12421
12422 // Otherwise we need to switch back to the parent lex mode and
12423 // return the end of the string.
12424 size_t eol_length = match_eol_at(parser, breakpoint);
12425 if (eol_length) {
12426 parser->current.end = breakpoint + eol_length;
12427 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12428 } else {
12429 parser->current.end = breakpoint + 1;
12430 }
12431
12432 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12433 parser->current.end++;
12434 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12435 lex_mode_pop(parser);
12436 LEX(PM_TOKEN_LABEL_END);
12437 }
12438
12439 lex_state_set(parser, PM_LEX_STATE_END);
12440 lex_mode_pop(parser);
12442 }
12443
12444 switch (*breakpoint) {
12445 case '\0':
12446 // Skip directly past the null character.
12447 parser->current.end = breakpoint + 1;
12448 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12449 break;
12450 case '\r':
12451 if (peek_at(parser, breakpoint + 1) != '\n') {
12452 parser->current.end = breakpoint + 1;
12453 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12454 break;
12455 }
12456
12457 // If we hit a \r\n sequence, then we need to treat it
12458 // as a newline.
12459 breakpoint++;
12460 parser->current.end = breakpoint;
12461 pm_token_buffer_escape(parser, &token_buffer);
12462 token_buffer.cursor = breakpoint;
12463
12465 case '\n':
12466 // When we hit a newline, we need to flush any potential
12467 // heredocs. Note that this has to happen after we check
12468 // for the terminator in case the terminator is a
12469 // newline character.
12470 if (parser->heredoc_end == NULL) {
12471 pm_newline_list_append(&parser->newline_list, breakpoint);
12472 parser->current.end = breakpoint + 1;
12473 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12474 break;
12475 }
12476
12477 parser->current.end = breakpoint + 1;
12478 parser_flush_heredoc_end(parser);
12479 pm_token_buffer_flush(parser, &token_buffer);
12481 case '\\': {
12482 // Here we hit escapes.
12483 parser->current.end = breakpoint + 1;
12484
12485 // If we've hit the end of the file, then break out of
12486 // the loop by setting the breakpoint to NULL.
12487 if (parser->current.end == parser->end) {
12488 breakpoint = NULL;
12489 continue;
12490 }
12491
12492 pm_token_buffer_escape(parser, &token_buffer);
12493 uint8_t peeked = peek(parser);
12494
12495 switch (peeked) {
12496 case '\\':
12497 pm_token_buffer_push_byte(&token_buffer, '\\');
12498 parser->current.end++;
12499 break;
12500 case '\r':
12501 parser->current.end++;
12502 if (peek(parser) != '\n') {
12503 if (!lex_mode->as.string.interpolation) {
12504 pm_token_buffer_push_byte(&token_buffer, '\\');
12505 }
12506 pm_token_buffer_push_byte(&token_buffer, '\r');
12507 break;
12508 }
12510 case '\n':
12511 if (!lex_mode->as.string.interpolation) {
12512 pm_token_buffer_push_byte(&token_buffer, '\\');
12513 pm_token_buffer_push_byte(&token_buffer, '\n');
12514 }
12515
12516 if (parser->heredoc_end) {
12517 // ... if we are on the same line as a heredoc,
12518 // flush the heredoc and continue parsing after
12519 // heredoc_end.
12520 parser_flush_heredoc_end(parser);
12521 pm_token_buffer_copy(parser, &token_buffer);
12523 } else {
12524 // ... else track the newline.
12525 pm_newline_list_append(&parser->newline_list, parser->current.end);
12526 }
12527
12528 parser->current.end++;
12529 break;
12530 default:
12531 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12532 pm_token_buffer_push_byte(&token_buffer, peeked);
12533 parser->current.end++;
12534 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12535 pm_token_buffer_push_byte(&token_buffer, peeked);
12536 parser->current.end++;
12537 } else if (lex_mode->as.string.interpolation) {
12538 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12539 } else {
12540 pm_token_buffer_push_byte(&token_buffer, '\\');
12541 pm_token_buffer_push_escaped(&token_buffer, parser);
12542 }
12543
12544 break;
12545 }
12546
12547 token_buffer.cursor = parser->current.end;
12548 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12549 break;
12550 }
12551 case '#': {
12552 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12553
12554 if (type == PM_TOKEN_NOT_PROVIDED) {
12555 // If we haven't returned at this point then we had something that
12556 // looked like an interpolated class or instance variable like "#@"
12557 // but wasn't actually. In this case we'll just skip to the next
12558 // breakpoint.
12559 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12560 break;
12561 }
12562
12564 pm_token_buffer_flush(parser, &token_buffer);
12565 }
12566
12567 LEX(type);
12568 }
12569 default:
12570 assert(false && "unreachable");
12571 }
12572 }
12573
12574 if (parser->current.end > parser->current.start) {
12575 pm_token_buffer_flush(parser, &token_buffer);
12577 }
12578
12579 // If we've hit the end of the string, then this is an unterminated
12580 // string. In that case we'll return a string content token.
12581 parser->current.end = parser->end;
12582 pm_token_buffer_flush(parser, &token_buffer);
12584 }
12585 case PM_LEX_HEREDOC: {
12586 // First, we'll set to start of this token.
12587 if (parser->next_start == NULL) {
12588 parser->current.start = parser->current.end;
12589 } else {
12590 parser->current.start = parser->next_start;
12591 parser->current.end = parser->next_start;
12592 parser->heredoc_end = NULL;
12593 parser->next_start = NULL;
12594 }
12595
12596 // Now let's grab the information about the identifier off of the
12597 // current lex mode.
12598 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12599 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12600
12601 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12602 lex_mode->as.heredoc.line_continuation = false;
12603
12604 // We'll check if we're at the end of the file. If we are, then we
12605 // will add an error (because we weren't able to find the
12606 // terminator) but still continue parsing so that content after the
12607 // declaration of the heredoc can be parsed.
12608 if (parser->current.end >= parser->end) {
12609 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12610 parser->next_start = lex_mode->as.heredoc.next_start;
12611 parser->heredoc_end = parser->current.end;
12612 lex_state_set(parser, PM_LEX_STATE_END);
12613 lex_mode_pop(parser);
12615 }
12616
12617 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12618 size_t ident_length = heredoc_lex_mode->ident_length;
12619
12620 // If we are immediately following a newline and we have hit the
12621 // terminator, then we need to return the ending of the heredoc.
12622 if (current_token_starts_line(parser)) {
12623 const uint8_t *start = parser->current.start;
12624
12625 if (!line_continuation && (start + ident_length <= parser->end)) {
12626 const uint8_t *newline = next_newline(start, parser->end - start);
12627 const uint8_t *ident_end = newline;
12628 const uint8_t *terminator_end = newline;
12629
12630 if (newline == NULL) {
12631 terminator_end = parser->end;
12632 ident_end = parser->end;
12633 } else {
12634 terminator_end++;
12635 if (newline[-1] == '\r') {
12636 ident_end--; // Remove \r
12637 }
12638 }
12639
12640 const uint8_t *terminator_start = ident_end - ident_length;
12641 const uint8_t *cursor = start;
12642
12643 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12644 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12645 cursor++;
12646 }
12647 }
12648
12649 if (
12650 (cursor == terminator_start) &&
12651 (memcmp(terminator_start, ident_start, ident_length) == 0)
12652 ) {
12653 if (newline != NULL) {
12654 pm_newline_list_append(&parser->newline_list, newline);
12655 }
12656
12657 parser->current.end = terminator_end;
12658 if (*lex_mode->as.heredoc.next_start == '\\') {
12659 parser->next_start = NULL;
12660 } else {
12661 parser->next_start = lex_mode->as.heredoc.next_start;
12662 parser->heredoc_end = parser->current.end;
12663 }
12664
12665 lex_state_set(parser, PM_LEX_STATE_END);
12666 lex_mode_pop(parser);
12668 }
12669 }
12670
12671 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12672 if (
12673 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12674 lex_mode->as.heredoc.common_whitespace != NULL &&
12675 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12676 peek_at(parser, start) != '\n'
12677 ) {
12678 *lex_mode->as.heredoc.common_whitespace = whitespace;
12679 }
12680 }
12681
12682 // Otherwise we'll be parsing string content. These are the places
12683 // where we need to split up the content of the heredoc. We'll use
12684 // strpbrk to find the first of these characters.
12685 uint8_t breakpoints[] = "\r\n\\#";
12686
12687 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12688 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12689 breakpoints[3] = '\0';
12690 }
12691
12692 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12693 pm_token_buffer_t token_buffer = { 0 };
12694 bool was_line_continuation = false;
12695
12696 while (breakpoint != NULL) {
12697 switch (*breakpoint) {
12698 case '\0':
12699 // Skip directly past the null character.
12700 parser->current.end = breakpoint + 1;
12701 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12702 break;
12703 case '\r':
12704 parser->current.end = breakpoint + 1;
12705
12706 if (peek_at(parser, breakpoint + 1) != '\n') {
12707 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12708 break;
12709 }
12710
12711 // If we hit a \r\n sequence, then we want to replace it
12712 // with a single \n character in the final string.
12713 breakpoint++;
12714 pm_token_buffer_escape(parser, &token_buffer);
12715 token_buffer.cursor = breakpoint;
12716
12718 case '\n': {
12719 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12720 parser_flush_heredoc_end(parser);
12721 parser->current.end = breakpoint + 1;
12722 pm_token_buffer_flush(parser, &token_buffer);
12724 }
12725
12726 pm_newline_list_append(&parser->newline_list, breakpoint);
12727
12728 // If we have a - or ~ heredoc, then we can match after
12729 // some leading whitespace.
12730 const uint8_t *start = breakpoint + 1;
12731
12732 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12733 // We want to match the terminator starting from the end of the line in case
12734 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12735 const uint8_t *newline = next_newline(start, parser->end - start);
12736
12737 if (newline == NULL) {
12738 newline = parser->end;
12739 } else if (newline[-1] == '\r') {
12740 newline--; // Remove \r
12741 }
12742
12743 // Start of a possible terminator.
12744 const uint8_t *terminator_start = newline - ident_length;
12745
12746 // Cursor to check for the leading whitespace. We skip the
12747 // leading whitespace if we have a - or ~ heredoc.
12748 const uint8_t *cursor = start;
12749
12750 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12751 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12752 cursor++;
12753 }
12754 }
12755
12756 if (
12757 cursor == terminator_start &&
12758 (memcmp(terminator_start, ident_start, ident_length) == 0)
12759 ) {
12760 parser->current.end = breakpoint + 1;
12761 pm_token_buffer_flush(parser, &token_buffer);
12763 }
12764 }
12765
12766 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12767
12768 // If we have hit a newline that is followed by a valid
12769 // terminator, then we need to return the content of the
12770 // heredoc here as string content. Then, the next time a
12771 // token is lexed, it will match again and return the
12772 // end of the heredoc.
12773 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12774 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12775 *lex_mode->as.heredoc.common_whitespace = whitespace;
12776 }
12777
12778 parser->current.end = breakpoint + 1;
12779 pm_token_buffer_flush(parser, &token_buffer);
12781 }
12782
12783 // Otherwise we hit a newline and it wasn't followed by
12784 // a terminator, so we can continue parsing.
12785 parser->current.end = breakpoint + 1;
12786 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12787 break;
12788 }
12789 case '\\': {
12790 // If we hit an escape, then we need to skip past
12791 // however many characters the escape takes up. However
12792 // it's important that if \n or \r\n are escaped, we
12793 // stop looping before the newline and not after the
12794 // newline so that we can still potentially find the
12795 // terminator of the heredoc.
12796 parser->current.end = breakpoint + 1;
12797
12798 // If we've hit the end of the file, then break out of
12799 // the loop by setting the breakpoint to NULL.
12800 if (parser->current.end == parser->end) {
12801 breakpoint = NULL;
12802 continue;
12803 }
12804
12805 pm_token_buffer_escape(parser, &token_buffer);
12806 uint8_t peeked = peek(parser);
12807
12808 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12809 switch (peeked) {
12810 case '\r':
12811 parser->current.end++;
12812 if (peek(parser) != '\n') {
12813 pm_token_buffer_push_byte(&token_buffer, '\\');
12814 pm_token_buffer_push_byte(&token_buffer, '\r');
12815 break;
12816 }
12818 case '\n':
12819 pm_token_buffer_push_byte(&token_buffer, '\\');
12820 pm_token_buffer_push_byte(&token_buffer, '\n');
12821 token_buffer.cursor = parser->current.end + 1;
12822 breakpoint = parser->current.end;
12823 continue;
12824 default:
12825 pm_token_buffer_push_byte(&token_buffer, '\\');
12826 pm_token_buffer_push_escaped(&token_buffer, parser);
12827 break;
12828 }
12829 } else {
12830 switch (peeked) {
12831 case '\r':
12832 parser->current.end++;
12833 if (peek(parser) != '\n') {
12834 pm_token_buffer_push_byte(&token_buffer, '\r');
12835 break;
12836 }
12838 case '\n':
12839 // If we are in a tilde here, we should
12840 // break out of the loop and return the
12841 // string content.
12842 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12843 const uint8_t *end = parser->current.end;
12844 pm_newline_list_append(&parser->newline_list, end);
12845
12846 // Here we want the buffer to only
12847 // include up to the backslash.
12848 parser->current.end = breakpoint;
12849 pm_token_buffer_flush(parser, &token_buffer);
12850
12851 // Now we can advance the end of the
12852 // token past the newline.
12853 parser->current.end = end + 1;
12854 lex_mode->as.heredoc.line_continuation = true;
12856 }
12857
12858 was_line_continuation = true;
12859 token_buffer.cursor = parser->current.end + 1;
12860 breakpoint = parser->current.end;
12861 continue;
12862 default:
12863 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12864 break;
12865 }
12866 }
12867
12868 token_buffer.cursor = parser->current.end;
12869 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12870 break;
12871 }
12872 case '#': {
12873 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12874
12875 if (type == PM_TOKEN_NOT_PROVIDED) {
12876 // If we haven't returned at this point then we had
12877 // something that looked like an interpolated class
12878 // or instance variable like "#@" but wasn't
12879 // actually. In this case we'll just skip to the
12880 // next breakpoint.
12881 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12882 break;
12883 }
12884
12886 pm_token_buffer_flush(parser, &token_buffer);
12887 }
12888
12889 LEX(type);
12890 }
12891 default:
12892 assert(false && "unreachable");
12893 }
12894
12895 was_line_continuation = false;
12896 }
12897
12898 if (parser->current.end > parser->current.start) {
12899 parser->current.end = parser->end;
12900 pm_token_buffer_flush(parser, &token_buffer);
12902 }
12903
12904 // If we've hit the end of the string, then this is an unterminated
12905 // heredoc. In that case we'll return a string content token.
12906 parser->current.end = parser->end;
12907 pm_token_buffer_flush(parser, &token_buffer);
12909 }
12910 }
12911
12912 assert(false && "unreachable");
12913}
12914
12915#undef LEX
12916
12917/******************************************************************************/
12918/* Parse functions */
12919/******************************************************************************/
12920
/**
 * Precedence levels used when parsing operators, ordered from loosest (small
 * values) to tightest (large values). Levels are spaced two apart so that
 * `level + 1`, which the LEFT_ASSOCIATIVE and NON_ASSOCIATIVE initializer
 * macros use as a right binding power, always falls strictly between two
 * adjacent levels.
 */
typedef enum {
    PM_BINDING_POWER_UNSET            =  0, // no binding power: the token cannot be used as an infix operator
    PM_BINDING_POWER_STATEMENT        =  2,
    PM_BINDING_POWER_MODIFIER_RESCUE  =  4, // rescue
    PM_BINDING_POWER_MODIFIER         =  6, // if unless until while
    PM_BINDING_POWER_COMPOSITION      =  8, // and or
    PM_BINDING_POWER_NOT              = 10, // not
    PM_BINDING_POWER_MATCH            = 12, // => in
    PM_BINDING_POWER_DEFINED          = 14, // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
    PM_BINDING_POWER_ASSIGNMENT       = 18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY          = 20, // ?:
    PM_BINDING_POWER_RANGE            = 22, // .. ...
    PM_BINDING_POWER_LOGICAL_OR       = 24, // ||
    PM_BINDING_POWER_LOGICAL_AND      = 26, // &&
    PM_BINDING_POWER_EQUALITY         = 28, // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON       = 30, // > >= < <=
    PM_BINDING_POWER_BITWISE_OR       = 32, // | ^
    PM_BINDING_POWER_BITWISE_AND      = 34, // &
    PM_BINDING_POWER_SHIFT            = 36, // << >>
    PM_BINDING_POWER_TERM             = 38, // + -
    PM_BINDING_POWER_FACTOR           = 40, // * / %
    PM_BINDING_POWER_UMINUS           = 42, // -@
    PM_BINDING_POWER_EXPONENT         = 44, // **
    PM_BINDING_POWER_UNARY            = 46, // ! ~ +@
    PM_BINDING_POWER_INDEX            = 48, // [] []=
    PM_BINDING_POWER_CALL             = 50, // :: .
    PM_BINDING_POWER_MAX              = 52
} pm_binding_power_t;
12958
// Record holding the binding powers attached to a token: a `left` and a
// `right` power, both of type pm_binding_power_t.
//
// NOTE(review): this definition looks truncated in this listing. The embedded
// numbering jumps from 12969 to 12972 and then to 12979, no closing brace or
// typedef name is visible, and the initializer macros that follow supply four
// values (two powers followed by two booleans). Presumably two bool members
// and the closing `} pm_binding_powers_t;` were lost -- confirm against the
// upstream file and restore them before building.
12963typedef struct {
 // Binding power on the left-hand side of the token.
12965 pm_binding_power_t left;
12966
 // Binding power on the right-hand side of the token.
12968 pm_binding_power_t right;
12969
12972
12979
// Initializer shorthands for entries of the binding-power table. Each expands
// to a brace-enclosed initializer of four values: the left binding power, the
// right binding power, and two booleans. The third value is false only for the
// unary form and the fourth is true only for NON_ASSOCIATIVE.
//
// - LEFT_ASSOCIATIVE / NON_ASSOCIATIVE: the right power is one more than the
//   left power.
// - RIGHT_ASSOCIATIVE / RIGHT_ASSOCIATIVE_UNARY: the left and right powers are
//   equal.
// - BINDING_POWER_ASSIGNMENT: fixed pair (UNARY on the left, ASSIGNMENT on the
//   right) shared by every assignment operator.
//
// The `precedence` parameter is parenthesized on every use so that an argument
// that is itself an expression (for example `a << b`) cannot re-associate with
// the `+ 1`.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12985
12986pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12987 // rescue
12988 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12989
12990 // if unless until while
12991 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12992 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12993 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12994 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12995
12996 // and or
12997 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12998 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12999
13000 // => in
13001 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13002 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13003
13004 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
13005 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13006 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13007 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
13008 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
13009 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
13010 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13011 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13012 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
13013 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13014 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13015 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13016 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
13017 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13018 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13019
13020 // ?:
13021 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13022
13023 // .. ...
13024 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13025 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13026 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13027 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13028
13029 // ||
13030 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13031
13032 // &&
13033 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13034
13035 // != !~ == === =~ <=>
13036 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13037 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13038 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13039 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13040 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13041 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13042
13043 // > >= < <=
13044 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13045 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13046 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13047 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13048
13049 // ^ |
13050 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13051 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13052
13053 // &
13054 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13055
13056 // >> <<
13057 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13058 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13059
13060 // - +
13061 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13062 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13063
13064 // % / *
13065 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13066 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13067 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13068 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13069
13070 // -@
13071 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13072 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13073
13074 // **
13075 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13076 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13077
13078 // ! ~ +@
13079 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13080 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13081 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13082
13083 // [
13084 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13085
13086 // :: . &.
13087 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13088 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13089 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13090};
13091
// The initializer shorthands are only meaningful for the table above, so
// remove all five of them here. NON_ASSOCIATIVE was previously left defined
// and leaked into the rest of the translation unit.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
13096
13100static inline bool
13101match1(const pm_parser_t *parser, pm_token_type_t type) {
13102 return parser->current.type == type;
13103}
13104
13108static inline bool
13109match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13110 return match1(parser, type1) || match1(parser, type2);
13111}
13112
13116static inline bool
13117match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13118 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13119}
13120
13124static inline bool
13125match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13126 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13127}
13128
13132static inline bool
13133match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13134 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13135}
13136
13140static inline bool
13141match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13142 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13143}
13144
13148static inline bool
13149match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13150 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13151}
13152
13159static bool
13160accept1(pm_parser_t *parser, pm_token_type_t type) {
13161 if (match1(parser, type)) {
13162 parser_lex(parser);
13163 return true;
13164 }
13165 return false;
13166}
13167
13172static inline bool
13173accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13174 if (match2(parser, type1, type2)) {
13175 parser_lex(parser);
13176 return true;
13177 }
13178 return false;
13179}
13180
13192static void
13193expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13194 if (accept1(parser, type)) return;
13195
13196 const uint8_t *location = parser->previous.end;
13197 pm_parser_err(parser, location, location, diag_id);
13198
13199 parser->previous.start = location;
13200 parser->previous.type = PM_TOKEN_MISSING;
13201}
13202
13207static void
13208expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13209 if (accept2(parser, type1, type2)) return;
13210
13211 const uint8_t *location = parser->previous.end;
13212 pm_parser_err(parser, location, location, diag_id);
13213
13214 parser->previous.start = location;
13215 parser->previous.type = PM_TOKEN_MISSING;
13216}
13217
13222static void
13223expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13224 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13225 parser_lex(parser);
13226 } else {
13227 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13228 parser->previous.start = parser->previous.end;
13229 parser->previous.type = PM_TOKEN_MISSING;
13230 }
13231}
13232
13233static pm_node_t *
13234parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13235
13240static pm_node_t *
13241parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13242 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13243 pm_assert_value_expression(parser, node);
13244 return node;
13245}
13246
// Decides whether a token of the given type can begin an expression. Some
// token types are answered explicitly by the switch below; every other type
// falls through to the default case, which answers true exactly when the
// token has no left binding power in pm_binding_powers.
//
// NOTE(review): this listing appears to be missing lines. The embedded
// numbering skips 13268-13269, 13273-13274, 13277, 13279-13288, 13290, 13299,
// 13303, 13305 and 13310, and two `return false;` statements follow directly
// after a comment with no visible `case` label. The case lists shown here are
// therefore incomplete -- restore them from the upstream file before building.
13265static inline bool
13266token_begins_expression_p(pm_token_type_t type) {
13267 switch (type) {
13270 // We need to special case this because it is a binary operator that
13271 // should not be marked as beginning an expression.
13272 return false;
13275 case PM_TOKEN_COLON:
13276 case PM_TOKEN_COMMA:
13278 case PM_TOKEN_EOF:
13289 case PM_TOKEN_NEWLINE:
13291 case PM_TOKEN_SEMICOLON:
13292 // The reason we need this short-circuit is because we're using the
13293 // binding powers table to tell us if the subsequent token could
13294 // potentially be the start of an expression. If there _is_ a binding
13295 // power for one of these tokens, then we should remove it from this list
13296 // and let it be handled by the default case below.
13297 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13298 return false;
13300 // This is a special case because this unary operator cannot appear
13301 // as a general operator, it only appears in certain circumstances.
13302 return false;
13304 case PM_TOKEN_UMINUS:
13306 case PM_TOKEN_UPLUS:
13307 case PM_TOKEN_BANG:
13308 case PM_TOKEN_TILDE:
13309 case PM_TOKEN_UDOT_DOT:
13311 // These unary tokens actually do have binding power associated with them
13312 // so that we can correctly place them into the precedence order. But we
13313 // want them to be marked as beginning an expression, so we need to
13314 // special case them here.
13315 return true;
13316 default:
13317 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13318 }
13319}
13320
13325static pm_node_t *
13326parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13327 if (accept1(parser, PM_TOKEN_USTAR)) {
13328 pm_token_t operator = parser->previous;
13329 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13330 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13331 }
13332
13333 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13334}
13335
13340static void
13341parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13342 // The method name needs to change. If we previously had
13343 // foo, we now need foo=. In this case we'll allocate a new
13344 // owned string, copy the previous method name in, and
13345 // append an =.
13346 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13347 size_t length = constant->length;
13348 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13349 if (name == NULL) return;
13350
13351 memcpy(name, constant->start, length);
13352 name[length] = '=';
13353
13354 // Now switch the name to the new string.
13355 // This silences clang analyzer warning about leak of memory pointed by `name`.
13356 // NOLINTNEXTLINE(clang-analyzer-*)
13357 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13358}
13359
13366static pm_node_t *
13367parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13368 switch (PM_NODE_TYPE(target)) {
13369 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13370 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13371 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13372 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13373 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13374 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13375 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13376 default: break;
13377 }
13378
13379 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13380 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13381
13382 pm_node_destroy(parser, target);
13383 return (pm_node_t *) result;
13384}
13385
13391static void
13392parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13393 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13394
13395 for (size_t index = 0; index < implicit_parameters->size; index++) {
13396 if (implicit_parameters->nodes[index] == node) {
13397 // If the node is not the last one in the list, we need to shift the
13398 // remaining nodes down to fill the gap. This is extremely unlikely
13399 // to happen.
13400 if (index != implicit_parameters->size - 1) {
13401 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13402 }
13403
13404 implicit_parameters->size--;
13405 break;
13406 }
13407 }
13408}
13409
/**
 * Convert an already-parsed expression node into the node that represents it
 * as the target of a write (for example one element of a multiple assignment).
 *
 * `multiple` is forwarded when recursing into a splat and enables the
 * safe-navigation error on call targets. `splat_parent` is passed as true when
 * recursing into the expression of a splat; when it is set, one of the
 * branches below reports PM_ERR_WRITE_TARGET_UNEXPECTED. Nodes that cannot be
 * converted are reported with PM_ERR_WRITE_TARGET_UNEXPECTED and returned
 * unchanged.
 *
 * NOTE(review): the embedded line numbers skip in several places and a number
 * of `case` labels (and some statements) are not visible in this listing, so
 * not every branch below can be attributed to a node type from here.
 */
13418static pm_node_t *
13419parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13420 switch (PM_NODE_TYPE(target)) {
13421 case PM_MISSING_NODE:
13422 return target;
13424 case PM_FALSE_NODE:
13427 case PM_NIL_NODE:
13428 case PM_SELF_NODE:
13429 case PM_TRUE_NODE: {
13430 // In these special cases, we have specific error messages and we
13431 // will replace them with local variable writes.
13432 return parse_unwriteable_target(parser, target);
13433 }
 // NOTE(review): the case label(s) for this return are not visible here.
13437 return target;
 // NOTE(review): case label not visible. This branch reports
 // PM_ERR_WRITE_TARGET_IN_METHOD when context_def_p(parser) holds.
13439 if (context_def_p(parser)) {
13440 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13441 }
13442
13445
13446 return target;
 // NOTE(review): case label not visible. Same in-method check as above, then
 // the node is retagged in place as a constant target; the assert checks that
 // the read and target node structs have the same size.
13448 if (context_def_p(parser)) {
13449 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13450 }
13451
13452 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13453 target->type = PM_CONSTANT_TARGET_NODE;
13454
13455 return target;
 // NOTE(review): case label(s) not visible. Reports a read-only target error
 // and returns the node unchanged.
13458 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13459 return target;
13463 return target;
 // NOTE(review): case label not visible; the cast below treats the node as a
 // local variable read. A numbered-parameter name is reported as reserved and
 // the node is removed from the scope's implicit parameters. The local is then
 // passed to pm_locals_unread for the scope found at the read's depth.
13465 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13466 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13467 parse_target_implicit_parameter(parser, target);
13468 }
13469
13470 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13471 uint32_t name = cast->name;
13472 uint32_t depth = cast->depth;
13473 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13474
13477
13478 return target;
13479 }
 // NOTE(review): case label not visible. Adds a local named "it", builds a
 // local variable target for it, removes the original node from the implicit
 // parameters, and destroys the original node.
13481 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13482 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13483
13484 parse_target_implicit_parameter(parser, target);
13485 pm_node_destroy(parser, target);
13486
13487 return node;
13488 }
13492 return target;
 // NOTE(review): case label not visible; per the comment below this handles a
 // multi target.
13494 if (splat_parent) {
13495 // Multi target is not accepted in all positions. If this is one
13496 // of them, then we need to add an error.
13497 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13498 }
13499
13500 return target;
13501 case PM_SPLAT_NODE: {
13502 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13503
 // Convert the splatted expression (if any) with splat_parent set to true.
13504 if (splat->expression != NULL) {
13505 splat->expression = parse_target(parser, splat->expression, multiple, true);
13506 }
13507
13508 return (pm_node_t *) splat;
13509 }
13510 case PM_CALL_NODE: {
13511 pm_call_node_t *call = (pm_call_node_t *) target;
13512
13513 // If we have no arguments to the call node and we need this to be a
13514 // target then this is either a method call or a local variable
13515 // write.
13516 if (
13517 (call->message_loc.start != NULL) &&
13518 (call->message_loc.end[-1] != '!') &&
13519 (call->message_loc.end[-1] != '?') &&
13520 (call->opening_loc.start == NULL) &&
13521 (call->arguments == NULL) &&
13522 (call->block == NULL)
13523 ) {
13524 if (call->receiver == NULL) {
13525 // When we get here, we have a local variable write, because it
13526 // was previously marked as a method call but now we have an =.
13527 // This looks like:
13528 //
13529 // foo = 1
13530 //
13531 // When it was parsed in the prefix position, foo was seen as a
13532 // method call with no receiver and no arguments. Now we have an
13533 // =, so we know it's a local variable write.
 // The message location is copied first because the call node is
 // destroyed before the replacement target is created.
13534 const pm_location_t message_loc = call->message_loc;
13535
13536 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13537 pm_node_destroy(parser, target);
13538
13539 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13540 }
13541
 // A call with a receiver whose message starts with '_' or passes the
 // encoding's alnum_char check becomes a call target with the name
 // rewritten to end in `=`.
13542 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13543 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13544 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13545 }
13546
13547 parse_write_name(parser, &call->name);
13548 return (pm_node_t *) pm_call_target_node_create(parser, call);
13549 }
13550 }
13551
13552 // If there is no call operator and the message is "[]" then this is
13553 // an aref expression, and we can transform it into an aset
13554 // expression.
13555 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13556 return (pm_node_t *) pm_index_target_node_create(parser, call);
13557 }
13558 }
 // A call node that matched none of the shapes above continues into the
 // default handling below (no return or break precedes it in this listing).
13560 default:
13561 // In this case we have a node that we don't know how to convert
13562 // into a target. We need to treat it as an error. For now, we'll
13563 // mark it as an error and just skip right past it.
13564 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13565 return target;
13566 }
13567}
13568
13573static pm_node_t *
13574parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13575 pm_node_t *result = parse_target(parser, target, multiple, false);
13576
13577 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13578 // parens after the targets.
13579 if (
13580 !match1(parser, PM_TOKEN_EQUAL) &&
13581 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13582 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13583 ) {
13584 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13585 }
13586
13587 return result;
13588}
13589
13594static pm_node_t *
13595parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13596 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13597
13598 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13599 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13600 }
13601
13602 return write;
13603}
13604
/**
 * Convert a parsed target node plus an operator token and a value node into
 * the corresponding write node. Most branches build a new write node from the
 * target and then destroy the target; call nodes are instead mutated in place
 * into attribute writes (`foo.bar = 1`) or index writes (`foo[0] = 1`). When
 * the target cannot be written to, PM_ERR_WRITE_TARGET_UNEXPECTED is reported
 * on the operator and the target is returned unchanged.
 *
 * NOTE(review): the embedded line numbers skip in several places and a number
 * of `case` labels and declarations are not visible in this listing, so not
 * every branch below can be attributed to a node type from here.
 */
13608static pm_node_t *
13609parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13610 switch (PM_NODE_TYPE(target)) {
13611 case PM_MISSING_NODE:
 // The value cannot be attached to a missing target, so it is destroyed.
13612 pm_node_destroy(parser, value);
13613 return target;
 // NOTE(review): case label not visible; the cast below treats the target as
 // a class variable read.
13615 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13616 pm_node_destroy(parser, target);
13617 return (pm_node_t *) node;
13618 }
13619 case PM_CONSTANT_PATH_NODE: {
 // Unlike the branches around it, this one does not destroy the target after
 // building the write node.
13620 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13621
13622 if (context_def_p(parser)) {
13623 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13624 }
13625
13626 return parse_shareable_constant_write(parser, node);
13627 }
13628 case PM_CONSTANT_READ_NODE: {
13629 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13630
13631 if (context_def_p(parser)) {
13632 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13633 }
13634
13635 pm_node_destroy(parser, target);
13636 return parse_shareable_constant_write(parser, node);
13637 }
 // NOTE(review): case labels not visible. A read-only target error is
 // reported and, with no return or break visible in between, execution
 // continues into the global variable write below.
13640 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13643 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13644 pm_node_destroy(parser, target);
13645 return (pm_node_t *) node;
13646 }
 // NOTE(review): the case label and the declaration of `local_read` are not
 // visible. The name, location and depth are copied out before the target is
 // destroyed because the write node is created afterwards.
13649
13650 pm_constant_id_t name = local_read->name;
13651 pm_location_t name_loc = target->location;
13652
13653 uint32_t depth = local_read->depth;
13654 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13655
 // Writing to a numbered-parameter name is an error; which diagnostic is used
 // depends on whether the scope already has numbered parameters recorded.
13656 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13657 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13658 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13659 parse_target_implicit_parameter(parser, target);
13660 }
13661
13662 pm_locals_unread(&scope->locals, name);
13663 pm_node_destroy(parser, target);
13664
13665 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13666 }
 // NOTE(review): case label not visible. Adds a local named "it", creates a
 // local variable write for it at depth 0, then removes the original node
 // from the implicit parameters and destroys it.
13668 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13669 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13670
13671 parse_target_implicit_parameter(parser, target);
13672 pm_node_destroy(parser, target);
13673
13674 return node;
13675 }
 // NOTE(review): case label not visible; the cast below treats the target as
 // an instance variable read.
13677 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13678 pm_node_destroy(parser, target);
13679 return write_node;
13680 }
 // NOTE(review): case label not visible; the cast below treats the target as
 // a multi target node.
13682 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13683 case PM_SPLAT_NODE: {
13684 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13685
 // NOTE(review): `value` is passed both to the nested parse_write here and to
 // the multi write node created below — confirm that sharing is intended.
13686 if (splat->expression != NULL) {
13687 splat->expression = parse_write(parser, splat->expression, operator, value);
13688 }
13689
 // A lone splat target is wrapped in a new multi target node so that the
 // result is a multi write.
13690 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13691 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13692
13693 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13694 }
13695 case PM_CALL_NODE: {
13696 pm_call_node_t *call = (pm_call_node_t *) target;
13697
13698 // If we have no arguments to the call node and we need this to be a
13699 // target then this is either a method call or a local variable
13700 // write.
13701 if (
13702 (call->message_loc.start != NULL) &&
13703 (call->message_loc.end[-1] != '!') &&
13704 (call->message_loc.end[-1] != '?') &&
13705 (call->opening_loc.start == NULL) &&
13706 (call->arguments == NULL) &&
13707 (call->block == NULL)
13708 ) {
13709 if (call->receiver == NULL) {
13710 // When we get here, we have a local variable write, because it
13711 // was previously marked as a method call but now we have an =.
13712 // This looks like:
13713 //
13714 // foo = 1
13715 //
13716 // When it was parsed in the prefix position, foo was seen as a
13717 // method call with no receiver and no arguments. Now we have an
13718 // =, so we know it's a local variable write.
 // The message location is copied first because the call node is
 // destroyed before the write node is created.
13719 const pm_location_t message = call->message_loc;
13720
13721 pm_parser_local_add_location(parser, message.start, message.end, 0);
13722 pm_node_destroy(parser, target);
13723
13724 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13725 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13726
13727 pm_refute_numbered_parameter(parser, message.start, message.end);
13728 return target;
13729 }
13730
13731 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13732 // When we get here, we have a method call, because it was
13733 // previously marked as a method call but now we have an =. This
13734 // looks like:
13735 //
13736 // foo.bar = 1
13737 //
13738 // When it was parsed in the prefix position, foo.bar was seen as a
13739 // method call with no arguments. Now we have an =, so we know it's
13740 // a method call with an argument. In this case we will create the
13741 // arguments node, parse the argument, and add it to the list.
13742 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13743 call->arguments = arguments;
13744
13745 pm_arguments_node_arguments_append(arguments, value);
13746 call->base.location.end = arguments->base.location.end;
13747
 // Rename the call to its `name=` form and flag it as an attribute write.
13748 parse_write_name(parser, &call->name);
13749 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13750
13751 return (pm_node_t *) call;
13752 }
13753 }
13754
13755 // If there is no call operator and the message is "[]" then this is
13756 // an aref expression, and we can transform it into an aset
13757 // expression.
13758 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13759 if (call->arguments == NULL) {
13760 call->arguments = pm_arguments_node_create(parser);
13761 }
13762
 // The value becomes the final argument and the node's end is moved to the
 // end of the value.
13763 pm_arguments_node_arguments_append(call->arguments, value);
13764 target->location.end = value->location.end;
13765
13766 // Replace the name with "[]=".
13767 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13768
13769 // Ensure that the arguments for []= don't contain keywords
13770 pm_index_arguments_check(parser, call->arguments, call->block);
13771 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13772
13773 return target;
13774 }
13775
13776 // If there are arguments on the call node, then it can't be a method
13777 // call ending with = or a local variable write, so it must be a
13778 // syntax error. In this case we'll fall through to our default
13779 // handling. We need to free the value that we parsed because there
13780 // is no way for us to attach it to the tree at this point.
13781 pm_node_destroy(parser, value);
13782 }
13784 default:
13785 // In this case we have a node that we don't know how to convert into a
13786 // target. We need to treat it as an error. For now, we'll mark it as an
13787 // error and just skip right past it.
13788 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13789 return target;
13790 }
13791}
13792
13799static pm_node_t *
13800parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13801 switch (PM_NODE_TYPE(target)) {
13802 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13803 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13804 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13805 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13806 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13807 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13808 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13809 default: break;
13810 }
13811
13812 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13813 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13814
13815 pm_node_destroy(parser, target);
13816 return (pm_node_t *) result;
13817}
13818
13829static pm_node_t *
13830parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13831 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13832
13833 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13834 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13835
13836 while (accept1(parser, PM_TOKEN_COMMA)) {
13837 if (accept1(parser, PM_TOKEN_USTAR)) {
13838 // Here we have a splat operator. It can have a name or be
13839 // anonymous. It can be the final target or be in the middle if
13840 // there haven't been any others yet.
13841 if (has_rest) {
13842 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13843 }
13844
13845 pm_token_t star_operator = parser->previous;
13846 pm_node_t *name = NULL;
13847
13848 if (token_begins_expression_p(parser->current.type)) {
13849 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13850 name = parse_target(parser, name, true, true);
13851 }
13852
13853 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13854 pm_multi_target_node_targets_append(parser, result, splat);
13855 has_rest = true;
13856 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13857 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13858 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13859 target = parse_target(parser, target, true, false);
13860
13861 pm_multi_target_node_targets_append(parser, result, target);
13862 context_pop(parser);
13863 } else if (token_begins_expression_p(parser->current.type)) {
13864 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13865 target = parse_target(parser, target, true, false);
13866
13867 pm_multi_target_node_targets_append(parser, result, target);
13868 } else if (!match1(parser, PM_TOKEN_EOF)) {
13869 // If we get here, then we have a trailing , in a multi target node.
13870 // We'll add an implicit rest node to represent this.
13871 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13872 pm_multi_target_node_targets_append(parser, result, rest);
13873 break;
13874 }
13875 }
13876
13877 return (pm_node_t *) result;
13878}
13879
13884static pm_node_t *
13885parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13886 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13887 accept1(parser, PM_TOKEN_NEWLINE);
13888
13889 // Ensure that we have either an = or a ) after the targets.
13890 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13891 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13892 }
13893
13894 return result;
13895}
13896
/**
 * Parse a list of statements in the given context. Leading newlines and
 * semicolons are skipped first; if the current token then already terminates
 * the context, NULL is returned. Otherwise a statements node is created, the
 * context is pushed for the duration of the parse, and expressions are parsed
 * and appended until a context terminator is reached or the parser is
 * recovering from a syntax error. After the context is popped the statements
 * are passed to pm_void_statements_check.
 */
13900static pm_statements_node_t *
13901parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13902 // First, skip past any optional terminators that might be at the beginning
13903 // of the statements.
13904 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13905
13906 // If we have a terminator, then we can just return NULL.
13907 if (context_terminator(context, &parser->current)) return NULL;
13908
13909 pm_statements_node_t *statements = pm_statements_node_create(parser);
13910
13911 // At this point we know we have at least one statement, and that it
13912 // immediately follows the current token.
13913 context_push(parser, context);
13914
13915 while (true) {
13916 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13917 pm_statements_node_body_append(parser, statements, node, true);
13918
13919 // If we're recovering from a syntax error, then we need to stop parsing
13920 // the statements now.
13921 if (parser->recovering) {
13922 // If this is the level of context where the recovery has happened,
13923 // then we can mark the parser as done recovering.
13924 if (context_terminator(context, &parser->current)) parser->recovering = false;
13925 break;
13926 }
13927
13928 // If we have a terminator, then we will parse all consecutive
13929 // terminators and then continue parsing the statements list.
13930 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13931 // If we have a terminator, then we will continue parsing the
13932 // statements list.
13933 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13934 if (context_terminator(context, &parser->current)) break;
13935
13936 // Now we can continue parsing the list of statements.
13937 continue;
13938 }
13939
13940 // At this point we have a list of statements that are not terminated by
13941 // a newline or semicolon. At this point we need to check if we're at
13942 // the end of the statements list. If we are, then we should break out
13943 // of the loop.
13944 if (context_terminator(context, &parser->current)) break;
13945
13946 // At this point, we have a syntax error, because the statement was not
13947 // terminated by a newline or semicolon, and we're not at the end of the
13948 // statements list. Ideally we should scan forward to determine if we
13949 // should insert a missing terminator or break out of parsing the
13950 // statements list at this point.
13951 //
13952 // We don't have that yet, so instead we'll do a more naive approach. If
13953 // we were unable to parse an expression, then we will skip past this
13954 // token and continue parsing the statements list. Otherwise we'll add
13955 // an error and continue parsing the statements list.
13956 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
 // Advance past the token that could not be parsed.
13957 parser_lex(parser);
13958
13959 // If we are at the end of the file, then we need to stop parsing
13960 // the statements entirely at this point. Mark the parser as
13961 // recovering, as we know that EOF closes the top-level context, and
13962 // then break out of the loop.
13963 if (match1(parser, PM_TOKEN_EOF)) {
13964 parser->recovering = true;
13965 break;
13966 }
13967
13968 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13969 if (context_terminator(context, &parser->current)) break;
13970 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13971 // This is an inlined version of accept1 because the error that we
13972 // want to add has varargs. If this happens again, we should
13973 // probably extract a helper function.
13974 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
 // The previous token is collapsed to an empty range at its own end and
 // marked as missing.
13975 parser->previous.start = parser->previous.end;
13976 parser->previous.type = PM_TOKEN_MISSING;
13977 }
13978 }
13979
13980 context_pop(parser);
 // last_value is forwarded to pm_void_statements_check below; it defaults to
 // true and is cleared for certain contexts.
13981 bool last_value = true;
13982 switch (context) {
 // NOTE(review): the case label(s) selecting the contexts for which
 // last_value is cleared are not visible in this listing.
13985 last_value = false;
13986 break;
13987 default:
13988 break;
13989 }
13990 pm_void_statements_check(parser, statements, last_value);
13991
13992 return statements;
13993}
13994
13999static void
14000pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14001 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
14002
14003 if (duplicated != NULL) {
14004 pm_buffer_t buffer = { 0 };
14005 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
14006
14007 pm_diagnostic_list_append_format(
14008 &parser->warning_list,
14009 duplicated->location.start,
14010 duplicated->location.end,
14011 PM_WARN_DUPLICATED_HASH_KEY,
14012 (int) pm_buffer_length(&buffer),
14013 pm_buffer_value(&buffer),
14014 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
14015 );
14016
14017 pm_buffer_free(&buffer);
14018 }
14019}
14020
14025static void
14026pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14027 pm_node_t *previous;
14028
14029 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14030 pm_diagnostic_list_append_format(
14031 &parser->warning_list,
14032 node->location.start,
14033 node->location.end,
14034 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14035 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14036 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14037 );
14038 }
14039}
14040
/**
 * Parse the elements of a hash literal or keyword hash and append them to
 * `node`, which is treated as a hash node when its type is PM_HASH_NODE and as
 * a keyword hash node otherwise. Three element forms are handled: `**value`
 * splats, `label:` keys (with or without an explicit value), and arbitrary key
 * expressions followed by `=>` (or by nothing when the key is itself a label
 * symbol). Keys of the last two forms are recorded in `literals` via
 * pm_hash_key_static_literals_add, which warns about duplicates.
 *
 * Returns true if at least one `**` element was parsed.
 */
14044static bool
14045parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14047 bool contains_keyword_splat = false;
14048
14049 while (true) {
14050 pm_node_t *element;
14051
14052 switch (parser->current.type) {
14053 case PM_TOKEN_USTAR_STAR: {
14054 parser_lex(parser);
14055 pm_token_t operator = parser->previous;
14056 pm_node_t *value = NULL;
14057
14058 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14059 // If we're about to parse a nested hash that is being
14060 // pushed into this hash directly with **, then we want the
14061 // inner hash to share the static literals with the outer
14062 // hash.
14063 parser->current_hash_keys = literals;
14064 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14065 } else if (token_begins_expression_p(parser->current.type)) {
14066 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14067 } else {
 // A bare `**` with no operand: value stays NULL and the forwarding
 // check is run on the operator.
14068 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14069 }
14070
14071 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14072 contains_keyword_splat = true;
14073 break;
14074 }
14075 case PM_TOKEN_LABEL: {
14076 pm_token_t label = parser->current;
14077 parser_lex(parser);
14078
14079 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14080 pm_hash_key_static_literals_add(parser, literals, key);
14081
14082 pm_token_t operator = not_provided(parser);
14083 pm_node_t *value = NULL;
14084
14085 if (token_begins_expression_p(parser->current.type)) {
14086 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14087 } else {
 // No value follows the label, so one is synthesized from the label text
 // with its final byte excluded (label.end - 1): a constant read when
 // the encoding's isupper_char check passes on it, otherwise a local
 // variable read or a variable call.
14088 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14089 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14090 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14091 } else {
 // NOTE(review): this `int depth` shadows the `uint16_t depth` parameter
 // for the rest of this block; here it holds the local's scope depth, with
 // -1 meaning "not resolved as a local".
14092 int depth = -1;
14093 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14094
14095 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14096 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14097 } else {
14098 depth = pm_parser_local_depth(parser, &identifier);
14099 }
14100
14101 if (depth == -1) {
14102 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14103 } else {
14104 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14105 }
14106 }
14107
 // Presumably re-includes the label's final byte that was excluded above
 // via `label.end - 1` — TODO confirm.
14108 value->location.end++;
14109 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14110 }
14111
14112 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14113 break;
14114 }
14115 default: {
14116 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14117
14118 // Hash keys that are strings are automatically frozen. We will
14119 // mark that here.
14120 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14121 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14122 }
14123
14124 pm_hash_key_static_literals_add(parser, literals, key);
14125
 // A key that is itself a label symbol has no operator; any other key
 // must be followed by `=>`.
14126 pm_token_t operator;
14127 if (pm_symbol_node_label_p(key)) {
14128 operator = not_provided(parser);
14129 } else {
14130 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14131 operator = parser->previous;
14132 }
14133
14134 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14135 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14136 break;
14137 }
14138 }
14139
14140 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14141 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14142 } else {
14143 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14144 }
14145
14146 // If there's no comma after the element, then we're done.
14147 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14148
14149 // If the next element starts with a label or a **, then we know we have
14150 // another element in the hash, so we'll continue parsing.
14151 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14152
14153 // Otherwise we need to check if the subsequent token begins an expression.
14154 // If it does, then we'll continue parsing.
14155 if (token_begins_expression_p(parser->current.type)) continue;
14156
14157 // Otherwise by default we will exit out of this loop.
14158 break;
14159 }
14160
14161 return contains_keyword_splat;
14162}
14163
14167static inline void
14168parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14169 if (arguments->arguments == NULL) {
14170 arguments->arguments = pm_arguments_node_create(parser);
14171 }
14172
14173 pm_arguments_node_arguments_append(arguments->arguments, argument);
14174}
14175
/**
 * Parse a comma-separated list of call arguments and record them on
 * `arguments`.
 *
 * Positional arguments, splats, bare keyword hashes and forwarding arguments
 * are appended through parse_arguments_append. The first block argument seen is
 * stored in `arguments->block`; any later one is appended to the list instead.
 *
 * @param parser The parser being driven.
 * @param arguments The structure that receives the parsed arguments.
 * @param accepts_forwarding Whether a leading `...` may be treated as argument
 *     forwarding (it is still parsed as a range when an expression follows).
 * @param terminator The token type that closes the list. PM_TOKEN_EOF means
 *     there is no specific closing token; in that case a newline is not
 *     accepted between an argument and the following comma.
 * @param depth The current nesting depth; every nested parse call below is
 *     given `depth + 1`.
 */
14179static void
14180parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14181 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14182
14183 // First we need to check if the next token is one that could be the start
14184 // of an argument. If it's not, then we can just return.
14185 if (
14186 match2(parser, terminator, PM_TOKEN_EOF) ||
14187 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14188 context_terminator(parser->current_context->context, &parser->current)
14189 ) {
14190 return;
14191 }
14192
      // Per-call state describing what has been parsed so far. These flags only
      // drive error reporting and the loop exit conditions below.
14193 bool parsed_first_argument = false;
14194 bool parsed_bare_hash = false;
14195 bool parsed_block_argument = false;
14196 bool parsed_forwarding_arguments = false;
14197
14198 while (!match1(parser, PM_TOKEN_EOF)) {
      // Anything that follows a forwarding `...` argument is an error, but we
      // still go on to parse it.
14199 if (parsed_forwarding_arguments) {
14200 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14201 }
14202
14203 pm_node_t *argument = NULL;
14204
14205 switch (parser->current.type) {
      // Keyword-style arguments: collect them into a single keyword hash node
      // by handing off to parse_assocs.
14207 case PM_TOKEN_LABEL: {
14208 if (parsed_bare_hash) {
14209 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14210 }
14211
14212 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14213 argument = (pm_node_t *) hash;
14214
14215 pm_static_literals_t hash_keys = { 0 };
14216 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14217
14218 parse_arguments_append(parser, arguments, argument);
14219
      // NOTE(review): `flags` is used below but no declaration is visible in
      // this listing; a line appears to be missing immediately before this
      // point. Confirm against the upstream file.
14221 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14222 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14223
14224 pm_static_literals_free(&hash_keys);
14225 parsed_bare_hash = true;
14226
14227 break;
14228 }
      // Block argument: `&expression`, or a bare `&` that is checked against
      // the enclosing scope via pm_parser_scope_forwarding_block_check.
14229 case PM_TOKEN_UAMPERSAND: {
14230 parser_lex(parser);
14231 pm_token_t operator = parser->previous;
14232 pm_node_t *expression = NULL;
14233
14234 if (token_begins_expression_p(parser->current.type)) {
14235 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14236 } else {
14237 pm_parser_scope_forwarding_block_check(parser, &operator);
14238 }
14239
14240 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
      // Only the first block argument becomes `arguments->block`; a repeated
      // one is kept in the tree by appending it to the argument list.
14241 if (parsed_block_argument) {
14242 parse_arguments_append(parser, arguments, argument);
14243 } else {
14244 arguments->block = argument;
14245 }
14246
14247 if (match1(parser, PM_TOKEN_COMMA)) {
14248 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14249 }
14250
14251 parsed_block_argument = true;
14252 break;
14253 }
      // Splat argument: either an anonymous `*` (validated against the
      // enclosing scope) or `*expression`.
14254 case PM_TOKEN_USTAR: {
14255 parser_lex(parser);
14256 pm_token_t operator = parser->previous;
14257
      // NOTE(review): the `} else {` further down has no matching `if` in this
      // listing; the condition that selects the anonymous-splat path appears to
      // be on a line that is missing here. Confirm against the upstream file.
14259 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14260 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14261 if (parsed_bare_hash) {
14262 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14263 }
14264 } else {
14265 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14266
14267 if (parsed_bare_hash) {
14268 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14269 }
14270
14271 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14272 }
14273
14274 parse_arguments_append(parser, arguments, argument);
14275 break;
14276 }
      // Leading `...`: argument forwarding when allowed and nothing that can
      // start an expression follows; otherwise a range without a left operand.
14277 case PM_TOKEN_UDOT_DOT_DOT: {
14278 if (accepts_forwarding) {
14279 parser_lex(parser);
14280
14281 if (token_begins_expression_p(parser->current.type)) {
14282 // If the token begins an expression then this ... was
14283 // not actually argument forwarding but was instead a
14284 // range.
14285 pm_token_t operator = parser->previous;
14286 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14287
14288 // If we parse a range, we need to validate that we
14289 // didn't accidentally violate the nonassoc rules of the
14290 // ... operator.
14291 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14292 pm_range_node_t *range = (pm_range_node_t *) right;
14293 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14294 }
14295
14296 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14297 } else {
14298 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14299 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14300 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14301 }
14302
14303 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14304 parse_arguments_append(parser, arguments, argument);
14305 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14306 arguments->has_forwarding = true;
14307 parsed_forwarding_arguments = true;
14308 break;
14309 }
14310 }
14311 }
      // Only the forwarding path above ends in `break`. When forwarding is not
      // accepted, or when a range was built, control continues into the
      // default branch; that is why it checks `argument == NULL` before
      // parsing anything itself.
14313 default: {
14314 if (argument == NULL) {
14315 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14316 }
14317
14318 bool contains_keywords = false;
14319 bool contains_keyword_splat = false;
14320
      // If the argument is a label symbol or is followed by `=>`, it is really
      // the first key of a bare keyword hash, so wrap it in one.
14321 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14322 if (parsed_bare_hash) {
14323 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14324 }
14325
14326 pm_token_t operator;
14327 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14328 operator = parser->previous;
14329 } else {
14330 operator = not_provided(parser);
14331 }
14332
14333 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14334 contains_keywords = true;
14335
14336 // Create the set of static literals for this hash.
14337 pm_static_literals_t hash_keys = { 0 };
14338 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14339
14340 // Finish parsing the one we are part way through.
14341 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14342 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14343
14344 pm_keyword_hash_node_elements_append(bare_hash, argument);
14345 argument = (pm_node_t *) bare_hash;
14346
14347 // Then parse more if we have a comma
14348 if (accept1(parser, PM_TOKEN_COMMA) && (
14349 token_begins_expression_p(parser->current.type) ||
14350 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14351 )) {
14352 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14353 }
14354
14355 pm_static_literals_free(&hash_keys);
14356 parsed_bare_hash = true;
14357 }
14358
14359 parse_arguments_append(parser, arguments, argument);
14360
14361 pm_node_flags_t flags = 0;
14362 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14363 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14364 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14365
14366 break;
14367 }
14368 }
14369
14370 parsed_first_argument = true;
14371
14372 // If parsing the argument failed, we need to stop parsing arguments.
14373 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14374
14375 // If the terminator of these arguments is not EOF, then we have a
14376 // specific token we're looking for. In that case we can accept a
14377 // newline here because it is not functioning as a statement terminator.
14378 bool accepted_newline = false;
14379 if (terminator != PM_TOKEN_EOF) {
14380 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14381 }
14382
14383 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14384 // If we previously were on a comma and we just parsed a bare hash,
14385 // then we want to continue parsing arguments. This is because the
14386 // comma was grabbed up by the hash parser.
14387 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14388 // If there was a comma, then we need to check if we also accepted a
14389 // newline. If we did, then this is a syntax error.
14390 if (accepted_newline) {
14391 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14392 }
14393 } else {
14394 // If there is no comma at the end of the argument list then we're
14395 // done parsing arguments and can break out of this loop.
14396 break;
14397 }
14398
14399 // If we hit the terminator, then that means we have a trailing comma so
14400 // we can accept that output as well.
14401 if (match1(parser, terminator)) break;
14402 }
14403}
14404
14416parse_required_destructured_parameter(pm_parser_t *parser) {
14417 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14418
14419 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14420 pm_multi_target_node_opening_set(node, &parser->previous);
14421
14422 do {
14423 pm_node_t *param;
14424
14425 // If we get here then we have a trailing comma, which isn't allowed in
14426 // the grammar. In other places, multi targets _do_ allow trailing
14427 // commas, so here we'll assume this is a mistake of the user not
14428 // knowing it's not allowed here.
14429 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14430 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14431 pm_multi_target_node_targets_append(parser, node, param);
14432 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14433 break;
14434 }
14435
14436 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14437 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14438 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14439 pm_token_t star = parser->previous;
14440 pm_node_t *value = NULL;
14441
14442 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14443 pm_token_t name = parser->previous;
14444 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14445 if (pm_parser_parameter_name_check(parser, &name)) {
14446 pm_node_flag_set_repeated_parameter(value);
14447 }
14448 pm_parser_local_add_token(parser, &name, 1);
14449 }
14450
14451 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14452 } else {
14453 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14454 pm_token_t name = parser->previous;
14455
14456 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14457 if (pm_parser_parameter_name_check(parser, &name)) {
14458 pm_node_flag_set_repeated_parameter(param);
14459 }
14460 pm_parser_local_add_token(parser, &name, 1);
14461 }
14462
14463 pm_multi_target_node_targets_append(parser, node, param);
14464 } while (accept1(parser, PM_TOKEN_COMMA));
14465
14466 accept1(parser, PM_TOKEN_NEWLINE);
14467 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14468 pm_multi_target_node_closing_set(node, &parser->previous);
14469
14470 return node;
14471}
14472
/**
 * The states used to validate the order in which parameters appear.
 *
 * The numeric values matter: update_parameter_state compares them with `<`,
 * `>` and range checks, and only ever moves the current state to a smaller
 * value. The values are therefore spelled out explicitly.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14488
14492static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14493 [0] = PM_PARAMETERS_NO_CHANGE,
14494 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14495 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14496 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14497 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14498 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14499 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14500 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14501 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14502 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14503 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14504 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14505};
14506
14514static bool
14515update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14516 pm_parameters_order_t state = parameters_ordering[token->type];
14517 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14518
14519 // If we see another ordered argument after a optional argument
14520 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14521 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14522 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14523 return true;
14524 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14525 return true;
14526 }
14527
14528 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14529 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14530 return false;
14531 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14532 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14533 return false;
14534 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14535 // We know what transition we failed on, so we can provide a better error here.
14536 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14537 return false;
14538 }
14539
14540 if (state < *current) *current = state;
14541 return true;
14542}
14543
/**
 * Parse a list of parameters into a parameters node.
 *
 * Each loop iteration looks at the current token, validates its position with
 * update_parameter_state, builds the matching parameter node and stores it on
 * the parameters node. Parsing stops when a branch clears `parsing`, or when
 * no comma follows the parameter just parsed.
 *
 * @param parser The parser being driven.
 * @param binding_power The binding power used when parsing default values.
 * @param uses_parentheses Whether the list is parenthesized. When true, a
 *     newline is accepted before the separating comma, and a label followed
 *     directly by a newline or semicolon ends the list without producing a
 *     parameter.
 * @param allows_trailing_comma Whether a trailing comma may produce an implicit
 *     rest parameter instead of an error.
 * @param allows_forwarding_parameters Whether `...` is permitted; when false an
 *     error is reported but the parameter is still parsed.
 * @param accepts_blocks_in_defaults Whether to push `true` on the accepts-block
 *     stack while a default value is being parsed.
 * @param in_block Combined with `uses_parentheses` to decide whether
 *     `parser->in_keyword_arg` is set while a label parameter is parsed.
 * @param depth The current nesting depth; default values are parsed with
 *     `depth + 1`.
 * @return The parameters node, or NULL when nothing was parsed (the node's
 *     location is still empty), in which case the node is destroyed.
 */
14547static pm_parameters_node_t *
14548parse_parameters(
14549 pm_parser_t *parser,
14550 pm_binding_power_t binding_power,
14551 bool uses_parentheses,
14552 bool allows_trailing_comma,
14553 bool allows_forwarding_parameters,
14554 bool accepts_blocks_in_defaults,
14555 bool in_block,
14556 uint16_t depth
14557) {
14558 pm_do_loop_stack_push(parser, false);
14559
14560 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14561 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14562
14563 while (true) {
      // Cleared by any branch that wants the whole parameter list to end.
14564 bool parsing = true;
14565
14566 switch (parser->current.type) {
      // NOTE(review): no `case` label is visible for this first branch, and the
      // closing brace a few lines below has no visible opening one; a line
      // appears to be missing from this listing. The branch parses a
      // destructured parameter and files it under requireds or posts depending
      // on the ordering state.
14568 update_parameter_state(parser, &parser->current, &order);
14569 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14570
14571 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14572 pm_parameters_node_requireds_append(params, param);
14573 } else {
14574 pm_parameters_node_posts_append(params, param);
14575 }
14576 break;
14577 }
      // Block parameter: `&name`, or an anonymous `&`, which marks the current
      // scope with PM_SCOPE_PARAMETERS_FORWARDING_BLOCK.
14579 case PM_TOKEN_AMPERSAND: {
14580 update_parameter_state(parser, &parser->current, &order);
14581 parser_lex(parser);
14582
14583 pm_token_t operator = parser->previous;
14584 pm_token_t name;
14585
14586 bool repeated = false;
14587 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14588 name = parser->previous;
14589 repeated = pm_parser_parameter_name_check(parser, &name);
14590 pm_parser_local_add_token(parser, &name, 1);
14591 } else {
14592 name = not_provided(parser);
14593 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14594 }
14595
14596 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14597 if (repeated) {
14598 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14599 }
      // A second block parameter is an error; it is kept in the tree by adding
      // it to the posts list.
14600 if (params->block == NULL) {
14601 pm_parameters_node_block_set(params, param);
14602 } else {
14603 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14604 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14605 }
14606
14607 break;
14608 }
      // Forwarding parameter `...`. It is stored in the keyword_rest slot.
14609 case PM_TOKEN_UDOT_DOT_DOT: {
14610 if (!allows_forwarding_parameters) {
14611 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14612 }
14613
14614 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14615 parser_lex(parser);
14616
14617 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14618 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14619
14620 if (params->keyword_rest != NULL) {
14621 // If we already have a keyword rest parameter, then we replace it with the
14622 // forwarding parameter and move the keyword rest parameter to the posts list.
14623 pm_node_t *keyword_rest = params->keyword_rest;
14624 pm_parameters_node_posts_append(params, keyword_rest);
      // Only report here if update_parameter_state did not already record an
      // error for this token.
14625 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14626 params->keyword_rest = NULL;
14627 }
14628
14629 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14630 break;
14631 }
      // Name-like tokens. NOTE(review): several `case` labels appear to be
      // missing from this listing, both in the group of labels that opens this
      // branch and inside the inner switch on `parser->previous.type` below,
      // where error calls follow a `break` with no label in between. Confirm
      // against the upstream file.
14634 case PM_TOKEN_CONSTANT:
14637 case PM_TOKEN_METHOD_NAME: {
14638 parser_lex(parser);
14639 switch (parser->previous.type) {
14640 case PM_TOKEN_CONSTANT:
14641 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14642 break;
14644 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14645 break;
14647 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14648 break;
14650 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14651 break;
14653 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14654 break;
14655 default: break;
14656 }
14657
      // When a default value follows, the `=` token drives the ordering state;
      // otherwise the name token that was just consumed does.
14658 if (parser->current.type == PM_TOKEN_EQUAL) {
14659 update_parameter_state(parser, &parser->current, &order);
14660 } else {
14661 update_parameter_state(parser, &parser->previous, &order);
14662 }
14663
14664 pm_token_t name = parser->previous;
14665 bool repeated = pm_parser_parameter_name_check(parser, &name);
14666 pm_parser_local_add_token(parser, &name, 1);
14667
14668 if (match1(parser, PM_TOKEN_EQUAL)) {
14669 pm_token_t operator = parser->current;
14670 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14671 parser_lex(parser);
14672
      // Under the CRuby 3.3 version option, snapshot how many times the new
      // local has been read so a self-referencing default can be detected
      // after the value is parsed.
14673 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14674 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14675
14676 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14677 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14678 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14679
14680 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14681
14682 if (repeated) {
14683 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14684 }
14685 pm_parameters_node_optionals_append(params, param);
14686
14687 // If the value of the parameter increased the number of
14688 // reads of that parameter, then we need to warn that we
14689 // have a circular definition.
14690 if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14691 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14692 }
14693
14694 context_pop(parser);
14695
14696 // If parsing the value of the parameter resulted in error recovery,
14697 // then we can put a missing node in its place and stop parsing the
14698 // parameters entirely now.
14699 if (parser->recovering) {
14700 parsing = false;
14701 break;
14702 }
14703 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14704 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14705 if (repeated) {
14706 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14707 }
14708 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14709 } else {
      // Required parameters that come after optional or rest parameters are
      // stored in the posts list.
14710 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14711 if (repeated) {
14712 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14713 }
14714 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14715 }
14716
14717 break;
14718 }
      // Keyword parameter introduced by a label (`name:`).
14719 case PM_TOKEN_LABEL: {
14720 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14721 update_parameter_state(parser, &parser->current, &order);
14722
14723 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14724 parser_lex(parser);
14725
      // `local` is the label token with its final byte dropped, so it covers
      // just the name that becomes the local variable.
14726 pm_token_t name = parser->previous;
14727 pm_token_t local = name;
14728 local.end -= 1;
14729
14730 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14731 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14732 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14733 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14734 }
14735
14736 bool repeated = pm_parser_parameter_name_check(parser, &local);
14737 pm_parser_local_add_token(parser, &local, 1);
14738
      // What follows the label decides whether this is a required keyword (a
      // separator or closing token follows) or an optional one (a default
      // value follows).
14739 switch (parser->current.type) {
14740 case PM_TOKEN_COMMA:
14742 case PM_TOKEN_PIPE: {
14743 context_pop(parser);
14744
14745 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14746 if (repeated) {
14747 pm_node_flag_set_repeated_parameter(param);
14748 }
14749
14750 pm_parameters_node_keywords_append(params, param);
14751 break;
14752 }
14753 case PM_TOKEN_SEMICOLON:
14754 case PM_TOKEN_NEWLINE: {
14755 context_pop(parser);
14756
      // In a parenthesized list the parameters end here and no node is
      // created for this label.
14757 if (uses_parentheses) {
14758 parsing = false;
14759 break;
14760 }
14761
14762 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14763 if (repeated) {
14764 pm_node_flag_set_repeated_parameter(param);
14765 }
14766
14767 pm_parameters_node_keywords_append(params, param);
14768 break;
14769 }
14770 default: {
14771 pm_node_t *param;
14772
14773 if (token_begins_expression_p(parser->current.type)) {
14774 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14775 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14776
14777 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14778 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14779 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14780
14781 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14782 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14783 }
14784
14785 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14786 }
14787 else {
14788 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14789 }
14790
14791 if (repeated) {
14792 pm_node_flag_set_repeated_parameter(param);
14793 }
14794
14795 context_pop(parser);
14796 pm_parameters_node_keywords_append(params, param);
14797
14798 // If parsing the value of the parameter resulted in error recovery,
14799 // then we can put a missing node in its place and stop parsing the
14800 // parameters entirely now.
14801 if (parser->recovering) {
14802 parsing = false;
14803 break;
14804 }
14805 }
14806 }
14807
14808 parser->in_keyword_arg = false;
14809 break;
14810 }
      // Rest parameter: `*name`, or an anonymous `*`, which marks the current
      // scope with PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS.
14811 case PM_TOKEN_USTAR:
14812 case PM_TOKEN_STAR: {
14813 update_parameter_state(parser, &parser->current, &order);
14814 parser_lex(parser);
14815
14816 pm_token_t operator = parser->previous;
14817 pm_token_t name;
14818 bool repeated = false;
14819
14820 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14821 name = parser->previous;
14822 repeated = pm_parser_parameter_name_check(parser, &name);
14823 pm_parser_local_add_token(parser, &name, 1);
14824 } else {
14825 name = not_provided(parser);
14826 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14827 }
14828
14829 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14830 if (repeated) {
14831 pm_node_flag_set_repeated_parameter(param);
14832 }
14833
14834 if (params->rest == NULL) {
14835 pm_parameters_node_rest_set(params, param);
14836 } else {
14837 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14838 pm_parameters_node_posts_append(params, param);
14839 }
14840
14841 break;
14842 }
      // Keyword rest parameter: `**name`, an anonymous `**`, or `**nil`.
14843 case PM_TOKEN_STAR_STAR:
14844 case PM_TOKEN_USTAR_STAR: {
      // Remember the state before this token so `**nil` can be checked against
      // what preceded it.
14845 pm_parameters_order_t previous_order = order;
14846 update_parameter_state(parser, &parser->current, &order);
14847 parser_lex(parser);
14848
14849 pm_token_t operator = parser->previous;
14850 pm_node_t *param;
14851
14852 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14853 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14854 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14855 }
14856
14857 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14858 } else {
14859 pm_token_t name;
14860
14861 bool repeated = false;
14862 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14863 name = parser->previous;
14864 repeated = pm_parser_parameter_name_check(parser, &name);
14865 pm_parser_local_add_token(parser, &name, 1);
14866 } else {
14867 name = not_provided(parser);
14868 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14869 }
14870
14871 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14872 if (repeated) {
14873 pm_node_flag_set_repeated_parameter(param);
14874 }
14875 }
14876
14877 if (params->keyword_rest == NULL) {
14878 pm_parameters_node_keyword_rest_set(params, param);
14879 } else {
14880 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14881 pm_parameters_node_posts_append(params, param);
14882 }
14883
14884 break;
14885 }
      // Any other token ends the parameter list. If the previous token was a
      // comma, that comma was a trailing one and is handled here.
14886 default:
14887 if (parser->previous.type == PM_TOKEN_COMMA) {
14888 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14889 // If we get here, then we have a trailing comma in a
14890 // block parameter list.
14891 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14892
14893 if (params->rest == NULL) {
14894 pm_parameters_node_rest_set(params, param);
14895 } else {
14896 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14897 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14898 }
14899 } else {
14900 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14901 }
14902 }
14903
14904 parsing = false;
14905 break;
14906 }
14907
14908 // If we hit some kind of issue while parsing the parameter, this would
14909 // have been set to false. In that case, we need to break out of the
14910 // loop.
14911 if (!parsing) break;
14912
14913 bool accepted_newline = false;
14914 if (uses_parentheses) {
14915 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14916 }
14917
14918 if (accept1(parser, PM_TOKEN_COMMA)) {
14919 // If there was a comma, but we also accepted a newline, then this
14920 // is a syntax error.
14921 if (accepted_newline) {
14922 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14923 }
14924 } else {
14925 // If there was no comma, then we're done parsing parameters.
14926 break;
14927 }
14928 }
14929
14930 pm_do_loop_stack_pop(parser);
14931
14932 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14933 if (params->base.location.start == params->base.location.end) {
14934 pm_node_destroy(parser, (pm_node_t *) params);
14935 return NULL;
14936 }
14937
14938 return params;
14939}
14940
14945static size_t
14946token_newline_index(const pm_parser_t *parser) {
14947 if (parser->heredoc_end == NULL) {
14948 // This is the common case. In this case we can look at the previously
14949 // recorded newline in the newline list and subtract from the current
14950 // offset.
14951 return parser->newline_list.size - 1;
14952 } else {
14953 // This is unlikely. This is the case that we have already parsed the
14954 // start of a heredoc, so we cannot rely on looking at the previous
14955 // offset of the newline list, and instead must go through the whole
14956 // process of a binary search for the line number.
14957 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14958 }
14959}
14960
14965static int64_t
14966token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14967 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14968 const uint8_t *end = token->start;
14969
14970 // Skip over the BOM if it is present.
14971 if (
14972 newline_index == 0 &&
14973 parser->start[0] == 0xef &&
14974 parser->start[1] == 0xbb &&
14975 parser->start[2] == 0xbf
14976 ) cursor += 3;
14977
14978 int64_t column = 0;
14979 for (; cursor < end; cursor++) {
14980 switch (*cursor) {
14981 case '\t':
14982 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14983 break;
14984 case ' ':
14985 column++;
14986 break;
14987 default:
14988 column++;
14989 if (break_on_non_space) return -1;
14990 break;
14991 }
14992 }
14993
14994 return column;
14995}
14996
15001static void
15002parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
15003 // If these warnings are disabled (unlikely), then we can just return.
15004 if (!parser->warn_mismatched_indentation) return;
15005
15006 // If the tokens are on the same line, we do not warn.
15007 size_t closing_newline_index = token_newline_index(parser);
15008 if (opening_newline_index == closing_newline_index) return;
15009
15010 // If the opening token has anything other than spaces or tabs before it,
15011 // then we do not warn. This is unless we are matching up an `if`/`end` pair
15012 // and the `if` immediately follows an `else` keyword.
15013 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
15014 if (!if_after_else && (opening_column == -1)) return;
15015
15016 // Get a reference to the closing token off the current parser. This assumes
15017 // that the caller has placed this in the correct position.
15018 pm_token_t *closing_token = &parser->current;
15019
15020 // If the tokens are at the same indentation, we do not warn.
15021 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
15022 if ((closing_column == -1) || (opening_column == closing_column)) return;
15023
15024 // If the closing column is greater than the opening column and we are
15025 // allowing indentation, then we do not warn.
15026 if (allow_indent && (closing_column > opening_column)) return;
15027
15028 // Otherwise, add a warning.
15029 PM_PARSER_WARN_FORMAT(
15030 parser,
15031 closing_token->start,
15032 closing_token->end,
15033 PM_WARN_INDENTATION_MISMATCH,
15034 (int) (closing_token->end - closing_token->start),
15035 (const char *) closing_token->start,
15036 (int) (opening_token->end - opening_token->start),
15037 (const char *) opening_token->start,
15038 ((int32_t) opening_newline_index) + parser->start_line
15039 );
15040}
15041
/**
 * The kinds of construct whose rescue clauses can be parsed. The value is used
 * to pick the parsing context for the rescue body.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK = 2,
    PM_RESCUES_CLASS = 3,
    PM_RESCUES_DEF = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
15051
15056static inline void
15057parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15058 pm_rescue_node_t *current = NULL;
15059
15060 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15061 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15062 parser_lex(parser);
15063
15064 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15065
15066 switch (parser->current.type) {
15068 // Here we have an immediate => after the rescue keyword, in which case
15069 // we're going to have an empty list of exceptions to rescue (which
15070 // implies StandardError).
15071 parser_lex(parser);
15072 pm_rescue_node_operator_set(rescue, &parser->previous);
15073
15074 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15075 reference = parse_target(parser, reference, false, false);
15076
15077 pm_rescue_node_reference_set(rescue, reference);
15078 break;
15079 }
15080 case PM_TOKEN_NEWLINE:
15081 case PM_TOKEN_SEMICOLON:
15083 // Here we have a terminator for the rescue keyword, in which
15084 // case we're going to just continue on.
15085 break;
15086 default: {
15087 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15088 // Here we have something that could be an exception expression, so
15089 // we'll attempt to parse it here and any others delimited by commas.
15090
15091 do {
15092 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15093 pm_rescue_node_exceptions_append(rescue, expression);
15094
15095 // If we hit a newline, then this is the end of the rescue expression. We
15096 // can continue on to parse the statements.
15097 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15098
15099 // If we hit a `=>` then we're going to parse the exception variable. Once
15100 // we've done that, we'll break out of the loop and parse the statements.
15101 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15102 pm_rescue_node_operator_set(rescue, &parser->previous);
15103
15104 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15105 reference = parse_target(parser, reference, false, false);
15106
15107 pm_rescue_node_reference_set(rescue, reference);
15108 break;
15109 }
15110 } while (accept1(parser, PM_TOKEN_COMMA));
15111 }
15112 }
15113 }
15114
15115 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15116 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15117 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15118 }
15119 } else {
15120 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15121 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15122 }
15123
15125 pm_accepts_block_stack_push(parser, true);
15126 pm_context_t context;
15127
15128 switch (type) {
15129 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15130 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15131 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15132 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15133 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15134 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15135 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15136 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15137 }
15138
15139 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15140 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15141
15142 pm_accepts_block_stack_pop(parser);
15143 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15144 }
15145
15146 if (current == NULL) {
15147 pm_begin_node_rescue_clause_set(parent_node, rescue);
15148 } else {
15149 pm_rescue_node_subsequent_set(current, rescue);
15150 }
15151
15152 current = rescue;
15153 }
15154
15155 // The end node locations on rescue nodes will not be set correctly
15156 // since we won't know the end until we've found all subsequent
15157 // clauses. This sets the end location on all rescues once we know it.
15158 if (current != NULL) {
15159 const uint8_t *end_to_set = current->base.location.end;
15160 pm_rescue_node_t *clause = parent_node->rescue_clause;
15161
15162 while (clause != NULL) {
15163 clause->base.location.end = end_to_set;
15164 clause = clause->subsequent;
15165 }
15166 }
15167
15168 pm_token_t else_keyword;
15169 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15170 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15171 opening_newline_index = token_newline_index(parser);
15172
15173 else_keyword = parser->current;
15174 opening = &else_keyword;
15175
15176 parser_lex(parser);
15177 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15178
15179 pm_statements_node_t *else_statements = NULL;
15180 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15181 pm_accepts_block_stack_push(parser, true);
15182 pm_context_t context;
15183
15184 switch (type) {
15185 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15186 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15187 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15188 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15189 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15190 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15191 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15192 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15193 }
15194
15195 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15196 pm_accepts_block_stack_pop(parser);
15197
15198 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15199 }
15200
15201 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15202 pm_begin_node_else_clause_set(parent_node, else_clause);
15203
15204 // If we don't have a `current` rescue node, then this is a dangling
15205 // else, and it's an error.
15206 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15207 }
15208
15209 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15210 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15211 pm_token_t ensure_keyword = parser->current;
15212
15213 parser_lex(parser);
15214 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15215
15216 pm_statements_node_t *ensure_statements = NULL;
15217 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15218 pm_accepts_block_stack_push(parser, true);
15219 pm_context_t context;
15220
15221 switch (type) {
15222 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15223 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15224 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15225 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15226 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15227 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15228 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15229 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15230 }
15231
15232 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15233 pm_accepts_block_stack_pop(parser);
15234
15235 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15236 }
15237
15238 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15239 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15240 }
15241
15242 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15243 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15244 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15245 } else {
15246 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15247 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15248 }
15249}
15250
15255static pm_begin_node_t *
15256parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15257 pm_token_t begin_keyword = not_provided(parser);
15258 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15259
15260 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15261 node->base.location.start = start;
15262
15263 return node;
15264}
15265
15270parse_block_parameters(
15271 pm_parser_t *parser,
15272 bool allows_trailing_comma,
15273 const pm_token_t *opening,
15274 bool is_lambda_literal,
15275 bool accepts_blocks_in_defaults,
15276 uint16_t depth
15277) {
15278 pm_parameters_node_t *parameters = NULL;
15279 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15280 parameters = parse_parameters(
15281 parser,
15282 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15283 false,
15284 allows_trailing_comma,
15285 false,
15286 accepts_blocks_in_defaults,
15287 true,
15288 (uint16_t) (depth + 1)
15289 );
15290 }
15291
15292 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15293 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15294 accept1(parser, PM_TOKEN_NEWLINE);
15295
15296 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15297 do {
15298 switch (parser->current.type) {
15299 case PM_TOKEN_CONSTANT:
15300 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15301 parser_lex(parser);
15302 break;
15304 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15305 parser_lex(parser);
15306 break;
15308 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15309 parser_lex(parser);
15310 break;
15312 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15313 parser_lex(parser);
15314 break;
15315 default:
15316 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15317 break;
15318 }
15319
15320 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15321 pm_parser_local_add_token(parser, &parser->previous, 1);
15322
15323 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15324 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15325
15326 pm_block_parameters_node_append_local(block_parameters, local);
15327 } while (accept1(parser, PM_TOKEN_COMMA));
15328 }
15329 }
15330
15331 return block_parameters;
15332}
15333
15338static bool
15339outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15340 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15341 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15342 }
15343
15344 return false;
15345}
15346
/**
 * The source spellings of the nine numbered parameters, in order, so that
 * index i holds the name of parameter number i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1", "_2", "_3",
    "_4", "_5", "_6",
    "_7", "_8", "_9"
};
15355
15361static pm_node_t *
15362parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15363 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15364
15365 // If we have ordinary parameters, then we will return them as the set of
15366 // parameters.
15367 if (parameters != NULL) {
15368 // If we also have implicit parameters, then this is an error.
15369 if (implicit_parameters->size > 0) {
15370 pm_node_t *node = implicit_parameters->nodes[0];
15371
15373 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15375 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15376 } else {
15377 assert(false && "unreachable");
15378 }
15379 }
15380
15381 return parameters;
15382 }
15383
15384 // If we don't have any implicit parameters, then the set of parameters is
15385 // NULL.
15386 if (implicit_parameters->size == 0) {
15387 return NULL;
15388 }
15389
15390 // If we don't have ordinary parameters, then we now must validate our set
15391 // of implicit parameters. We can only have numbered parameters or it, but
15392 // they cannot be mixed.
15393 uint8_t numbered_parameter = 0;
15394 bool it_parameter = false;
15395
15396 for (size_t index = 0; index < implicit_parameters->size; index++) {
15397 pm_node_t *node = implicit_parameters->nodes[index];
15398
15400 if (it_parameter) {
15401 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15402 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15403 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15404 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15405 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15406 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15407 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15408 } else {
15409 assert(false && "unreachable");
15410 }
15412 if (numbered_parameter > 0) {
15413 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15414 } else {
15415 it_parameter = true;
15416 }
15417 }
15418 }
15419
15420 if (numbered_parameter > 0) {
15421 // Go through the parent scopes and mark them as being disallowed from
15422 // using numbered parameters because this inner scope is using them.
15423 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15424 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15425 }
15426
15427 const pm_location_t location = { .start = opening->start, .end = closing->end };
15428 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15429 }
15430
15431 if (it_parameter) {
15432 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15433 }
15434
15435 return NULL;
15436}
15437
15441static pm_block_node_t *
15442parse_block(pm_parser_t *parser, uint16_t depth) {
15443 pm_token_t opening = parser->previous;
15444 accept1(parser, PM_TOKEN_NEWLINE);
15445
15446 pm_accepts_block_stack_push(parser, true);
15447 pm_parser_scope_push(parser, false);
15448
15449 pm_block_parameters_node_t *block_parameters = NULL;
15450
15451 if (accept1(parser, PM_TOKEN_PIPE)) {
15452 pm_token_t block_parameters_opening = parser->previous;
15453 if (match1(parser, PM_TOKEN_PIPE)) {
15454 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15455 parser->command_start = true;
15456 parser_lex(parser);
15457 } else {
15458 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15459 accept1(parser, PM_TOKEN_NEWLINE);
15460 parser->command_start = true;
15461 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15462 }
15463
15464 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15465 }
15466
15467 accept1(parser, PM_TOKEN_NEWLINE);
15468 pm_node_t *statements = NULL;
15469
15470 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15471 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15472 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15473 }
15474
15475 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15476 } else {
15477 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15479 pm_accepts_block_stack_push(parser, true);
15480 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15481 pm_accepts_block_stack_pop(parser);
15482 }
15483
15484 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15485 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15486 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15487 }
15488 }
15489
15490 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15491 }
15492
15493 pm_constant_id_list_t locals;
15494 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15495 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15496
15497 pm_parser_scope_pop(parser);
15498 pm_accepts_block_stack_pop(parser);
15499
15500 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15501}
15502
15508static bool
15509parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15510 bool found = false;
15511
15512 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15513 found |= true;
15514 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15515
15516 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15517 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15518 } else {
15519 pm_accepts_block_stack_push(parser, true);
15520 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15521
15522 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15523 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15524 parser->previous.start = parser->previous.end;
15525 parser->previous.type = PM_TOKEN_MISSING;
15526 }
15527
15528 pm_accepts_block_stack_pop(parser);
15529 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15530 }
15531 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15532 found |= true;
15533 pm_accepts_block_stack_push(parser, false);
15534
15535 // If we get here, then the subsequent token cannot be used as an infix
15536 // operator. In this case we assume the subsequent token is part of an
15537 // argument to this method call.
15538 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15539
15540 // If we have done with the arguments and still not consumed the comma,
15541 // then we have a trailing comma where we need to check whether it is
15542 // allowed or not.
15543 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15544 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15545 }
15546
15547 pm_accepts_block_stack_pop(parser);
15548 }
15549
15550 // If we're at the end of the arguments, we can now check if there is a block
15551 // node that starts with a {. If there is, then we can parse it and add it to
15552 // the arguments.
15553 if (accepts_block) {
15554 pm_block_node_t *block = NULL;
15555
15556 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15557 found |= true;
15558 block = parse_block(parser, (uint16_t) (depth + 1));
15559 pm_arguments_validate_block(parser, arguments, block);
15560 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15561 found |= true;
15562 block = parse_block(parser, (uint16_t) (depth + 1));
15563 }
15564
15565 if (block != NULL) {
15566 if (arguments->block == NULL && !arguments->has_forwarding) {
15567 arguments->block = (pm_node_t *) block;
15568 } else {
15569 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15570
15571 if (arguments->block != NULL) {
15572 if (arguments->arguments == NULL) {
15573 arguments->arguments = pm_arguments_node_create(parser);
15574 }
15575 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15576 }
15577 arguments->block = (pm_node_t *) block;
15578 }
15579 }
15580 }
15581
15582 return found;
15583}
15584
/**
 * Check whether a `return` is permitted at the current position by walking the
 * parser's context chain from innermost to outermost, and report
 * PM_ERR_RETURN_INVALID on `node` when it is not.
 *
 * NOTE(review): the listing numbers skip between several case labels below,
 * so additional case labels are presumably missing from this view. Confirm
 * against the full source before relying on the lists shown here.
 */
15589static void
15590parse_return(pm_parser_t *parser, pm_node_t *node) {
// Remembers that a singleton-class context was crossed on the way out.
15591 bool in_sclass = false;
15592 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15593 switch (context_node->context) {
15597 case PM_CONTEXT_BEGIN:
15598 case PM_CONTEXT_CASE_IN:
15601 case PM_CONTEXT_DEFINED:
15602 case PM_CONTEXT_ELSE:
15603 case PM_CONTEXT_ELSIF:
15604 case PM_CONTEXT_EMBEXPR:
15606 case PM_CONTEXT_FOR:
15607 case PM_CONTEXT_IF:
15609 case PM_CONTEXT_MAIN:
15611 case PM_CONTEXT_PARENS:
15612 case PM_CONTEXT_POSTEXE:
15614 case PM_CONTEXT_PREEXE:
15616 case PM_CONTEXT_TERNARY:
15617 case PM_CONTEXT_UNLESS:
15618 case PM_CONTEXT_UNTIL:
15619 case PM_CONTEXT_WHILE:
15620 // Keep iterating up the lists of contexts, because returns can
15621 // see through these.
15622 continue;
// A singleton-class context does not decide the outcome by itself: keep
// walking outward, and only report the error after the loop if no enclosing
// context returned first.
15626 case PM_CONTEXT_SCLASS:
15627 in_sclass = true;
15628 continue;
15632 case PM_CONTEXT_CLASS:
15636 case PM_CONTEXT_MODULE:
15637 // These contexts are invalid for a return.
15638 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15639 return;
15649 case PM_CONTEXT_DEF:
15655 // These contexts are valid for a return, and we should not
15656 // continue to loop.
15657 return;
15658 case PM_CONTEXT_NONE:
15659 // This case should never happen.
15660 assert(false && "unreachable");
15661 break;
15662 }
15663 }
// The whole chain was walked without an enclosing context returning early.
// The return is invalid only if a singleton-class context was crossed.
15664 if (in_sclass) {
15665 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15666 }
15667}
15668
/**
 * Check whether a block exit node is permitted at the current position by
 * walking the parser's context chain from innermost to outermost. Per
 * flush_block_exits, the nodes handled this way are break, next and redo
 * nodes.
 *
 * No error is reported here. In a disallowed context the node is appended to
 * parser->current_block_exits so that validation can be deferred.
 *
 * NOTE(review): the listing numbers skip between several case labels below,
 * so additional case labels are presumably missing from this view. Confirm
 * against the full source before relying on the lists shown here.
 */
15673static void
15674parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15675 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15676 switch (context_node->context) {
15682 case PM_CONTEXT_DEFINED:
15683 case PM_CONTEXT_FOR:
15690 case PM_CONTEXT_POSTEXE:
15691 case PM_CONTEXT_UNTIL:
15692 case PM_CONTEXT_WHILE:
15693 // These are the good cases. We're allowed to have a block exit
15694 // in these contexts.
15695 return;
15696 case PM_CONTEXT_DEF:
15701 case PM_CONTEXT_MAIN:
15702 case PM_CONTEXT_PREEXE:
15703 case PM_CONTEXT_SCLASS:
15707 // These are the bad cases. We're not allowed to have a block
15708 // exit in these contexts.
15709 //
15710 // If we get here, then we're about to mark this block exit
15711 // as invalid. However, it could later _become_ valid if we
15712 // find a trailing while/until on the expression. In this
15713 // case instead of adding the error here, we'll add the
15714 // block exit to the list of exits for the expression, and
15715 // the node parsing will handle validating it instead.
15716 assert(parser->current_block_exits != NULL);
15717 pm_node_list_append(parser->current_block_exits, node);
15718 return;
15722 case PM_CONTEXT_BEGIN:
15723 case PM_CONTEXT_CASE_IN:
15728 case PM_CONTEXT_CLASS:
15730 case PM_CONTEXT_ELSE:
15731 case PM_CONTEXT_ELSIF:
15732 case PM_CONTEXT_EMBEXPR:
15734 case PM_CONTEXT_IF:
15738 case PM_CONTEXT_MODULE:
15740 case PM_CONTEXT_PARENS:
15743 case PM_CONTEXT_TERNARY:
15744 case PM_CONTEXT_UNLESS:
15745 // In these contexts we should continue walking up the list of
15746 // contexts.
15747 break;
15748 case PM_CONTEXT_NONE:
15749 // This case should never happen.
15750 assert(false && "unreachable");
15751 break;
15752 }
15753 }
15754}
15755
15760static pm_node_list_t *
15761push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15762 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15763 parser->current_block_exits = current_block_exits;
15764 return previous_block_exits;
15765}
15766
15772static void
15773flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15774 pm_node_t *block_exit;
15775 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15776 const char *type;
15777
15778 switch (PM_NODE_TYPE(block_exit)) {
15779 case PM_BREAK_NODE: type = "break"; break;
15780 case PM_NEXT_NODE: type = "next"; break;
15781 case PM_REDO_NODE: type = "redo"; break;
15782 default: assert(false && "unreachable"); type = ""; break;
15783 }
15784
15785 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15786 }
15787
15788 parser->current_block_exits = previous_block_exits;
15789}
15790
15795static void
15796pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15798 // If we matched a trailing while/until, then all of the block exits in
15799 // the contained list are valid. In this case we do not need to do
15800 // anything.
15801 parser->current_block_exits = previous_block_exits;
15802 } else if (previous_block_exits != NULL) {
15803 // If we did not matching a trailing while/until, then all of the block
15804 // exits contained in the list are invalid for this specific context.
15805 // However, they could still become valid in a higher level context if
15806 // there is another list above this one. In this case we'll push all of
15807 // the block exits up to the previous list.
15808 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15809 parser->current_block_exits = previous_block_exits;
15810 } else {
15811 // If we did not match a trailing while/until and this was the last
15812 // chance to do so, then all of the block exits in the list are invalid
15813 // and we need to add an error for each of them.
15814 flush_block_exits(parser, previous_block_exits);
15815 }
15816}
15817
15818static inline pm_node_t *
15819parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15820 context_push(parser, PM_CONTEXT_PREDICATE);
15821 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15822 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15823
15824 // Predicates are closed by a term, a "then", or a term and then a "then".
15825 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15826
15827 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15828 predicate_closed = true;
15829 *then_keyword = parser->previous;
15830 }
15831
15832 if (!predicate_closed) {
15833 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15834 }
15835
15836 context_pop(parser);
15837 return predicate;
15838}
15839
15840static inline pm_node_t *
15841parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15842 pm_node_list_t current_block_exits = { 0 };
15843 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15844
15845 pm_token_t keyword = parser->previous;
15846 pm_token_t then_keyword = not_provided(parser);
15847
15848 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15849 pm_statements_node_t *statements = NULL;
15850
15852 pm_accepts_block_stack_push(parser, true);
15853 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15854 pm_accepts_block_stack_pop(parser);
15855 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15856 }
15857
15858 pm_token_t end_keyword = not_provided(parser);
15859 pm_node_t *parent = NULL;
15860
15861 switch (context) {
15862 case PM_CONTEXT_IF:
15863 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15864 break;
15865 case PM_CONTEXT_UNLESS:
15866 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15867 break;
15868 default:
15869 assert(false && "unreachable");
15870 break;
15871 }
15872
15873 pm_node_t *current = parent;
15874
15875 // Parse any number of elsif clauses. This will form a linked list of if
15876 // nodes pointing to each other from the top.
15877 if (context == PM_CONTEXT_IF) {
15878 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15879 if (parser_end_of_line_p(parser)) {
15880 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15881 }
15882
15883 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15884 pm_token_t elsif_keyword = parser->current;
15885 parser_lex(parser);
15886
15887 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15888 pm_accepts_block_stack_push(parser, true);
15889
15890 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15891 pm_accepts_block_stack_pop(parser);
15892 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15893
15894 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15895 ((pm_if_node_t *) current)->subsequent = elsif;
15896 current = elsif;
15897 }
15898 }
15899
15900 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15901 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15902 opening_newline_index = token_newline_index(parser);
15903
15904 parser_lex(parser);
15905 pm_token_t else_keyword = parser->previous;
15906
15907 pm_accepts_block_stack_push(parser, true);
15908 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15909 pm_accepts_block_stack_pop(parser);
15910
15911 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15912 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15913 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15914
15915 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15916
15917 switch (context) {
15918 case PM_CONTEXT_IF:
15919 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15920 break;
15921 case PM_CONTEXT_UNLESS:
15922 ((pm_unless_node_t *) parent)->else_clause = else_node;
15923 break;
15924 default:
15925 assert(false && "unreachable");
15926 break;
15927 }
15928 } else {
15929 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15930 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15931 }
15932
15933 // Set the appropriate end location for all of the nodes in the subtree.
15934 switch (context) {
15935 case PM_CONTEXT_IF: {
15936 pm_node_t *current = parent;
15937 bool recursing = true;
15938
15939 while (recursing) {
15940 switch (PM_NODE_TYPE(current)) {
15941 case PM_IF_NODE:
15942 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15943 current = ((pm_if_node_t *) current)->subsequent;
15944 recursing = current != NULL;
15945 break;
15946 case PM_ELSE_NODE:
15947 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15948 recursing = false;
15949 break;
15950 default: {
15951 recursing = false;
15952 break;
15953 }
15954 }
15955 }
15956 break;
15957 }
15958 case PM_CONTEXT_UNLESS:
15959 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15960 break;
15961 default:
15962 assert(false && "unreachable");
15963 break;
15964 }
15965
15966 pop_block_exits(parser, previous_block_exits);
15967 pm_node_list_free(&current_block_exits);
15968
15969 return parent;
15970}
15971
/**
 * Expands to the case labels for every keyword token listed below. The leading
 * `case` and the trailing colon are supplied at the use site, i.e. it is
 * written as `case PM_CASE_KEYWORD:` inside a switch.
 */
#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15987
/**
 * A run of `case` labels for the operator tokens listed below. As with the
 * other PM_CASE_* macros, the leading `case` and the trailing colon are left
 * to the use site: `case PM_CASE_OPERATOR:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
16000
/**
 * A run of `case` labels for the literal-like tokens listed below (numeric
 * literals, string/symbol/regexp/list openers, a handful of keywords, lambda
 * and heredoc starts). Use as `case PM_CASE_PRIMITIVE:`.
 *
 * Note that PM_TOKEN_BACKTICK and PM_TOKEN_UMINUS_NUM also appear in
 * PM_CASE_OPERATOR, so the two macros cannot share one switch statement.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
16015
/**
 * A run of `case` labels for the tokens listed below, grouped under the name
 * "parameter". Use as `case PM_CASE_PARAMETER:`.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
16024
/**
 * A run of `case` labels for the node types listed below, grouped under the
 * name "writable". Unlike the token macros above, these labels are node types
 * (PM_*_NODE), so the macro belongs in a switch over a node's type. Use as
 * `case PM_CASE_WRITABLE:`.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
16033
16034// Compile-time check that the string and encoding "forced UTF-8" flags have
16035// the same integer value, so that the type of a node can be switched safely
// without having to move (re-map) the flag bits.
16036PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16037
// Returns the node flags implied by the parser's current encoding state
// (parser->explicit_encoding and parser->encoding), or 0 when none apply.
//
// NOTE(review): the embedded listing numbers jump 16044 -> 16046,
// 16047 -> 16049 and 16053 -> 16055, so three lines are not visible in this
// chunk. Presumably they are the inner `if` that the `} else if` below pairs
// with and the two flag-returning statements described by the comments;
// confirm against the upstream file before relying on this text.
16042static inline pm_node_flags_t
16043parse_unescaped_encoding(const pm_parser_t *parser) {
16044 if (parser->explicit_encoding != NULL) {
16046 // If there's an explicit encoding and it's using a UTF-8 escape
16047 // sequence, then mark the string as UTF-8.
16049 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16050 // If there's a non-UTF-8 escape sequence being used, then the
16051 // string uses the source encoding, unless the source is marked as
16052 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16053 // order to keep the string valid.
16055 }
16056 }
// No explicit encoding was recorded (or no special case matched): no flags.
16057 return 0;
16058}
16059
// Parses a single part of a string-like literal, dispatching on the current
// token, and returns the node created for it: plain string content, an
// embedded statements node (#{...}), or an embedded variable node (#@foo).
// For any other token the token is consumed, PM_ERR_CANNOT_PARSE_STRING_PART
// is reported against it, and NULL is returned. `depth` is forwarded (as
// depth + 1) to parse_statements when parsing embedded statements.
//
// NOTE(review): several `case` labels are not visible in this chunk (the
// embedded listing numbers skip 16073, 16089, 16146, 16152, 16158, 16164 and
// 16170). The gaps are flagged inline; confirm the labels against upstream.
16064static pm_node_t *
16065parse_string_part(pm_parser_t *parser, uint16_t depth) {
16066 switch (parser->current.type) {
16067 // Here the lexer has returned to us plain string content. In this case
16068 // we'll create a string node that has no opening or closing and return that
16069 // as the part. These kinds of parts look like:
16070 //
16071 // "aaa #{bbb} #@ccc ddd"
16072 // ^^^^ ^ ^^^^
// NOTE(review): listing line 16073 is missing here - presumably the `case`
// label (with an opening brace) for the string-content token.
16074 pm_token_t opening = not_provided(parser);
16075 pm_token_t closing = not_provided(parser);
16076
16077 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16078 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16079
16080 parser_lex(parser);
16081 return node;
16082 }
16083 // Here the lexer has returned the beginning of an embedded expression. In
16084 // that case we'll parse the inner statements and return that as the part.
16085 // These kinds of parts look like:
16086 //
16087 // "aaa #{bbb} #@ccc ddd"
16088 // ^^^^^^
// NOTE(review): listing line 16089 is missing here - presumably the `case`
// label (with an opening brace) for the embedded-expression-begin token.
16090 // Ruby disallows seeing encoding around interpolation in strings,
16091 // even though it is known at parse time.
16092 parser->explicit_encoding = NULL;
16093
// Save the lexer state and brace nesting so they can be restored once the
// embedded statements have been parsed.
16094 pm_lex_state_t state = parser->lex_state;
16095 int brace_nesting = parser->brace_nesting;
16096
16097 parser->brace_nesting = 0;
16098 lex_state_set(parser, PM_LEX_STATE_BEG);
16099 parser_lex(parser);
16100
16101 pm_token_t opening = parser->previous;
16102 pm_statements_node_t *statements = NULL;
16103
// An immediately following end token means the interpolation is empty, in
// which case statements stays NULL.
16104 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16105 pm_accepts_block_stack_push(parser, true);
16106 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16107 pm_accepts_block_stack_pop(parser);
16108 }
16109
16110 parser->brace_nesting = brace_nesting;
16111 lex_state_set(parser, state);
16112
16113 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16114 pm_token_t closing = parser->previous;
16115
16116 // If this set of embedded statements only contains a single
16117 // statement, then Ruby does not consider it as a possible statement
16118 // that could emit a line event.
16119 if (statements != NULL && statements->body.size == 1) {
16120 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16121 }
16122
16123 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16124 }
16125
16126 // Here the lexer has returned the beginning of an embedded variable.
16127 // In that case we'll parse the variable and create an appropriate node
16128 // for it and then return that node. These kinds of parts look like:
16129 //
16130 // "aaa #{bbb} #@ccc ddd"
16131 // ^^^^^
16132 case PM_TOKEN_EMBVAR: {
16133 // Ruby disallows seeing encoding around interpolation in strings,
16134 // even though it is known at parse time.
16135 parser->explicit_encoding = NULL;
16136
16137 lex_state_set(parser, PM_LEX_STATE_BEG);
16138 parser_lex(parser);
16139
16140 pm_token_t operator = parser->previous;
16141 pm_node_t *variable;
16142
16143 switch (parser->current.type) {
16144 // In this case a back reference is being interpolated. We'll
16145 // create a back reference read node.
// NOTE(review): listing line 16146 is missing - presumably the `case` label
// for the back-reference token.
16147 parser_lex(parser);
16148 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16149 break;
16150 // In this case an nth reference is being interpolated. We'll
16151 // create a numbered reference read node.
// NOTE(review): listing line 16152 is missing - presumably the `case` label
// for the numbered-reference token.
16153 parser_lex(parser);
16154 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16155 break;
16156 // In this case a global variable is being interpolated. We'll
16157 // create a global variable read node.
// NOTE(review): listing line 16158 is missing - presumably the `case` label
// for the global-variable token.
16159 parser_lex(parser);
16160 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16161 break;
16162 // In this case an instance variable is being interpolated.
16163 // We'll create an instance variable read node.
// NOTE(review): listing line 16164 is missing - presumably the `case` label
// for the instance-variable token.
16165 parser_lex(parser);
16166 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16167 break;
16168 // In this case a class variable is being interpolated. We'll
16169 // create a class variable read node.
// NOTE(review): listing line 16170 is missing - presumably the `case` label
// for the class-variable token.
16171 parser_lex(parser);
16172 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16173 break;
16174 // We can hit here if we got an invalid token. In that case
16175 // we'll not attempt to lex this token and instead just return a
16176 // missing node.
16177 default:
16178 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16179 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16180 break;
16181 }
16182
16183 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16184 }
// Any other token cannot start a string part: consume it, report the error
// against it, and signal "no part" to the caller with NULL.
16185 default:
16186 parser_lex(parser);
16187 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16188 return NULL;
16189 }
16190}
16191
// Returns a pointer to the end of the operator name held by `name`, for use
// as the end of a symbol's value. For the ~ and ! tokens, a trailing '@'
// (as in `~@` / `!@`) is excluded by returning name->end - 1; every other
// case returns name->end.
16197static const uint8_t *
16198parse_operator_symbol_name(const pm_token_t *name) {
16199 switch (name->type) {
16200 case PM_TOKEN_TILDE:
16201 case PM_TOKEN_BANG:
16202 if (name->end[-1] == '@') return name->end - 1;
// NOTE(review): listing line 16203 is missing here - presumably an explicit
// fallthrough marker. Either way, control reaches `default` when there is no
// trailing '@'.
16204 default:
16205 return name->end;
16206 }
16207}
16208
16209static pm_node_t *
16210parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16211 pm_token_t closing = not_provided(parser);
16212 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16213
16214 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16215
16216 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16217 parser_lex(parser);
16218
16219 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16220 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16221
16222 return (pm_node_t *) symbol;
16223}
16224
// Parses a symbol literal whose opening token has already been consumed (it
// is read from parser->previous). `lex_mode` is the lex mode supplied by the
// caller for the symbol:
//   - when it is not PM_LEX_STRING, the symbol is a bare name, keyword or
//     operator (e.g. :foo, :+);
//   - when it is PM_LEX_STRING with interpolation enabled, the symbol may
//     become either a plain symbol node or an interpolated symbol node;
//   - otherwise it is a quoted symbol without interpolation.
// `next_state`, unless it is PM_LEX_STATE_NONE, is installed with
// lex_state_set before the final token of the symbol is consumed. `depth` is
// forwarded (as depth + 1) to parse_string_part.
//
// NOTE(review): the embedded listing numbers skip 16240 and 16242-16247
// inside the first switch, so some `case` labels are not visible here.
16230static pm_node_t *
16231parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16232 const pm_token_t opening = parser->previous;
16233
16234 if (lex_mode->mode != PM_LEX_STRING) {
16235 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16236
16237 switch (parser->current.type) {
16238 case PM_CASE_OPERATOR:
16239 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
// NOTE(review): listing line 16240 is missing - presumably another `case`
// label that shares the body below.
16241 case PM_TOKEN_CONSTANT:
// NOTE(review): listing lines 16242-16247 are missing - presumably further
// `case` labels for other name-like tokens; confirm against upstream.
16248 case PM_CASE_KEYWORD:
16249 parser_lex(parser);
16250 break;
16251 default:
16252 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16253 break;
16254 }
16255
// The name token is now parser->previous; the symbol's value is exactly the
// token's source bytes.
16256 pm_token_t closing = not_provided(parser);
16257 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16258
16259 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16260 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16261
16262 return (pm_node_t *) symbol;
16263 }
16264
16265 if (lex_mode->as.string.interpolation) {
16266 // If we have the end of the symbol, then we can return an empty symbol.
16267 if (match1(parser, PM_TOKEN_STRING_END)) {
16268 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16269 parser_lex(parser);
16270
16271 pm_token_t content = not_provided(parser);
16272 pm_token_t closing = parser->previous;
16273 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16274 }
16275
16276 // Now we can parse the first part of the symbol.
16277 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16278
16279 // If we got a string part, then it's possible that we could transform
16280 // what looks like an interpolated symbol into a regular symbol.
16281 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16282 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16283 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16284
16285 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16286 }
16287
// Otherwise build an interpolated symbol. The closing location passed here
// is a placeholder (the opening token); it is replaced below once the real
// terminator has been seen.
16288 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16289 if (part) pm_interpolated_symbol_node_append(symbol, part);
16290
16291 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16292 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16293 pm_interpolated_symbol_node_append(symbol, part);
16294 }
16295 }
16296
16297 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
// At EOF the error is reported against the opening token rather than via
// expect1.
16298 if (match1(parser, PM_TOKEN_EOF)) {
16299 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16300 } else {
16301 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16302 }
16303
16304 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16305 return (pm_node_t *) symbol;
16306 }
16307
// From here on: a quoted symbol without interpolation.
16308 pm_token_t content;
16309 pm_string_t unescaped;
16310
16311 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16312 content = parser->current;
16313 unescaped = parser->current_string;
16314 parser_lex(parser);
16315
16316 // If we have two string contents in a row, then the content of this
16317 // symbol is split because of heredoc contents. This looks like:
16318 //
16319 // <<A; :'a
16320 // A
16321 // b'
16322 //
16323 // In this case, the best way we have to represent this is as an
16324 // interpolated symbol node, so that's what we'll do here.
16325 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16326 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16327 pm_token_t bounds = not_provided(parser);
16328
16329 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16330 pm_interpolated_symbol_node_append(symbol, part);
16331
16332 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16333 pm_interpolated_symbol_node_append(symbol, part);
16334
16335 if (next_state != PM_LEX_STATE_NONE) {
16336 lex_state_set(parser, next_state);
16337 }
16338
16339 parser_lex(parser);
16340 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16341
16342 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16343 return (pm_node_t *) symbol;
16344 }
16345 } else {
// No content token at all: synthesize an empty content token positioned at
// the end of the previously consumed token.
16346 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16347 pm_string_shared_init(&unescaped, content.start, content.end);
16348 }
16349
16350 if (next_state != PM_LEX_STATE_NONE) {
16351 lex_state_set(parser, next_state);
16352 }
16353
16354 if (match1(parser, PM_TOKEN_EOF)) {
16355 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16356 } else {
16357 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16358 }
16359
16360 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16361}
16362
// Parses one argument to `undef` and returns the node for it. Operators,
// keywords, constants and method names become symbol nodes with no opening
// or closing; a token that begins a symbol literal is handed to
// parse_symbol. Any other token reports PM_ERR_UNDEF_ARGUMENT and yields a
// missing node spanning the current token (the token is not consumed).
// `depth` is forwarded (as depth + 1) to parse_symbol.
16367static inline pm_node_t *
16368parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16369 switch (parser->current.type) {
16370 case PM_CASE_OPERATOR: {
16371 const pm_token_t opening = not_provided(parser);
16372 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16373 }
16374 case PM_CASE_KEYWORD:
16375 case PM_TOKEN_CONSTANT:
// NOTE(review): listing line 16376 is missing here - presumably one more
// `case` label sharing this body; confirm against upstream.
16377 case PM_TOKEN_METHOD_NAME: {
16378 parser_lex(parser);
16379
16380 pm_token_t opening = not_provided(parser);
16381 pm_token_t closing = not_provided(parser);
16382 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16383
16384 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16385 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16386
16387 return (pm_node_t *) symbol;
16388 }
16389 case PM_TOKEN_SYMBOL_BEGIN: {
// Copy the lex mode before lexing: parse_symbol needs the mode that was
// current when the symbol's opening token was seen.
16390 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16391 parser_lex(parser);
16392
16393 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16394 }
16395 default:
16396 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16397 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16398 }
16399}
16400
// Parses one argument to `alias` and returns the node for it. `first` is
// true for the first of the two arguments; in that case the lex state is set
// to PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM before moving past the
// argument. Name-like tokens become symbol nodes, symbol literals go through
// parse_symbol, and the three read-node branches near the end create
// back-reference, numbered-reference and global-variable read nodes. Any
// other token reports PM_ERR_ALIAS_ARGUMENT and yields a missing node
// spanning the current token (the token is not consumed). `depth` is
// forwarded (as depth + 1) to parse_symbol.
//
// NOTE(review): the embedded listing numbers skip 16416, 16436, 16439 and
// 16442, so four `case` labels are not visible in this chunk.
16407static inline pm_node_t *
16408parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16409 switch (parser->current.type) {
16410 case PM_CASE_OPERATOR: {
16411 const pm_token_t opening = not_provided(parser);
16412 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16413 }
16414 case PM_CASE_KEYWORD:
16415 case PM_TOKEN_CONSTANT:
// NOTE(review): listing line 16416 is missing here - presumably one more
// `case` label sharing this body.
16417 case PM_TOKEN_METHOD_NAME: {
16418 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16419 parser_lex(parser);
16420
16421 pm_token_t opening = not_provided(parser);
16422 pm_token_t closing = not_provided(parser);
16423 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16424
16425 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16426 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16427
16428 return (pm_node_t *) symbol;
16429 }
16430 case PM_TOKEN_SYMBOL_BEGIN: {
// Copy the lex mode before lexing: parse_symbol needs the mode that was
// current when the symbol's opening token was seen.
16431 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16432 parser_lex(parser);
16433
16434 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16435 }
// NOTE(review): listing line 16436 is missing - presumably the `case` label
// for the back-reference token.
16437 parser_lex(parser);
16438 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
// NOTE(review): listing line 16439 is missing - presumably the `case` label
// for the numbered-reference token.
16440 parser_lex(parser);
16441 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
// NOTE(review): listing line 16442 is missing - presumably the `case` label
// for the global-variable token.
16443 parser_lex(parser);
16444 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16445 default:
16446 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16447 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16448 }
16449}
16450
16455static pm_node_t *
16456parse_variable(pm_parser_t *parser) {
16457 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16458 int depth;
16459 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16460
16461 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16462 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16463 }
16464
16465 pm_scope_t *current_scope = parser->current_scope;
16466 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16467 if (is_numbered_param) {
16468 // When you use a numbered parameter, it implies the existence of
16469 // all of the locals that exist before it. For example, referencing
16470 // _2 means that _1 must exist. Therefore here we loop through all
16471 // of the possibilities and add them into the constant pool.
16472 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16473 for (uint8_t number = 1; number <= maximum; number++) {
16474 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16475 }
16476
16477 if (!match1(parser, PM_TOKEN_EQUAL)) {
16478 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16479 }
16480
16481 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16482 pm_node_list_append(&current_scope->implicit_parameters, node);
16483
16484 return node;
16485 } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16486 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16487 pm_node_list_append(&current_scope->implicit_parameters, node);
16488
16489 return node;
16490 }
16491 }
16492
16493 return NULL;
16494}
16495
// Parses the identifier in parser->previous as either a variable read or a
// method call written without arguments. When the next token is not an
// opening parenthesis and the identifier does not end in `!` or `?`,
// parse_variable is tried first and its node is returned if it finds one.
// Otherwise a call node is created from the identifier via
// pm_call_node_variable_call_create.
16499static pm_node_t *
16500parse_variable_call(pm_parser_t *parser) {
16501 pm_node_flags_t flags = 0;
16502
16503 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16504 pm_node_t *node = parse_variable(parser);
16505 if (node != NULL) return node;
// NOTE(review): listing line 16506 is missing here - presumably the statement
// that sets a flag in `flags` for this branch. As shown, `flags` is always 0
// when it reaches pm_node_flag_set below; confirm against upstream.
16507 }
16508
16509 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16510 pm_node_flag_set((pm_node_t *)node, flags);
16511
16512 return (pm_node_t *) node;
16513}
16514
// Consumes and returns the token that names a method in a definition.
// Keywords, constants and operators are accepted as names; for operators the
// lex state is switched to PM_LEX_STATE_ENDFN before lexing past the name.
// Any other token reports PM_ERR_DEF_NAME (formatted with the token's
// human-readable type) and returns a PM_TOKEN_MISSING token spanning the
// current token, which is not consumed.
//
// NOTE(review): the embedded listing numbers skip 16525 and 16528, so two
// `case` labels are not visible in this chunk.
16520static inline pm_token_t
16521parse_method_definition_name(pm_parser_t *parser) {
16522 switch (parser->current.type) {
16523 case PM_CASE_KEYWORD:
16524 case PM_TOKEN_CONSTANT:
// NOTE(review): listing line 16525 is missing here - presumably one more
// `case` label sharing this body.
16526 parser_lex(parser);
16527 return parser->previous;
// NOTE(review): listing line 16528 is missing here - presumably the `case`
// label for plain identifiers, which are additionally checked so that a
// numbered-parameter name is rejected as a method name.
16529 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16530 parser_lex(parser);
16531 return parser->previous;
16532 case PM_CASE_OPERATOR:
16533 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16534 parser_lex(parser);
16535 return parser->previous;
16536 default:
16537 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16538 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16539 }
16540}
16541
16542static void
16543parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16544 // Get a reference to the string struct that is being held by the string
16545 // node. This is the value we're going to actually manipulate.
16546 pm_string_ensure_owned(string);
16547
16548 // Now get the bounds of the existing string. We'll use this as a
16549 // destination to move bytes into. We'll also use it for bounds checking
16550 // since we don't require that these strings be null terminated.
16551 size_t dest_length = pm_string_length(string);
16552 const uint8_t *source_cursor = (uint8_t *) string->source;
16553 const uint8_t *source_end = source_cursor + dest_length;
16554
16555 // We're going to move bytes backward in the string when we get leading
16556 // whitespace, so we'll maintain a pointer to the current position in the
16557 // string that we're writing to.
16558 size_t trimmed_whitespace = 0;
16559
16560 // While we haven't reached the amount of common whitespace that we need to
16561 // trim and we haven't reached the end of the string, we'll keep trimming
16562 // whitespace. Trimming in this context means skipping over these bytes such
16563 // that they aren't copied into the new string.
16564 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16565 if (*source_cursor == '\t') {
16566 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16567 if (trimmed_whitespace > common_whitespace) break;
16568 } else {
16569 trimmed_whitespace++;
16570 }
16571
16572 source_cursor++;
16573 dest_length--;
16574 }
16575
16576 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16577 string->length = dest_length;
16578}
16579
16583static void
16584parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16585 // The next node should be dedented if it's the first node in the list or if
16586 // it follows a string node.
16587 bool dedent_next = true;
16588
16589 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16590 // keep around two indices: a read and a write. If we end up trimming all of
16591 // the whitespace from a node, then we'll drop it from the list entirely.
16592 size_t write_index = 0;
16593
16594 pm_node_t *node;
16595 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16596 // We're not manipulating child nodes that aren't strings. In this case
16597 // we'll skip past it and indicate that the subsequent node should not
16598 // be dedented.
16599 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16600 nodes->nodes[write_index++] = node;
16601 dedent_next = false;
16602 continue;
16603 }
16604
16605 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16606 if (dedent_next) {
16607 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16608 }
16609
16610 if (string_node->unescaped.length == 0) {
16611 pm_node_destroy(parser, node);
16612 } else {
16613 nodes->nodes[write_index++] = node;
16614 }
16615
16616 // We always dedent the next node if it follows a string node.
16617 dedent_next = true;
16618 }
16619
16620 nodes->size = write_index;
16621}
16622
16626static pm_token_t
16627parse_strings_empty_content(const uint8_t *location) {
16628 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16629}
16630
16634static inline pm_node_t *
16635parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16636 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16637 bool concating = false;
16638
16639 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16640 pm_node_t *node = NULL;
16641
16642 // Here we have found a string literal. We'll parse it and add it to
16643 // the list of strings.
16644 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16645 assert(lex_mode->mode == PM_LEX_STRING);
16646 bool lex_interpolation = lex_mode->as.string.interpolation;
16647 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16648
16649 pm_token_t opening = parser->current;
16650 parser_lex(parser);
16651
16652 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16653 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16654 // If we get here, then we have an end immediately after a
16655 // start. In that case we'll create an empty content token and
16656 // return an uninterpolated string.
16657 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16658 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16659
16660 pm_string_shared_init(&string->unescaped, content.start, content.end);
16661 node = (pm_node_t *) string;
16662 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16663 // If we get here, then we have an end of a label immediately
16664 // after a start. In that case we'll create an empty symbol
16665 // node.
16666 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16667 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16668
16669 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16670 node = (pm_node_t *) symbol;
16671
16672 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16673 } else if (!lex_interpolation) {
16674 // If we don't accept interpolation then we expect the string to
16675 // start with a single string content node.
16676 pm_string_t unescaped;
16677 pm_token_t content;
16678
16679 if (match1(parser, PM_TOKEN_EOF)) {
16680 unescaped = PM_STRING_EMPTY;
16681 content = not_provided(parser);
16682 } else {
16683 unescaped = parser->current_string;
16684 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16685 content = parser->previous;
16686 }
16687
16688 // It is unfortunately possible to have multiple string content
16689 // nodes in a row in the case that there's heredoc content in
16690 // the middle of the string, like this cursed example:
16691 //
16692 // <<-END+'b
16693 // a
16694 // END
16695 // c'+'d'
16696 //
16697 // In that case we need to switch to an interpolated string to
16698 // be able to contain all of the parts.
16699 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16700 pm_node_list_t parts = { 0 };
16701
16702 pm_token_t delimiters = not_provided(parser);
16703 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16704 pm_node_list_append(&parts, part);
16705
16706 do {
16707 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16708 pm_node_list_append(&parts, part);
16709 parser_lex(parser);
16710 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16711
16712 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16713 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16714
16715 pm_node_list_free(&parts);
16716 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16717 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16718 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16719 } else if (match1(parser, PM_TOKEN_EOF)) {
16720 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16721 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16722 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16723 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16724 } else {
16725 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16726 parser->previous.start = parser->previous.end;
16727 parser->previous.type = PM_TOKEN_MISSING;
16728 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16729 }
16730 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16731 // In this case we've hit string content so we know the string
16732 // at least has something in it. We'll need to check if the
16733 // following token is the end (in which case we can return a
16734 // plain string) or if it's not then it has interpolation.
16735 pm_token_t content = parser->current;
16736 pm_string_t unescaped = parser->current_string;
16737 parser_lex(parser);
16738
16739 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16740 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16741 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16742
16743 // Kind of odd behavior, but basically if we have an
16744 // unterminated string and it ends in a newline, we back up one
16745 // character so that the error message is on the last line of
16746 // content in the string.
16747 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16748 const uint8_t *location = parser->previous.end;
16749 if (location > parser->start && location[-1] == '\n') location--;
16750 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16751
16752 parser->previous.start = parser->previous.end;
16753 parser->previous.type = PM_TOKEN_MISSING;
16754 }
16755 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16756 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16757 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16758 } else {
16759 // If we get here, then we have interpolation so we'll need
16760 // to create a string or symbol node with interpolation.
16761 pm_node_list_t parts = { 0 };
16762 pm_token_t string_opening = not_provided(parser);
16763 pm_token_t string_closing = not_provided(parser);
16764
16765 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16766 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16767 pm_node_list_append(&parts, part);
16768
16769 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16770 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16771 pm_node_list_append(&parts, part);
16772 }
16773 }
16774
16775 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16776 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16777 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16778 } else if (match1(parser, PM_TOKEN_EOF)) {
16779 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16780 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16781 } else {
16782 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16783 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16784 }
16785
16786 pm_node_list_free(&parts);
16787 }
16788 } else {
16789 // If we get here, then the first part of the string is not plain
16790 // string content, in which case we need to parse the string as an
16791 // interpolated string.
16792 pm_node_list_t parts = { 0 };
16793 pm_node_t *part;
16794
16795 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16796 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16797 pm_node_list_append(&parts, part);
16798 }
16799 }
16800
16801 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16802 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16803 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16804 } else if (match1(parser, PM_TOKEN_EOF)) {
16805 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16806 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16807 } else {
16808 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16809 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16810 }
16811
16812 pm_node_list_free(&parts);
16813 }
16814
16815 if (current == NULL) {
16816 // If the node we just parsed is a symbol node, then we can't
16817 // concatenate it with anything else, so we can now return that
16818 // node.
16820 return node;
16821 }
16822
16823 // If we don't already have a node, then it's fine and we can just
16824 // set the result to be the node we just parsed.
16825 current = node;
16826 } else {
16827 // Otherwise we need to check the type of the node we just parsed.
16828 // If it cannot be concatenated with the previous node, then we'll
16829 // need to add a syntax error.
16831 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16832 }
16833
16834 // If we haven't already created our container for concatenation,
16835 // we'll do that now.
16836 if (!concating) {
16838 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16839 }
16840
16841 concating = true;
16842 pm_token_t bounds = not_provided(parser);
16843
16844 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16845 pm_interpolated_string_node_append(container, current);
16846 current = (pm_node_t *) container;
16847 }
16848
16849 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16850 }
16851 }
16852
16853 return current;
16854}
16855
// Values for the `flags` argument of parse_pattern. Callers in this file
// combine them with `|` and parse_pattern tests them with `&`.
// SINGLE is zero, i.e. "no flags set".
16856 #define PM_PARSE_PATTERN_SINGLE 0
// TOP: when this bit is absent, parse_pattern reports
// PM_ERR_PATTERN_HASH_IMPLICIT for a hash pattern written without braces.
16857 #define PM_PARSE_PATTERN_TOP 1
// MULTI: passed when parsing the contents of `[...]` / `(...)`.
// NOTE(review): presumably permits a comma-separated list of patterns; the
// code that tests this bit is outside the visible section - confirm.
16858 #define PM_PARSE_PATTERN_MULTI 2
16859
// Forward declaration: the pattern helpers below and parse_pattern call each
// other recursively, so the prototype has to be visible before the helpers.
16860 static pm_node_t *
16861 parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16862
16868static void
16869parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16870 // Skip this capture if it starts with an underscore.
16871 if (*location->start == '_') return;
16872
16873 if (pm_constant_id_list_includes(captures, capture)) {
16874 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16875 } else {
16876 pm_constant_id_list_append(captures, capture);
16877 }
16878}
16879
16883static pm_node_t *
16884parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16885 // Now, if there are any :: operators that follow, parse them as constant
16886 // path nodes.
16887 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16888 pm_token_t delimiter = parser->previous;
16889 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16890 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16891 }
16892
16893 // If there is a [ or ( that follows, then this is part of a larger pattern
16894 // expression. We'll parse the inner pattern here, then modify the returned
16895 // inner pattern with our constant path attached.
16896 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16897 return node;
16898 }
16899
16900 pm_token_t opening;
16901 pm_token_t closing;
16902 pm_node_t *inner = NULL;
16903
16904 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16905 opening = parser->previous;
16906 accept1(parser, PM_TOKEN_NEWLINE);
16907
16908 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16909 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16910 accept1(parser, PM_TOKEN_NEWLINE);
16911 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16912 }
16913
16914 closing = parser->previous;
16915 } else {
16916 parser_lex(parser);
16917 opening = parser->previous;
16918 accept1(parser, PM_TOKEN_NEWLINE);
16919
16920 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16921 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16922 accept1(parser, PM_TOKEN_NEWLINE);
16923 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16924 }
16925
16926 closing = parser->previous;
16927 }
16928
16929 if (!inner) {
16930 // If there was no inner pattern, then we have something like Foo() or
16931 // Foo[]. In that case we'll create an array pattern with no requireds.
16932 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16933 }
16934
16935 // Now that we have the inner pattern, check to see if it's an array, find,
16936 // or hash pattern. If it is, then we'll attach our constant path to it if
16937 // it doesn't already have a constant. If it's not one of those node types
16938 // or it does have a constant, then we'll create an array pattern.
16939 switch (PM_NODE_TYPE(inner)) {
16940 case PM_ARRAY_PATTERN_NODE: {
16941 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16942
16943 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16944 pattern_node->base.location.start = node->location.start;
16945 pattern_node->base.location.end = closing.end;
16946
16947 pattern_node->constant = node;
16948 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16949 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16950
16951 return (pm_node_t *) pattern_node;
16952 }
16953
16954 break;
16955 }
16956 case PM_FIND_PATTERN_NODE: {
16957 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16958
16959 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16960 pattern_node->base.location.start = node->location.start;
16961 pattern_node->base.location.end = closing.end;
16962
16963 pattern_node->constant = node;
16964 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16965 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16966
16967 return (pm_node_t *) pattern_node;
16968 }
16969
16970 break;
16971 }
16972 case PM_HASH_PATTERN_NODE: {
16973 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16974
16975 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16976 pattern_node->base.location.start = node->location.start;
16977 pattern_node->base.location.end = closing.end;
16978
16979 pattern_node->constant = node;
16980 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16981 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16982
16983 return (pm_node_t *) pattern_node;
16984 }
16985
16986 break;
16987 }
16988 default:
16989 break;
16990 }
16991
16992 // If we got here, then we didn't return one of the inner patterns by
16993 // attaching its constant. In this case we'll create an array pattern and
16994 // attach our constant to it.
16995 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16996 pm_array_pattern_node_requireds_append(pattern_node, inner);
16997 return (pm_node_t *) pattern_node;
16998}
16999
17003static pm_splat_node_t *
17004parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17005 assert(parser->previous.type == PM_TOKEN_USTAR);
17006 pm_token_t operator = parser->previous;
17007 pm_node_t *name = NULL;
17008
17009 // Rest patterns don't necessarily have a name associated with them. So we
17010 // will check for that here. If they do, then we'll add it to the local
17011 // table since this pattern will cause it to become a local variable.
17012 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17013 pm_token_t identifier = parser->previous;
17014 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
17015
17016 int depth;
17017 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17018 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
17019 }
17020
17021 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
17022 name = (pm_node_t *) pm_local_variable_target_node_create(
17023 parser,
17024 &PM_LOCATION_TOKEN_VALUE(&identifier),
17025 constant_id,
17026 (uint32_t) (depth == -1 ? 0 : depth)
17027 );
17028 }
17029
17030 // Finally we can return the created node.
17031 return pm_splat_node_create(parser, &operator, name);
17032}
17033
17037static pm_node_t *
17038parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17039 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17040 parser_lex(parser);
17041
17042 pm_token_t operator = parser->previous;
17043 pm_node_t *value = NULL;
17044
17045 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17046 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17047 }
17048
17049 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17050 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17051
17052 int depth;
17053 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17054 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17055 }
17056
17057 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17058 value = (pm_node_t *) pm_local_variable_target_node_create(
17059 parser,
17060 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17061 constant_id,
17062 (uint32_t) (depth == -1 ? 0 : depth)
17063 );
17064 }
17065
17066 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17067}
17068
17073static bool
17074pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17075 ptrdiff_t length = end - start;
17076 if (length == 0) return false;
17077
17078 // First ensure that it starts with a valid identifier starting character.
17079 size_t width = char_is_identifier_start(parser, start, end - start);
17080 if (width == 0) return false;
17081
17082 // Next, ensure that it's not an uppercase character.
17083 if (parser->encoding_changed) {
17084 if (parser->encoding->isupper_char(start, length)) return false;
17085 } else {
17086 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17087 }
17088
17089 // Next, iterate through all of the bytes of the string to ensure that they
17090 // are all valid identifier characters.
17091 const uint8_t *cursor = start + width;
17092 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
17093 return cursor == end;
17094}
17095
17100static pm_node_t *
17101parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17102 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17103
17104 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17105 int depth = -1;
17106
17107 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17108 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17109 } else {
17110 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17111
17112 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17113 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17114 }
17115 }
17116
17117 if (depth == -1) {
17118 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17119 }
17120
17121 parse_pattern_capture(parser, captures, constant_id, value_loc);
17122 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17123 parser,
17124 value_loc,
17125 constant_id,
17126 (uint32_t) (depth == -1 ? 0 : depth)
17127 );
17128
17129 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17130}
17131
17136static void
17137parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17138 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17139 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17140 }
17141}
17142
/**
 * Parse the elements of a hash pattern, given its already-parsed first element
 * (`first_node`). Key/value pairs are collected into `assocs`, an optional
 * keyword-rest element into `rest`, and every key is registered in `keys` so
 * that duplicates are reported. The result is built by
 * pm_hash_pattern_node_node_list_create; the temporary list storage and the
 * key set are released before returning.
 *
 * NOTE(review): the listing's embedded numbering skips 17146 just above this
 * line, so the return-type line of the definition is missing. The function
 * returns `node`, a pm_hash_pattern_node_t *. Several lines inside the body
 * are missing as well (flagged below); restore them from the upstream file
 * before compiling.
 */
17147 parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17148 pm_node_list_t assocs = { 0 };
17149 pm_static_literals_t keys = { 0 };
17150 pm_node_t *rest = NULL;
17151
17152 switch (PM_NODE_TYPE(first_node)) {
// NOTE(review): embedded numbering skips 17153-17154 here. Presumably the
// `case` label(s) for the node types that are treated as the rest element.
17155 rest = first_node;
17156 break;
17157 case PM_SYMBOL_NODE: {
17158 if (pm_symbol_node_label_p(first_node)) {
17159 parse_pattern_hash_key(parser, &keys, first_node);
17160 pm_node_t *value;
17161
// NOTE(review): embedded numbering skips 17162 here. The `} else {` below
// implies a missing `if (...) {` that decides whether the key has no explicit
// value.
17163 // Otherwise, we will create an implicit local variable
17164 // target for the value.
17165 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17166 } else {
17167 // Here we have a value for the first assoc in the list, so
17168 // we will parse it now.
17169 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17170 }
17171
// Assocs in hash patterns carry no operator token.
17172 pm_token_t operator = not_provided(parser);
17173 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17174
17175 pm_node_list_append(&assocs, assoc);
17176 break;
17177 }
17178 }
// NOTE(review): embedded numbering skips 17179 here. A symbol node that is not
// a label leaves the case above without a `break`, so the missing line is
// presumably a fall-through marker into `default` - confirm.
17180 default: {
17181 // If we get anything else, then this is an error. For this we'll
17182 // create a missing node for the value and create an assoc node for
17183 // the first node in the list.
17184 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17185 pm_parser_err_node(parser, first_node, diag_id);
17186
17187 pm_token_t operator = not_provided(parser);
17188 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17189 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17190
17191 pm_node_list_append(&assocs, assoc);
17192 break;
17193 }
17194 }
17195
17196 // If there are any other assocs, then we'll parse them now.
17197 while (accept1(parser, PM_TOKEN_COMMA)) {
17198 // Here we need to break to support trailing commas.
// NOTE(review): embedded numbering skips 17199 here. Presumably the `if (...) {`
// that detects a terminating token after the comma and opens the block closed
// at 17206.
17200 // Trailing commas are not allowed to follow a rest pattern.
17201 if (rest != NULL) {
17202 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17203 }
17204
17205 break;
17206 }
17207
17208 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17209 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17210
// The first keyword rest becomes `rest`; any later one is an error but is
// still kept in the assoc list.
17211 if (rest == NULL) {
17212 rest = assoc;
17213 } else {
17214 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17215 pm_node_list_append(&assocs, assoc);
17216 }
17217 } else {
17218 pm_node_t *key;
17219
// A key is either a quoted string label (parsed through parse_strings) or a
// plain label token.
17220 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17221 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17222
// NOTE(review): embedded numbering skips 17223 here. Presumably the `if (...) {`
// that tests whether `key` is an interpolated symbol.
17224 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17225 } else if (!pm_symbol_node_label_p(key)) {
17226 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17227 }
17228 } else {
17229 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17230 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17231 }
17232
17233 parse_pattern_hash_key(parser, &keys, key);
17234 pm_node_t *value = NULL;
17235
// NOTE(review): embedded numbering skips 17236 here. Presumably the `if (...) {`
// that decides whether this key has no explicit value.
// NOTE(review): `key` is cast to pm_symbol_node_t * below although the error
// branches above suggest it may be another node type - verify against the
// missing condition.
17237 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17238 } else {
17239 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17240 }
17241
17242 pm_token_t operator = not_provided(parser);
17243 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17244
// Pairs that appear after a rest element are reported but still appended.
17245 if (rest != NULL) {
17246 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17247 }
17248
17249 pm_node_list_append(&assocs, assoc);
17250 }
17251 }
17252
17253 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
// Only the list's backing array is freed here, not the nodes it pointed at.
17254 xfree(assocs.nodes);
17255
17256 pm_static_literals_free(&keys);
17257 return node;
17258 }
17259
/**
 * Parse one primitive pattern, dispatching on the current token. The visible
 * cases cover: a bare name that becomes a local variable target, an array
 * pattern in brackets, a hash pattern in braces, beginless ranges, primitive
 * expressions (optionally followed by a range operator), pinned variables and
 * pinned expressions after `^`, and constants / constant paths. For any other
 * token `diag_id` is reported at the current token and a missing node is
 * returned.
 *
 * NOTE(review): the listing's embedded numbering skips several lines inside
 * this function (flagged below), mostly `case` labels; restore them from the
 * upstream file before compiling.
 */
17263 static pm_node_t *
17264 parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17265 switch (parser->current.type) {
// NOTE(review): embedded numbering skips 17266 here. Presumably another `case`
// label sharing this body with PM_TOKEN_METHOD_NAME.
17267 case PM_TOKEN_METHOD_NAME: {
17268 parser_lex(parser);
17269 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17270
// This local `depth` (scope depth of the variable) shadows the function's
// `depth` parameter for the rest of this case.
17271 int depth;
17272 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17273 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17274 }
17275
17276 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17277 return (pm_node_t *) pm_local_variable_target_node_create(
17278 parser,
17279 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17280 constant_id,
17281 (uint32_t) (depth == -1 ? 0 : depth)
17282 );
17283 }
// NOTE(review): embedded numbering skips 17284 here. Presumably the `case`
// label (with opening brace) for a left bracket, since the body below parses
// up to PM_TOKEN_BRACKET_RIGHT.
17285 pm_token_t opening = parser->current;
17286 parser_lex(parser);
17287
17288 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17289 // If we have an empty array pattern, then we'll just return a new
17290 // array pattern node.
17291 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17292 }
17293
17294 // Otherwise, we'll parse the inner pattern, then deal with it depending
17295 // on the type it returns.
17296 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17297
17298 accept1(parser, PM_TOKEN_NEWLINE);
17299 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17300 pm_token_t closing = parser->previous;
17301
// An inner array or find pattern that has no delimiters of its own adopts
// these brackets instead of being wrapped in another array pattern.
17302 switch (PM_NODE_TYPE(inner)) {
17303 case PM_ARRAY_PATTERN_NODE: {
17304 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17305 if (pattern_node->opening_loc.start == NULL) {
17306 pattern_node->base.location.start = opening.start;
17307 pattern_node->base.location.end = closing.end;
17308
17309 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17310 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17311
17312 return (pm_node_t *) pattern_node;
17313 }
17314
17315 break;
17316 }
17317 case PM_FIND_PATTERN_NODE: {
17318 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17319 if (pattern_node->opening_loc.start == NULL) {
17320 pattern_node->base.location.start = opening.start;
17321 pattern_node->base.location.end = closing.end;
17322
17323 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17324 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17325
17326 return (pm_node_t *) pattern_node;
17327 }
17328
17329 break;
17330 }
17331 default:
17332 break;
17333 }
17334
// Anything else becomes the single required element of a new array pattern.
17335 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17336 pm_array_pattern_node_requireds_append(node, inner);
17337 return (pm_node_t *) node;
17338 }
17339 case PM_TOKEN_BRACE_LEFT: {
// pattern_matching_newlines is switched off while inside the braces and
// restored before returning.
17340 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17341 parser->pattern_matching_newlines = false;
17342
// NOTE(review): embedded numbering skips 17343 here. Presumably the
// declaration of `node`, which is assigned hash pattern nodes below.
17344 pm_token_t opening = parser->current;
17345 parser_lex(parser);
17346
17347 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17348 // If we have an empty hash pattern, then we'll just return a new hash
17349 // pattern node.
17350 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17351 } else {
17352 pm_node_t *first_node;
17353
17354 switch (parser->current.type) {
17355 case PM_TOKEN_LABEL:
17356 parser_lex(parser);
17357 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17358 break;
// NOTE(review): embedded numbering skips 17359 here. Presumably the `case`
// label for the `**` token, since parse_pattern_keyword_rest asserts it.
17360 first_node = parse_pattern_keyword_rest(parser, captures);
17361 break;
// NOTE(review): embedded numbering skips 17362 here. Presumably the `case`
// label for a token that starts a key parsed as a general expression - confirm.
17363 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17364 break;
17365 default: {
17366 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17367 parser_lex(parser);
17368
17369 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17370 break;
17371 }
17372 }
17373
17374 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17375
17376 accept1(parser, PM_TOKEN_NEWLINE);
17377 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17378 pm_token_t closing = parser->previous;
17379
// Stretch the node over the braces and record their locations.
17380 node->base.location.start = opening.start;
17381 node->base.location.end = closing.end;
17382
17383 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17384 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17385 }
17386
17387 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17388 return (pm_node_t *) node;
17389 }
17390 case PM_TOKEN_UDOT_DOT:
17391 case PM_TOKEN_UDOT_DOT_DOT: {
17392 pm_token_t operator = parser->current;
17393 parser_lex(parser);
17394
17395 // Since we have a unary range operator, we need to parse the subsequent
17396 // expression as the right side of the range.
17397 switch (parser->current.type) {
17398 case PM_CASE_PRIMITIVE: {
17399 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17400 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17401 }
17402 default: {
// No usable right-hand side: report the error on the operator and use a
// missing node located at the operator.
17403 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17404 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17405 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17406 }
17407 }
17408 }
17409 case PM_CASE_PRIMITIVE: {
17410 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17411
17412 // If we found a label, we need to immediately return to the caller.
17413 if (pm_symbol_node_label_p(node)) return node;
17414
17415 // Now that we have a primitive, we need to check if it's part of a range.
17416 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17417 pm_token_t operator = parser->previous;
17418
17419 // Now that we have the operator, we need to check if this is followed
17420 // by another expression. If it is, then we will create a full range
17421 // node. Otherwise, we'll create an endless range.
17422 switch (parser->current.type) {
17423 case PM_CASE_PRIMITIVE: {
17424 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17425 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17426 }
17427 default:
17428 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17429 }
17430 }
17431
17432 return node;
17433 }
17434 case PM_TOKEN_CARET: {
17435 parser_lex(parser);
17436 pm_token_t operator = parser->previous;
17437
17438 // At this point we have a pin operator. We need to check the subsequent
17439 // expression to determine if it's a variable or an expression.
17440 switch (parser->current.type) {
17441 case PM_TOKEN_IDENTIFIER: {
17442 parser_lex(parser);
17443 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17444
// parse_variable returning NULL is treated as "no such local": report it and
// substitute a placeholder read node so a pinned node can still be built.
17445 if (variable == NULL) {
17446 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17447 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17448 }
17449
17450 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17451 }
// NOTE(review): embedded numbering skips 17452 here. Presumably the `case`
// label for an instance variable token.
17453 parser_lex(parser);
17454 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17455
17456 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17457 }
// NOTE(review): embedded numbering skips 17458 here. Presumably the `case`
// label for a class variable token.
17459 parser_lex(parser);
17460 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17461
17462 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17463 }
// NOTE(review): embedded numbering skips 17464 here. Presumably the `case`
// label for a global variable token.
17465 parser_lex(parser);
17466 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17467
17468 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17469 }
// NOTE(review): embedded numbering skips 17470 here. Presumably the `case`
// label for a numbered reference token.
17471 parser_lex(parser);
17472 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17473
17474 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17475 }
// NOTE(review): embedded numbering skips 17476 here. Presumably the `case`
// label for a back reference token.
17477 parser_lex(parser);
17478 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17479
17480 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17481 }
// NOTE(review): embedded numbering skips 17482 here. Presumably the `case`
// label for a left parenthesis, since the body below expects
// PM_TOKEN_PARENTHESIS_RIGHT.
17483 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17484 parser->pattern_matching_newlines = false;
17485
17486 pm_token_t lparen = parser->current;
17487 parser_lex(parser);
17488
17489 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17490 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17491
17492 accept1(parser, PM_TOKEN_NEWLINE);
17493 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17494 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17495 }
17496 default: {
17497 // If we get here, then we have a pin operator followed by something
17498 // not understood. We'll create a missing node and return that.
17499 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17500 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17501 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17502 }
17503 }
17504 }
17505 case PM_TOKEN_UCOLON_COLON: {
// A leading `::` starts a constant path with no parent (NULL).
17506 pm_token_t delimiter = parser->current;
17507 parser_lex(parser);
17508
17509 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17510 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17511
17512 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17513 }
17514 case PM_TOKEN_CONSTANT: {
17515 pm_token_t constant = parser->current;
17516 parser_lex(parser);
17517
17518 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17519 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17520 }
17521 default:
// Unrecognized token: report `diag_id` and return a missing node located at
// the current token, which is not consumed.
17522 pm_parser_err_current(parser, diag_id);
17523 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17524 }
17525 }
17526
/**
 * Parse a chain of primitive patterns joined by `|`, followed by any number
 * of `=> name` captures. `first_node` may be an already-parsed first primitive
 * or NULL, in which case the first primitive is parsed here. Alternatives are
 * folded left-to-right into alternation pattern nodes, and each `=> name`
 * wraps the result so far in a capture pattern node.
 *
 * NOTE(review): the listing's embedded numbering skips several `case` label
 * lines in the switch below (flagged inline); restore them from the upstream
 * file before compiling.
 */
17531 static pm_node_t *
17532 parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17533 pm_node_t *node = first_node;
17534
// The loop body runs once unconditionally when no first node was supplied,
// and afterwards once per `|` that is consumed.
17535 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
// Only meaningful when a `|` was just accepted; it is used solely in the
// branches where `node` is already non-NULL.
17536 pm_token_t operator = parser->previous;
17537
17538 switch (parser->current.type) {
// NOTE(review): embedded numbering skips 17539-17541 here. Presumably further
// `case` labels for tokens handled by parse_pattern_primitive.
17542 case PM_TOKEN_CARET:
17543 case PM_TOKEN_CONSTANT:
// NOTE(review): embedded numbering skips 17544 here. Presumably another `case`
// label.
17545 case PM_TOKEN_UDOT_DOT:
// NOTE(review): embedded numbering skips 17546 here. Presumably another `case`
// label.
17547 case PM_CASE_PRIMITIVE: {
17548 if (node == NULL) {
17549 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17550 } else {
17551 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17552 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17553 }
17554
17555 break;
17556 }
// NOTE(review): embedded numbering skips 17557-17558 here. Presumably the
// `case` label (with opening brace) for a left parenthesis, since the body
// below expects PM_TOKEN_PARENTHESIS_RIGHT.
17559 pm_token_t opening = parser->current;
17560 parser_lex(parser);
17561
17562 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17563 accept1(parser, PM_TOKEN_NEWLINE);
17564 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17565 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
17566
17567 if (node == NULL) {
17568 node = right;
17569 } else {
17570 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17571 }
17572
17573 break;
17574 }
17575 default: {
// Nothing parseable here: report `diag_id` and use a missing node located at
// the current token. Because `node` becomes non-NULL, the loop then continues
// only if another `|` follows.
17576 pm_parser_err_current(parser, diag_id);
17577 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17578
17579 if (node == NULL) {
17580 node = right;
17581 } else {
17582 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17583 }
17584
17585 break;
17586 }
17587 }
17588 }
17589
17590 // If we have an =>, then we are assigning this pattern to a variable.
17591 // In this case we should create an assignment node.
17592 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17593 pm_token_t operator = parser->previous;
17594 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17595
17596 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
// This local `depth` (scope depth of the variable) shadows the function's
// `depth` parameter inside this loop body.
17597 int depth;
17598
17599 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17600 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17601 }
17602
17603 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17604 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17605 parser,
17606 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17607 constant_id,
17608 (uint32_t) (depth == -1 ? 0 : depth)
17609 );
17610
17611 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17612 }
17613
17614 return node;
17615 }
17616
/**
 * Parse a pattern, dispatching on the type of the parser's current token.
 *
 * - A label or a `**` token starts an implicit hash pattern; an error
 *   (PM_ERR_PATTERN_HASH_IMPLICIT) is reported on it unless `flags` contains
 *   PM_PARSE_PATTERN_TOP.
 * - A string beginning is parsed as a primitive first, because it may turn
 *   out to be a label symbol that starts a hash pattern.
 * - A leading `*` is parsed as a rest pattern when `flags` contains
 *   PM_PARSE_PATTERN_TOP or PM_PARSE_PATTERN_MULTI.
 * - Everything else goes through parse_pattern_primitives.
 *
 * When `flags` contains PM_PARSE_PATTERN_MULTI and a comma follows, the
 * comma-separated patterns are gathered into a list and turned into either a
 * find pattern or an array pattern.
 *
 * `diag_id` is forwarded to the primitive parsers, and every nested call
 * receives `depth + 1`.
 *
 * Returns the node that was built for the pattern.
 */
17620static pm_node_t *
17621parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17622 pm_node_t *node = NULL;
17623
      // Track whether a rest (`*`) pattern was seen in the first position and
      // in any later position of a comma-separated list.
17624 bool leading_rest = false;
17625 bool trailing_rest = false;
17626
17627 switch (parser->current.type) {
17628 case PM_TOKEN_LABEL: {
17629 parser_lex(parser);
17630 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17631 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17632
17633 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17634 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17635 }
17636
17637 return node;
17638 }
17639 case PM_TOKEN_USTAR_STAR: {
17640 node = parse_pattern_keyword_rest(parser, captures);
17641 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17642
17643 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17644 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17645 }
17646
17647 return node;
17648 }
17649 case PM_TOKEN_STRING_BEGIN: {
17650 // We need special handling for string beginnings because they could
17651 // be dynamic symbols leading to hash patterns.
17652 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17653
17654 if (pm_symbol_node_label_p(node)) {
17655 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17656
17657 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17658 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17659 }
17660
17661 return node;
17662 }
17663
17664 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17665 break;
17666 }
17667 case PM_TOKEN_USTAR: {
17668 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17669 parser_lex(parser);
17670 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17671 leading_rest = true;
17672 break;
17673 }
17674 }
      // When neither flag is set, control leaves the USTAR case without a
      // break and continues into the default case below.
17676 default:
17677 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17678 break;
17679 }
17680
17681 // If we got a dynamic label symbol, then we need to treat it like the
17682 // beginning of a hash pattern.
17683 if (pm_symbol_node_label_p(node)) {
17684 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17685 }
17686
17687 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17688 // If we have a comma, then we are now parsing either an array pattern
17689 // or a find pattern. We need to parse all of the patterns, put them
17690 // into a big list, and then determine which type of node we have.
17691 pm_node_list_t nodes = { 0 };
17692 pm_node_list_append(&nodes, node);
17693
17694 // Gather up all of the patterns into the list.
17695 while (accept1(parser, PM_TOKEN_COMMA)) {
17696 // Break early here in case we have a trailing comma.
      // NOTE(review): this listing jumps from 17696 to 17698, so the statement
      // that opens the block closed at 17702 is not visible here (presumably
      // an `if` that detects a trailing comma) - confirm against the full
      // source before editing this loop.
17698 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17699 pm_node_list_append(&nodes, node);
17700 trailing_rest = true;
17701 break;
17702 }
17703
17704 if (accept1(parser, PM_TOKEN_USTAR)) {
17705 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17706
17707 // If we have already parsed a splat pattern, then this is an
17708 // error. We will continue to parse the rest of the patterns,
17709 // but we will indicate it as an error.
17710 if (trailing_rest) {
17711 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17712 }
17713
17714 trailing_rest = true;
17715 } else {
17716 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17717 }
17718
17719 pm_node_list_append(&nodes, node);
17720 }
17721
17722 // If the first pattern and the last pattern are rest patterns, then we
17723 // will call this a find pattern, regardless of how many rest patterns
17724 // are in between because we know we already added the appropriate
17725 // errors. Otherwise we will create an array pattern.
17726 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17727 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17728
      // Exactly two entries means the list holds only the leading and the
      // trailing rest, with nothing between them to find.
17729 if (nodes.size == 2) {
17730 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17731 }
17732 } else {
17733 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17734
17735 if (leading_rest && trailing_rest) {
17736 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17737 }
17738 }
17739
      // Only the list's backing array is released here; the nodes themselves
      // were handed to the pattern node created above.
17740 xfree(nodes.nodes);
17741 } else if (leading_rest) {
17742 // Otherwise, if we parsed a single splat pattern, then we know we have
17743 // an array pattern, so we can go ahead and create that node.
17744 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17745 }
17746
17747 return node;
17748}
17749
17755static inline void
17756parse_negative_numeric(pm_node_t *node) {
17757 switch (PM_NODE_TYPE(node)) {
17758 case PM_INTEGER_NODE: {
17759 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17760 cast->base.location.start--;
17761 cast->value.negative = true;
17762 break;
17763 }
17764 case PM_FLOAT_NODE: {
17765 pm_float_node_t *cast = (pm_float_node_t *) node;
17766 cast->base.location.start--;
17767 cast->value = -cast->value;
17768 break;
17769 }
17770 case PM_RATIONAL_NODE: {
17771 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17772 cast->base.location.start--;
17773 cast->numerator.negative = true;
17774 break;
17775 }
17776 case PM_IMAGINARY_NODE:
17777 node->location.start--;
17778 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17779 break;
17780 default:
17781 assert(false && "unreachable");
17782 break;
17783 }
17784}
17785
17791static void
17792pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17793 switch (diag_id) {
17794 case PM_ERR_HASH_KEY: {
17795 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17796 break;
17797 }
17798 case PM_ERR_HASH_VALUE:
17799 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17800 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17801 break;
17802 }
17803 case PM_ERR_UNARY_RECEIVER: {
17804 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17805 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17806 break;
17807 }
17808 case PM_ERR_UNARY_DISALLOWED:
17809 case PM_ERR_EXPECT_ARGUMENT: {
17810 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17811 break;
17812 }
17813 default:
17814 pm_parser_err_previous(parser, diag_id);
17815 break;
17816 }
17817}
17818
17822static void
17823parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17824#define CONTEXT_NONE 0
17825#define CONTEXT_THROUGH_ENSURE 1
17826#define CONTEXT_THROUGH_ELSE 2
17827
17828 pm_context_node_t *context_node = parser->current_context;
17829 int context = CONTEXT_NONE;
17830
17831 while (context_node != NULL) {
17832 switch (context_node->context) {
17840 case PM_CONTEXT_DEFINED:
17842 // These are the good cases. We're allowed to have a retry here.
17843 return;
17844 case PM_CONTEXT_CLASS:
17845 case PM_CONTEXT_DEF:
17847 case PM_CONTEXT_MAIN:
17848 case PM_CONTEXT_MODULE:
17849 case PM_CONTEXT_PREEXE:
17850 case PM_CONTEXT_SCLASS:
17851 // These are the bad cases. We're not allowed to have a retry in
17852 // these contexts.
17853 if (context == CONTEXT_NONE) {
17854 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17855 } else if (context == CONTEXT_THROUGH_ENSURE) {
17856 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17857 } else if (context == CONTEXT_THROUGH_ELSE) {
17858 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17859 }
17860 return;
17868 // These are also bad cases, but with a more specific error
17869 // message indicating the else.
17870 context = CONTEXT_THROUGH_ELSE;
17871 break;
17879 // These are also bad cases, but with a more specific error
17880 // message indicating the ensure.
17881 context = CONTEXT_THROUGH_ENSURE;
17882 break;
17883 case PM_CONTEXT_NONE:
17884 // This case should never happen.
17885 assert(false && "unreachable");
17886 break;
17887 case PM_CONTEXT_BEGIN:
17890 case PM_CONTEXT_CASE_IN:
17893 case PM_CONTEXT_ELSE:
17894 case PM_CONTEXT_ELSIF:
17895 case PM_CONTEXT_EMBEXPR:
17897 case PM_CONTEXT_FOR:
17898 case PM_CONTEXT_IF:
17903 case PM_CONTEXT_PARENS:
17904 case PM_CONTEXT_POSTEXE:
17906 case PM_CONTEXT_TERNARY:
17907 case PM_CONTEXT_UNLESS:
17908 case PM_CONTEXT_UNTIL:
17909 case PM_CONTEXT_WHILE:
17910 // In these contexts we should continue walking up the list of
17911 // contexts.
17912 break;
17913 }
17914
17915 context_node = context_node->prev;
17916 }
17917
17918#undef CONTEXT_NONE
17919#undef CONTEXT_ENSURE
17920#undef CONTEXT_ELSE
17921}
17922
/**
 * Check that a `yield` keyword (the given node) appears in a position where
 * it is permitted. The parser's context stack is walked from the innermost
 * context outwards through `prev` links: the walk stops silently at a context
 * that permits `yield`, and stops with PM_ERR_INVALID_YIELD reported on the
 * node at a context that forbids it. If the stack is exhausted without
 * reaching either kind of context, nothing is reported.
 *
 * NOTE(review): the numbering of this listing skips lines inside the switch
 * below, so some case labels are presumably missing from each group - confirm
 * against the full source.
 */
17926static void
17927parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17928 pm_context_node_t *context_node = parser->current_context;
17929
17930 while (context_node != NULL) {
17931 switch (context_node->context) {
17932 case PM_CONTEXT_DEF:
17934 case PM_CONTEXT_DEFINED:
17938 // These are the good cases. We're allowed to have a yield in
17939 // these contexts.
17940 return;
17941 case PM_CONTEXT_CLASS:
17945 case PM_CONTEXT_MAIN:
17946 case PM_CONTEXT_MODULE:
17950 case PM_CONTEXT_SCLASS:
17954 // These are the bad cases. We're not allowed to have a yield in
17955 // these contexts.
17956 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17957 return;
17958 case PM_CONTEXT_NONE:
17959 // This case should never happen.
17960 assert(false && "unreachable");
17961 break;
17962 case PM_CONTEXT_BEGIN:
17971 case PM_CONTEXT_CASE_IN:
17974 case PM_CONTEXT_ELSE:
17975 case PM_CONTEXT_ELSIF:
17976 case PM_CONTEXT_EMBEXPR:
17978 case PM_CONTEXT_FOR:
17979 case PM_CONTEXT_IF:
17987 case PM_CONTEXT_PARENS:
17988 case PM_CONTEXT_POSTEXE:
17990 case PM_CONTEXT_PREEXE:
17992 case PM_CONTEXT_TERNARY:
17993 case PM_CONTEXT_UNLESS:
17994 case PM_CONTEXT_UNTIL:
17995 case PM_CONTEXT_WHILE:
17996 // In these contexts we should continue walking up the list of
17997 // contexts.
17998 break;
17999 }
18000
      // Move one level outwards and examine the enclosing context next.
18001 context_node = context_node->prev;
18002 }
18003}
18004
/**
 * Data bundle with a start and an end pointer into the source.
 *
 * NOTE(review): presumably this is the callback data consumed by
 * parse_regular_expression_error below, which reads `parser`, `start`, `end`
 * and `shared` members. The numbering of this listing skips lines both before
 * the first visible field and after the last one, so further members, the
 * closing brace and the typedef name are not visible here - confirm against
 * the full source.
 */
18009typedef struct {
18012
      /** Start of the range (a pointer to bytes that are not modified). */
18014 const uint8_t *start;
18015
      /** End of the range (a pointer to bytes that are not modified). */
18017 const uint8_t *end;
18018
18027
/**
 * Error callback handed to the regular expression parser. Appends a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic, formatted with `message`, to the
 * parser found in the callback data.
 *
 * The location attached to the diagnostic depends on the `shared` flag of the
 * callback data: when it is set, the `start`/`end` passed to this callback
 * are used directly; otherwise the `start`/`end` stored in the callback data
 * are used instead.
 *
 * NOTE(review): the line that declares `callback_data` is missing from this
 * listing (its numbering skips from 18033 to 18035); presumably it is derived
 * from the `data` argument - confirm against the full source.
 */
18032static void
18033parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
18035 pm_location_t location;
18036
18037 if (callback_data->shared) {
18038 location = (pm_location_t) { .start = start, .end = end };
18039 } else {
18040 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18041 }
18042
18043 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18044}
18045
/**
 * Run the regular expression parser over the unescaped source of the given
 * regular expression node, with parse_regular_expression_error installed as
 * the error callback.
 *
 * The callback data carries the parser, the start and end of the node's
 * location, and whether the unescaped string is of type PM_STRING_SHARED.
 * The two arguments before the error callback are passed as NULL.
 *
 * NOTE(review): the line that opens the `error_data` initializer is missing
 * from this listing (its numbering skips from 18051 to 18053) - confirm
 * against the full source.
 */
18049static void
18050parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18051 const pm_string_t *unescaped = &node->unescaped;
18053 .parser = parser,
18054 .start = node->base.location.start,
18055 .end = node->base.location.end,
18056 .shared = unescaped->type == PM_STRING_SHARED
18057 };
18058
      // The node's extended-flag state is passed to the regular expression
      // parser as its fourth argument.
18059 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18060}
18061
18065static inline pm_node_t *
18066parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18067 switch (parser->current.type) {
18069 parser_lex(parser);
18070
18071 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18072 pm_accepts_block_stack_push(parser, true);
18073 bool parsed_bare_hash = false;
18074
18075 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18076 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18077
18078 // Handle the case where we don't have a comma and we have a
18079 // newline followed by a right bracket.
18080 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18081 break;
18082 }
18083
18084 // Ensure that we have a comma between elements in the array.
18085 if (array->elements.size > 0) {
18086 if (accept1(parser, PM_TOKEN_COMMA)) {
18087 // If there was a comma but we also accepts a newline,
18088 // then this is a syntax error.
18089 if (accepted_newline) {
18090 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18091 }
18092 } else {
18093 // If there was no comma, then we need to add a syntax
18094 // error.
18095 const uint8_t *location = parser->previous.end;
18096 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18097
18098 parser->previous.start = location;
18099 parser->previous.type = PM_TOKEN_MISSING;
18100 }
18101 }
18102
18103 // If we have a right bracket immediately following a comma,
18104 // this is allowed since it's a trailing comma. In this case we
18105 // can break out of the loop.
18106 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18107
18108 pm_node_t *element;
18109
18110 if (accept1(parser, PM_TOKEN_USTAR)) {
18111 pm_token_t operator = parser->previous;
18112 pm_node_t *expression = NULL;
18113
18114 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18115 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18116 } else {
18117 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18118 }
18119
18120 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18121 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18122 if (parsed_bare_hash) {
18123 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18124 }
18125
18126 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18127 pm_static_literals_t hash_keys = { 0 };
18128
18130 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18131 }
18132
18133 pm_static_literals_free(&hash_keys);
18134 parsed_bare_hash = true;
18135 } else {
18136 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18137
18138 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18139 if (parsed_bare_hash) {
18140 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18141 }
18142
18143 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18144 pm_static_literals_t hash_keys = { 0 };
18145 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18146
18147 pm_token_t operator;
18148 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18149 operator = parser->previous;
18150 } else {
18151 operator = not_provided(parser);
18152 }
18153
18154 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18155 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18156 pm_keyword_hash_node_elements_append(hash, assoc);
18157
18158 element = (pm_node_t *) hash;
18159 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18160 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18161 }
18162
18163 pm_static_literals_free(&hash_keys);
18164 parsed_bare_hash = true;
18165 }
18166 }
18167
18168 pm_array_node_elements_append(array, element);
18169 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18170 }
18171
18172 accept1(parser, PM_TOKEN_NEWLINE);
18173
18174 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18175 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18176 parser->previous.start = parser->previous.end;
18177 parser->previous.type = PM_TOKEN_MISSING;
18178 }
18179
18180 pm_array_node_close_set(array, &parser->previous);
18181 pm_accepts_block_stack_pop(parser);
18182
18183 return (pm_node_t *) array;
18184 }
18187 pm_token_t opening = parser->current;
18188 pm_node_flags_t flags = 0;
18189
18190 pm_node_list_t current_block_exits = { 0 };
18191 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18192
18193 parser_lex(parser);
18194 while (true) {
18195 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18197 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18198 break;
18199 }
18200 }
18201
18202 // If this is the end of the file or we match a right parenthesis, then
18203 // we have an empty parentheses node, and we can immediately return.
18204 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18205 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18206
18207 pop_block_exits(parser, previous_block_exits);
18208 pm_node_list_free(&current_block_exits);
18209
18210 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
18211 }
18212
18213 // Otherwise, we're going to parse the first statement in the list
18214 // of statements within the parentheses.
18215 pm_accepts_block_stack_push(parser, true);
18216 context_push(parser, PM_CONTEXT_PARENS);
18217 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18218 context_pop(parser);
18219
18220 // Determine if this statement is followed by a terminator. In the
18221 // case of a single statement, this is fine. But in the case of
18222 // multiple statements it's required.
18223 bool terminator_found = false;
18224
18225 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18226 terminator_found = true;
18228 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18229 terminator_found = true;
18230 }
18231
18232 if (terminator_found) {
18233 while (true) {
18234 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18236 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18237 break;
18238 }
18239 }
18240 }
18241
18242 // If we hit a right parenthesis, then we're done parsing the
18243 // parentheses node, and we can check which kind of node we should
18244 // return.
18245 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18247 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18248 }
18249
18250 parser_lex(parser);
18251 pm_accepts_block_stack_pop(parser);
18252
18253 pop_block_exits(parser, previous_block_exits);
18254 pm_node_list_free(&current_block_exits);
18255
18256 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18257 // If we have a single statement and are ending on a right
18258 // parenthesis, then we need to check if this is possibly a
18259 // multiple target node.
18260 pm_multi_target_node_t *multi_target;
18261
18262 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18263 multi_target = (pm_multi_target_node_t *) statement;
18264 } else {
18265 multi_target = pm_multi_target_node_create(parser);
18266 pm_multi_target_node_targets_append(parser, multi_target, statement);
18267 }
18268
18269 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18270 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18271
18272 multi_target->lparen_loc = lparen_loc;
18273 multi_target->rparen_loc = rparen_loc;
18274 multi_target->base.location.start = lparen_loc.start;
18275 multi_target->base.location.end = rparen_loc.end;
18276
18277 pm_node_t *result;
18278 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18279 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18280 accept1(parser, PM_TOKEN_NEWLINE);
18281 } else {
18282 result = (pm_node_t *) multi_target;
18283 }
18284
18285 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18286 // All set, this is explicitly allowed by the parent
18287 // context.
18288 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18289 // All set, we're inside a for loop and we're parsing
18290 // multiple targets.
18291 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18292 // Multi targets are not allowed when it's not a
18293 // statement level.
18294 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18295 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18296 // Multi targets must be followed by an equal sign in
18297 // order to be valid (or a right parenthesis if they are
18298 // nested).
18299 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18300 }
18301
18302 return result;
18303 }
18304
18305 // If we have a single statement and are ending on a right parenthesis
18306 // and we didn't return a multiple assignment node, then we can return a
18307 // regular parentheses node now.
18308 pm_statements_node_t *statements = pm_statements_node_create(parser);
18309 pm_statements_node_body_append(parser, statements, statement, true);
18310
18311 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18312 }
18313
18314 // If we have more than one statement in the set of parentheses,
18315 // then we are going to parse all of them as a list of statements.
18316 // We'll do that here.
18317 context_push(parser, PM_CONTEXT_PARENS);
18319
18320 pm_statements_node_t *statements = pm_statements_node_create(parser);
18321 pm_statements_node_body_append(parser, statements, statement, true);
18322
18323 // If we didn't find a terminator and we didn't find a right
18324 // parenthesis, then this is a syntax error.
18325 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18326 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18327 }
18328
18329 // Parse each statement within the parentheses.
18330 while (true) {
18331 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18332 pm_statements_node_body_append(parser, statements, node, true);
18333
18334 // If we're recovering from a syntax error, then we need to stop
18335 // parsing the statements now.
18336 if (parser->recovering) {
18337 // If this is the level of context where the recovery has
18338 // happened, then we can mark the parser as done recovering.
18339 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18340 break;
18341 }
18342
18343 // If we couldn't parse an expression at all, then we need to
18344 // bail out of the loop.
18345 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18346
18347 // If we successfully parsed a statement, then we are going to
18348 // need terminator to delimit them.
18349 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18350 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18351 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18352 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18353 break;
18354 } else if (!match1(parser, PM_TOKEN_EOF)) {
18355 // If we're at the end of the file, then we're going to add
18356 // an error after this for the ) anyway.
18357 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18358 }
18359 }
18360
18361 context_pop(parser);
18362 pm_accepts_block_stack_pop(parser);
18363 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18364
18365 // When we're parsing multi targets, we allow them to be followed by
18366 // a right parenthesis if they are at the statement level. This is
18367 // only possible if they are the final statement in a parentheses.
18368 // We need to explicitly reject that here.
18369 {
18370 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18371
18372 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18373 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18374 pm_multi_target_node_targets_append(parser, multi_target, statement);
18375
18376 statement = (pm_node_t *) multi_target;
18377 statements->body.nodes[statements->body.size - 1] = statement;
18378 }
18379
18380 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18381 const uint8_t *offset = statement->location.end;
18382 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18383 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18384
18385 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18386 statements->body.nodes[statements->body.size - 1] = statement;
18387
18388 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18389 }
18390 }
18391
18392 pop_block_exits(parser, previous_block_exits);
18393 pm_node_list_free(&current_block_exits);
18394
18395 pm_void_statements_check(parser, statements, true);
18396 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18397 }
18398 case PM_TOKEN_BRACE_LEFT: {
18399 // If we were passed a current_hash_keys via the parser, then that
18400 // means we're already parsing a hash and we want to share the set
18401 // of hash keys with this inner hash we're about to parse for the
18402 // sake of warnings. We'll set it to NULL after we grab it to make
18403 // sure subsequent expressions don't use it. Effectively this is a
18404 // way of getting around passing it to every call to
18405 // parse_expression.
18406 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18407 parser->current_hash_keys = NULL;
18408
18409 pm_accepts_block_stack_push(parser, true);
18410 parser_lex(parser);
18411
18412 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18413
18414 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18415 if (current_hash_keys != NULL) {
18416 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18417 } else {
18418 pm_static_literals_t hash_keys = { 0 };
18419 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18420 pm_static_literals_free(&hash_keys);
18421 }
18422
18423 accept1(parser, PM_TOKEN_NEWLINE);
18424 }
18425
18426 pm_accepts_block_stack_pop(parser);
18427 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18428 pm_hash_node_closing_loc_set(node, &parser->previous);
18429
18430 return (pm_node_t *) node;
18431 }
18433 parser_lex(parser);
18434
18435 pm_token_t opening = parser->previous;
18436 opening.type = PM_TOKEN_STRING_BEGIN;
18437 opening.end = opening.start + 1;
18438
18439 pm_token_t content = parser->previous;
18440 content.type = PM_TOKEN_STRING_CONTENT;
18441 content.start = content.start + 1;
18442
18443 pm_token_t closing = not_provided(parser);
18444 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18445 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18446
18447 // Characters can be followed by strings in which case they are
18448 // automatically concatenated.
18449 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18450 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18451 }
18452
18453 return node;
18454 }
18456 parser_lex(parser);
18457 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18458
18459 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18460 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18461 }
18462
18463 return node;
18464 }
18465 case PM_TOKEN_CONSTANT: {
18466 parser_lex(parser);
18467 pm_token_t constant = parser->previous;
18468
18469 // If a constant is immediately followed by parentheses, then this is in
18470 // fact a method call, not a constant read.
18471 if (
18472 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18473 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18474 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18475 match1(parser, PM_TOKEN_BRACE_LEFT)
18476 ) {
18477 pm_arguments_t arguments = { 0 };
18478 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18479 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18480 }
18481
18482 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18483
18484 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18485 // If we get here, then we have a comma immediately following a
18486 // constant, so we're going to parse this as a multiple assignment.
18487 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18488 }
18489
18490 return node;
18491 }
18492 case PM_TOKEN_UCOLON_COLON: {
18493 parser_lex(parser);
18494 pm_token_t delimiter = parser->previous;
18495
18496 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18497 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18498
18499 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18500 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18501 }
18502
18503 return node;
18504 }
18505 case PM_TOKEN_UDOT_DOT:
18506 case PM_TOKEN_UDOT_DOT_DOT: {
18507 pm_token_t operator = parser->current;
18508 parser_lex(parser);
18509
18510 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18511
18512 // Unary .. and ... are special because these are non-associative
18513 // operators that can also be unary operators. In this case we need
18514 // to explicitly reject code that has a .. or ... that follows this
18515 // expression.
18516 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18517 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18518 }
18519
18520 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18521 }
18522 case PM_TOKEN_FLOAT:
18523 parser_lex(parser);
18524 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18526 parser_lex(parser);
18527 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18529 parser_lex(parser);
18530 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18532 parser_lex(parser);
18533 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18535 parser_lex(parser);
18536 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18537
18538 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18539 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18540 }
18541
18542 return node;
18543 }
18545 parser_lex(parser);
18546 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18547
18548 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18549 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18550 }
18551
18552 return node;
18553 }
18555 parser_lex(parser);
18556 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18557
18558 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18559 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18560 }
18561
18562 return node;
18563 }
18565 case PM_TOKEN_METHOD_NAME: {
18566 parser_lex(parser);
18567 pm_token_t identifier = parser->previous;
18568 pm_node_t *node = parse_variable_call(parser);
18569
18570 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18571 // If parse_variable_call returned with a call node, then we
18572 // know the identifier is not in the local table. In that case
18573 // we need to check if there are arguments following the
18574 // identifier.
18575 pm_call_node_t *call = (pm_call_node_t *) node;
18576 pm_arguments_t arguments = { 0 };
18577
18578 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18579 // Since we found arguments, we need to turn off the
18580 // variable call bit in the flags.
18581 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18582
18583 call->opening_loc = arguments.opening_loc;
18584 call->arguments = arguments.arguments;
18585 call->closing_loc = arguments.closing_loc;
18586 call->block = arguments.block;
18587
18588 if (arguments.block != NULL) {
18589 call->base.location.end = arguments.block->location.end;
18590 } else if (arguments.closing_loc.start == NULL) {
18591 if (arguments.arguments != NULL) {
18592 call->base.location.end = arguments.arguments->base.location.end;
18593 } else {
18594 call->base.location.end = call->message_loc.end;
18595 }
18596 } else {
18597 call->base.location.end = arguments.closing_loc.end;
18598 }
18599 }
18600 } else {
18601 // Otherwise, we know the identifier is in the local table. This
18602 // can still be a method call if it is followed by arguments or
18603 // a block, so we need to check for that here.
18604 if (
18605 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18606 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18607 match1(parser, PM_TOKEN_BRACE_LEFT)
18608 ) {
18609 pm_arguments_t arguments = { 0 };
18610 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18611 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18612
18614 // If we're about to convert an 'it' implicit local
18615 // variable read into a method call, we need to remove
18616 // it from the list of implicit local variables.
18617 parse_target_implicit_parameter(parser, node);
18618 } else {
18619 // Otherwise, we're about to convert a regular local
18620 // variable read into a method call, in which case we
18621 // need to indicate that this was not a read for the
18622 // purposes of warnings.
18624
18625 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18626 parse_target_implicit_parameter(parser, node);
18627 } else {
18629 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18630 }
18631 }
18632
18633 pm_node_destroy(parser, node);
18634 return (pm_node_t *) fcall;
18635 }
18636 }
18637
18638 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18639 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18640 }
18641
18642 return node;
18643 }
18645 // Here we have found a heredoc. We'll parse it and add it to the
18646 // list of strings.
18647 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18648 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18649
18650 size_t common_whitespace = (size_t) -1;
18651 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18652
18653 parser_lex(parser);
18654 pm_token_t opening = parser->previous;
18655
18656 pm_node_t *node;
18657 pm_node_t *part;
18658
18659 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18660 // If we get here, then we have an empty heredoc. We'll create
18661 // an empty content token and return an empty string node.
18662 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18663 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18664
18665 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18666 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18667 } else {
18668 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18669 }
18670
18671 node->location.end = opening.end;
18672 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18673 // If we get here, then we tried to find something in the
18674 // heredoc but couldn't actually parse anything, so we'll just
18675 // return a missing node.
18676 //
18677 // parse_string_part handles its own errors, so there is no need
18678 // for us to add one here.
18679 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18680 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18681 // If we get here, then the part that we parsed was plain string
18682 // content and we're at the end of the heredoc, so we can return
18683 // just a string node with the heredoc opening and closing as
18684 // its opening and closing.
18685 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18686 pm_string_node_t *cast = (pm_string_node_t *) part;
18687
18688 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18689 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18690 cast->base.location = cast->opening_loc;
18691
18692 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18693 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18694 cast->base.type = PM_X_STRING_NODE;
18695 }
18696
18697 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18698 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18699 }
18700
18701 node = (pm_node_t *) cast;
18702 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18703 } else {
18704 // If we get here, then we have multiple parts in the heredoc,
18705 // so we'll need to create an interpolated string node to hold
18706 // them all.
18707 pm_node_list_t parts = { 0 };
18708 pm_node_list_append(&parts, part);
18709
18710 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18711 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18712 pm_node_list_append(&parts, part);
18713 }
18714 }
18715
18716 // Now that we have all of the parts, create the correct type of
18717 // interpolated node.
18718 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18719 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18720 cast->parts = parts;
18721
18722 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18723 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18724
18725 cast->base.location = cast->opening_loc;
18726 node = (pm_node_t *) cast;
18727 } else {
18728 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18729 pm_node_list_free(&parts);
18730
18731 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18732 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18733
18734 cast->base.location = cast->opening_loc;
18735 node = (pm_node_t *) cast;
18736 }
18737
18738 // If this is a heredoc that is indented with a ~, then we need
18739 // to dedent each line by the common leading whitespace.
18740 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18741 pm_node_list_t *nodes;
18742 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18743 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18744 } else {
18745 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18746 }
18747
18748 parse_heredoc_dedent(parser, nodes, common_whitespace);
18749 }
18750 }
18751
18752 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18753 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18754 }
18755
18756 return node;
18757 }
18759 parser_lex(parser);
18760 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18761
18762 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18763 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18764 }
18765
18766 return node;
18767 }
18768 case PM_TOKEN_INTEGER: {
18769 pm_node_flags_t base = parser->integer_base;
18770 parser_lex(parser);
18771 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18772 }
18774 pm_node_flags_t base = parser->integer_base;
18775 parser_lex(parser);
18776 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18777 }
18779 pm_node_flags_t base = parser->integer_base;
18780 parser_lex(parser);
18781 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18782 }
18784 pm_node_flags_t base = parser->integer_base;
18785 parser_lex(parser);
18786 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18787 }
18789 parser_lex(parser);
18790 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18792 parser_lex(parser);
18793 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18795 parser_lex(parser);
18796 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18798 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18799 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18800 }
18801
18802 parser_lex(parser);
18803 pm_token_t keyword = parser->previous;
18804
18805 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18806 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18807
18808 switch (PM_NODE_TYPE(new_name)) {
18814 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18815 }
18816 } else {
18817 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18818 }
18819
18820 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18821 }
18822 case PM_SYMBOL_NODE:
18825 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18826 }
18827 }
18829 default:
18830 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18831 }
18832 }
18833 case PM_TOKEN_KEYWORD_CASE: {
18834 size_t opening_newline_index = token_newline_index(parser);
18835 parser_lex(parser);
18836
18837 pm_token_t case_keyword = parser->previous;
18838 pm_node_t *predicate = NULL;
18839
18840 pm_node_list_t current_block_exits = { 0 };
18841 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18842
18843 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18844 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18845 predicate = NULL;
18846 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18847 predicate = NULL;
18848 } else if (!token_begins_expression_p(parser->current.type)) {
18849 predicate = NULL;
18850 } else {
18851 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18852 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18853 }
18854
18855 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18856 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18857 parser_lex(parser);
18858
18859 pop_block_exits(parser, previous_block_exits);
18860 pm_node_list_free(&current_block_exits);
18861
18862 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18863 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18864 }
18865
18866 // At this point we can create a case node, though we don't yet know
18867 // if it is a case-in or case-when node.
18868 pm_token_t end_keyword = not_provided(parser);
18869 pm_node_t *node;
18870
18871 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18872 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18873 pm_static_literals_t literals = { 0 };
18874
18875 // At this point we've seen a when keyword, so we know this is a
18876 // case-when node. We will continue to parse the when nodes
18877 // until we hit the end of the list.
18878 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18879 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18880 parser_lex(parser);
18881
18882 pm_token_t when_keyword = parser->previous;
18883 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18884
18885 do {
18886 if (accept1(parser, PM_TOKEN_USTAR)) {
18887 pm_token_t operator = parser->previous;
18888 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18889
18890 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18891 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18892
18893 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18894 } else {
18895 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18896 pm_when_node_conditions_append(when_node, condition);
18897
18898 // If we found a missing node, then this is a syntax
18899 // error and we should stop looping.
18900 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18901
18902 // If this is a string node, then we need to mark it
18903 // as frozen because when clause strings are frozen.
18904 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18905 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18906 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18907 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18908 }
18909
18910 pm_when_clause_static_literals_add(parser, &literals, condition);
18911 }
18912 } while (accept1(parser, PM_TOKEN_COMMA));
18913
18914 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18915 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18916 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18917 }
18918 } else {
18919 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18920 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18921 }
18922
18924 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18925 if (statements != NULL) {
18926 pm_when_node_statements_set(when_node, statements);
18927 }
18928 }
18929
18930 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18931 }
18932
18933 // If we didn't parse any conditions (in or when) then we need
18934 // to indicate that we have an error.
18935 if (case_node->conditions.size == 0) {
18936 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18937 }
18938
18939 pm_static_literals_free(&literals);
18940 node = (pm_node_t *) case_node;
18941 } else {
18942 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18943
18944 // If this is a case-match node (i.e., it is a pattern matching
18945 // case statement) then we must have a predicate.
18946 if (predicate == NULL) {
18947 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18948 }
18949
18950 // At this point we expect that we're parsing a case-in node. We
18951 // will continue to parse the in nodes until we hit the end of
18952 // the list.
18953 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18954 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18955
18956 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18957 parser->pattern_matching_newlines = true;
18958
18959 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18960 parser->command_start = false;
18961 parser_lex(parser);
18962
18963 pm_token_t in_keyword = parser->previous;
18964
18965 pm_constant_id_list_t captures = { 0 };
18966 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18967
18968 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18969 pm_constant_id_list_free(&captures);
18970
18971 // Since we're in the top-level of the case-in node we need
18972 // to check for guard clauses in the form of `if` or
18973 // `unless` statements.
18974 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18975 pm_token_t keyword = parser->previous;
18976 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18977 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18978 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18979 pm_token_t keyword = parser->previous;
18980 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18981 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18982 }
18983
18984 // Now we need to check for the terminator of the in node's
18985 // pattern. It can be a newline or semicolon optionally
18986 // followed by a `then` keyword.
18987 pm_token_t then_keyword;
18988 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18989 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18990 then_keyword = parser->previous;
18991 } else {
18992 then_keyword = not_provided(parser);
18993 }
18994 } else {
18995 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18996 then_keyword = parser->previous;
18997 }
18998
18999 // Now we can actually parse the statements associated with
19000 // the in node.
19001 pm_statements_node_t *statements;
19003 statements = NULL;
19004 } else {
19005 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
19006 }
19007
19008 // Now that we have the full pattern and statements, we can
19009 // create the node and attach it to the case node.
19010 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
19011 pm_case_match_node_condition_append(case_node, condition);
19012 }
19013
19014 // If we didn't parse any conditions (in or when) then we need
19015 // to indicate that we have an error.
19016 if (case_node->conditions.size == 0) {
19017 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19018 }
19019
19020 node = (pm_node_t *) case_node;
19021 }
19022
19023 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19024 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
19025 pm_token_t else_keyword = parser->previous;
19026 pm_else_node_t *else_node;
19027
19028 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19029 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
19030 } else {
19031 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
19032 }
19033
19034 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19035 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
19036 } else {
19037 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
19038 }
19039 }
19040
19041 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
19042 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
19043
19044 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19045 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
19046 } else {
19047 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
19048 }
19049
19050 pop_block_exits(parser, previous_block_exits);
19051 pm_node_list_free(&current_block_exits);
19052
19053 return node;
19054 }
19056 size_t opening_newline_index = token_newline_index(parser);
19057 parser_lex(parser);
19058
19059 pm_token_t begin_keyword = parser->previous;
19060 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19061
19062 pm_node_list_t current_block_exits = { 0 };
19063 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19064 pm_statements_node_t *begin_statements = NULL;
19065
19067 pm_accepts_block_stack_push(parser, true);
19068 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19069 pm_accepts_block_stack_pop(parser);
19070 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19071 }
19072
19073 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19074 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19075 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19076
19077 begin_node->base.location.end = parser->previous.end;
19078 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19079
19080 pop_block_exits(parser, previous_block_exits);
19081 pm_node_list_free(&current_block_exits);
19082
19083 return (pm_node_t *) begin_node;
19084 }
19086 pm_node_list_t current_block_exits = { 0 };
19087 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19088
19089 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19090 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19091 }
19092
19093 parser_lex(parser);
19094 pm_token_t keyword = parser->previous;
19095
19096 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19097 pm_token_t opening = parser->previous;
19098 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19099
19100 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19101 pm_context_t context = parser->current_context->context;
19102 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19103 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19104 }
19105
19106 flush_block_exits(parser, previous_block_exits);
19107 pm_node_list_free(&current_block_exits);
19108
19109 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19110 }
19114 parser_lex(parser);
19115
19116 pm_token_t keyword = parser->previous;
19117 pm_arguments_t arguments = { 0 };
19118
19119 if (
19120 token_begins_expression_p(parser->current.type) ||
19121 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19122 ) {
19123 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19124
19125 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19126 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19127 }
19128 }
19129
19130 switch (keyword.type) {
19132 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19133 if (!parser->partial_script) parse_block_exit(parser, node);
19134 return node;
19135 }
19136 case PM_TOKEN_KEYWORD_NEXT: {
19137 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19138 if (!parser->partial_script) parse_block_exit(parser, node);
19139 return node;
19140 }
19142 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19143 parse_return(parser, node);
19144 return node;
19145 }
19146 default:
19147 assert(false && "unreachable");
19148 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19149 }
19150 }
19152 parser_lex(parser);
19153
19154 pm_token_t keyword = parser->previous;
19155 pm_arguments_t arguments = { 0 };
19156 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19157
19158 if (
19159 arguments.opening_loc.start == NULL &&
19160 arguments.arguments == NULL &&
19161 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19162 ) {
19163 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19164 }
19165
19166 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19167 }
19169 parser_lex(parser);
19170
19171 pm_token_t keyword = parser->previous;
19172 pm_arguments_t arguments = { 0 };
19173 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19174
19175 // It's possible that we've parsed a block argument through our
19176 // call to parse_arguments_list. If we found one, we should mark it
19177 // as invalid and destroy it, as we don't have a place for it on the
19178 // yield node.
19179 if (arguments.block != NULL) {
19180 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19181 pm_node_destroy(parser, arguments.block);
19182 arguments.block = NULL;
19183 }
19184
19185 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19186 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19187
19188 return node;
19189 }
19191 size_t opening_newline_index = token_newline_index(parser);
19192 parser_lex(parser);
19193
19194 pm_token_t class_keyword = parser->previous;
19195 pm_do_loop_stack_push(parser, false);
19196
19197 pm_node_list_t current_block_exits = { 0 };
19198 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19199
19200 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19201 pm_token_t operator = parser->previous;
19202 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19203
19204 pm_parser_scope_push(parser, true);
19205 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19206 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19207 }
19208
19209 pm_node_t *statements = NULL;
19211 pm_accepts_block_stack_push(parser, true);
19212 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19213 pm_accepts_block_stack_pop(parser);
19214 }
19215
19216 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19217 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19218 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19219 } else {
19220 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19221 }
19222
19223 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19224
19225 pm_constant_id_list_t locals;
19226 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19227
19228 pm_parser_scope_pop(parser);
19229 pm_do_loop_stack_pop(parser);
19230
19231 flush_block_exits(parser, previous_block_exits);
19232 pm_node_list_free(&current_block_exits);
19233
19234 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19235 }
19236
19237 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19238 pm_token_t name = parser->previous;
19239 if (name.type != PM_TOKEN_CONSTANT) {
19240 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19241 }
19242
19243 pm_token_t inheritance_operator;
19244 pm_node_t *superclass;
19245
19246 if (match1(parser, PM_TOKEN_LESS)) {
19247 inheritance_operator = parser->current;
19248 lex_state_set(parser, PM_LEX_STATE_BEG);
19249
19250 parser->command_start = true;
19251 parser_lex(parser);
19252
19253 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19254 } else {
19255 inheritance_operator = not_provided(parser);
19256 superclass = NULL;
19257 }
19258
19259 pm_parser_scope_push(parser, true);
19260
19261 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19262 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19263 } else {
19264 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19265 }
19266 pm_node_t *statements = NULL;
19267
19269 pm_accepts_block_stack_push(parser, true);
19270 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19271 pm_accepts_block_stack_pop(parser);
19272 }
19273
19274 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19275 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19276 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19277 } else {
19278 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19279 }
19280
19281 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19282
19283 if (context_def_p(parser)) {
19284 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19285 }
19286
19287 pm_constant_id_list_t locals;
19288 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19289
19290 pm_parser_scope_pop(parser);
19291 pm_do_loop_stack_pop(parser);
19292
19293 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19294 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19295 }
19296
19297 pop_block_exits(parser, previous_block_exits);
19298 pm_node_list_free(&current_block_exits);
19299
19300 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19301 }
19302 case PM_TOKEN_KEYWORD_DEF: {
19303 pm_node_list_t current_block_exits = { 0 };
19304 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19305
19306 pm_token_t def_keyword = parser->current;
19307 size_t opening_newline_index = token_newline_index(parser);
19308
19309 pm_node_t *receiver = NULL;
19310 pm_token_t operator = not_provided(parser);
19311 pm_token_t name;
19312
19313 // This context is necessary for lexing `...` in bare params
19314 // correctly. It must be pushed before lexing the first param, so it
19315 // is here.
19316 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19317 parser_lex(parser);
19318
19319 // This will be false if the method name is not a valid identifier
19320 // but could be followed by an operator.
19321 bool valid_name = true;
19322
19323 switch (parser->current.type) {
19324 case PM_CASE_OPERATOR:
19325 pm_parser_scope_push(parser, true);
19326 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19327 parser_lex(parser);
19328
19329 name = parser->previous;
19330 break;
19331 case PM_TOKEN_IDENTIFIER: {
19332 parser_lex(parser);
19333
19334 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19335 receiver = parse_variable_call(parser);
19336
19337 pm_parser_scope_push(parser, true);
19338 lex_state_set(parser, PM_LEX_STATE_FNAME);
19339 parser_lex(parser);
19340
19341 operator = parser->previous;
19342 name = parse_method_definition_name(parser);
19343 } else {
19344 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19345 pm_parser_scope_push(parser, true);
19346
19347 name = parser->previous;
19348 }
19349
19350 break;
19351 }
19355 valid_name = false;
19357 case PM_TOKEN_CONSTANT:
19365 pm_parser_scope_push(parser, true);
19366 parser_lex(parser);
19367
19368 pm_token_t identifier = parser->previous;
19369
19370 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19371 lex_state_set(parser, PM_LEX_STATE_FNAME);
19372 parser_lex(parser);
19373 operator = parser->previous;
19374
19375 switch (identifier.type) {
19376 case PM_TOKEN_CONSTANT:
19377 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19378 break;
19380 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19381 break;
19383 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19384 break;
19386 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19387 break;
19389 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19390 break;
19392 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19393 break;
19395 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19396 break;
19398 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19399 break;
19401 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19402 break;
19404 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19405 break;
19407 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19408 break;
19409 default:
19410 break;
19411 }
19412
19413 name = parse_method_definition_name(parser);
19414 } else {
19415 if (!valid_name) {
19416 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19417 }
19418
19419 name = identifier;
19420 }
19421 break;
19422 }
19424 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19425 // the inner expression of this parenthesis should not be
19426 // processed under this context. Thus, the context is popped
19427 // here.
19428 context_pop(parser);
19429 parser_lex(parser);
19430
19431 pm_token_t lparen = parser->previous;
19432 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19433
19434 accept1(parser, PM_TOKEN_NEWLINE);
19435 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19436 pm_token_t rparen = parser->previous;
19437
19438 lex_state_set(parser, PM_LEX_STATE_FNAME);
19439 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19440
19441 operator = parser->previous;
19442 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
19443
19444 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19445 // reason as described above.
19446 pm_parser_scope_push(parser, true);
19447 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19448 name = parse_method_definition_name(parser);
19449 break;
19450 }
19451 default:
19452 pm_parser_scope_push(parser, true);
19453 name = parse_method_definition_name(parser);
19454 break;
19455 }
19456
19457 pm_token_t lparen;
19458 pm_token_t rparen;
19459 pm_parameters_node_t *params;
19460
19461 switch (parser->current.type) {
19463 parser_lex(parser);
19464 lparen = parser->previous;
19465
19466 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19467 params = NULL;
19468 } else {
19469 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19470 }
19471
19472 lex_state_set(parser, PM_LEX_STATE_BEG);
19473 parser->command_start = true;
19474
19475 context_pop(parser);
19476 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19477 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19478 parser->previous.start = parser->previous.end;
19479 parser->previous.type = PM_TOKEN_MISSING;
19480 }
19481
19482 rparen = parser->previous;
19483 break;
19484 }
19485 case PM_CASE_PARAMETER: {
19486 // If we're about to lex a label, we need to add the label
19487 // state to make sure the next newline is ignored.
19488 if (parser->current.type == PM_TOKEN_LABEL) {
19489 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19490 }
19491
19492 lparen = not_provided(parser);
19493 rparen = not_provided(parser);
19494 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19495
19496 context_pop(parser);
19497 break;
19498 }
19499 default: {
19500 lparen = not_provided(parser);
19501 rparen = not_provided(parser);
19502 params = NULL;
19503
19504 context_pop(parser);
19505 break;
19506 }
19507 }
19508
19509 pm_node_t *statements = NULL;
19510 pm_token_t equal;
19511 pm_token_t end_keyword;
19512
19513 if (accept1(parser, PM_TOKEN_EQUAL)) {
19514 if (token_is_setter_name(&name)) {
19515 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19516 }
19517 equal = parser->previous;
19518
19519 context_push(parser, PM_CONTEXT_DEF);
19520 pm_do_loop_stack_push(parser, false);
19521 statements = (pm_node_t *) pm_statements_node_create(parser);
19522
19523 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19524
19525 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19526 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19527
19528 pm_token_t rescue_keyword = parser->previous;
19529 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19530 context_pop(parser);
19531
19532 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19533 }
19534
19535 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19536 pm_do_loop_stack_pop(parser);
19537 context_pop(parser);
19538 end_keyword = not_provided(parser);
19539 } else {
19540 equal = not_provided(parser);
19541
19542 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19543 lex_state_set(parser, PM_LEX_STATE_BEG);
19544 parser->command_start = true;
19545 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19546 } else {
19547 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19548 }
19549
19550 pm_accepts_block_stack_push(parser, true);
19551 pm_do_loop_stack_push(parser, false);
19552
19554 pm_accepts_block_stack_push(parser, true);
19555 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19556 pm_accepts_block_stack_pop(parser);
19557 }
19558
19560 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19561 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19562 } else {
19563 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19564 }
19565
19566 pm_accepts_block_stack_pop(parser);
19567 pm_do_loop_stack_pop(parser);
19568
19569 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19570 end_keyword = parser->previous;
19571 }
19572
19573 pm_constant_id_list_t locals;
19574 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19575 pm_parser_scope_pop(parser);
19576
19582 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19583
19584 flush_block_exits(parser, previous_block_exits);
19585 pm_node_list_free(&current_block_exits);
19586
19587 return (pm_node_t *) pm_def_node_create(
19588 parser,
19589 name_id,
19590 &name,
19591 receiver,
19592 params,
19593 statements,
19594 &locals,
19595 &def_keyword,
19596 &operator,
19597 &lparen,
19598 &rparen,
19599 &equal,
19600 &end_keyword
19601 );
19602 }
19604 parser_lex(parser);
19605 pm_token_t keyword = parser->previous;
19606
19607 pm_token_t lparen;
19608 pm_token_t rparen;
19609 pm_node_t *expression;
19610
19611 context_push(parser, PM_CONTEXT_DEFINED);
19612 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19613
19614 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19615 lparen = parser->previous;
19616
19617 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19618 expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19619 lparen = not_provided(parser);
19620 rparen = not_provided(parser);
19621 } else {
19622 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19623
19624 if (parser->recovering) {
19625 rparen = not_provided(parser);
19626 } else {
19627 accept1(parser, PM_TOKEN_NEWLINE);
19628 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19629 rparen = parser->previous;
19630 }
19631 }
19632 } else {
19633 lparen = not_provided(parser);
19634 rparen = not_provided(parser);
19635 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19636 }
19637
19638 context_pop(parser);
19639 return (pm_node_t *) pm_defined_node_create(
19640 parser,
19641 &lparen,
19642 expression,
19643 &rparen,
19644 &PM_LOCATION_TOKEN_VALUE(&keyword)
19645 );
19646 }
19648 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19649 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19650 }
19651
19652 parser_lex(parser);
19653 pm_token_t keyword = parser->previous;
19654
19655 if (context_def_p(parser)) {
19656 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19657 }
19658
19659 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19660 pm_token_t opening = parser->previous;
19661 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19662
19663 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19664 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19665 }
19667 parser_lex(parser);
19668 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19669 case PM_TOKEN_KEYWORD_FOR: {
19670 size_t opening_newline_index = token_newline_index(parser);
19671 parser_lex(parser);
19672
19673 pm_token_t for_keyword = parser->previous;
19674 pm_node_t *index;
19675
19676 context_push(parser, PM_CONTEXT_FOR_INDEX);
19677
19678 // First, parse out the first index expression.
19679 if (accept1(parser, PM_TOKEN_USTAR)) {
19680 pm_token_t star_operator = parser->previous;
19681 pm_node_t *name = NULL;
19682
19683 if (token_begins_expression_p(parser->current.type)) {
19684 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19685 }
19686
19687 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19688 } else if (token_begins_expression_p(parser->current.type)) {
19689 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19690 } else {
19691 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19692 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19693 }
19694
19695 // Now, if there are multiple index expressions, parse them out.
19696 if (match1(parser, PM_TOKEN_COMMA)) {
19697 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19698 } else {
19699 index = parse_target(parser, index, false, false);
19700 }
19701
19702 context_pop(parser);
19703 pm_do_loop_stack_push(parser, true);
19704
19705 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19706 pm_token_t in_keyword = parser->previous;
19707
19708 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19709 pm_do_loop_stack_pop(parser);
19710
19711 pm_token_t do_keyword;
19712 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19713 do_keyword = parser->previous;
19714 } else {
19715 do_keyword = not_provided(parser);
19716 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19717 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19718 }
19719 }
19720
19721 pm_statements_node_t *statements = NULL;
19722 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19723 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19724 }
19725
19726 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19727 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19728
19729 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19730 }
19732 if (parser_end_of_line_p(parser)) {
19733 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19734 }
19735
19736 size_t opening_newline_index = token_newline_index(parser);
19737 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19738 parser_lex(parser);
19739
19740 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19742 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19743 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19744 }
19745
19746 parser_lex(parser);
19747 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19748 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19749
19750 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19751 pm_node_destroy(parser, name);
19752 } else {
19753 pm_undef_node_append(undef, name);
19754
19755 while (match1(parser, PM_TOKEN_COMMA)) {
19756 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19757 parser_lex(parser);
19758 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19759
19760 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19761 pm_node_destroy(parser, name);
19762 break;
19763 }
19764
19765 pm_undef_node_append(undef, name);
19766 }
19767 }
19768
19769 return (pm_node_t *) undef;
19770 }
19771 case PM_TOKEN_KEYWORD_NOT: {
19772 parser_lex(parser);
19773
19774 pm_token_t message = parser->previous;
19775 pm_arguments_t arguments = { 0 };
19776 pm_node_t *receiver = NULL;
19777
19778 accept1(parser, PM_TOKEN_NEWLINE);
19779
19780 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19781 pm_token_t lparen = parser->previous;
19782
19783 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19784 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19785 } else {
19786 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19787 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19788
19789 if (!parser->recovering) {
19790 accept1(parser, PM_TOKEN_NEWLINE);
19791 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19792 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19793 }
19794 }
19795 } else {
19796 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19797 }
19798
19799 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19800 }
19802 size_t opening_newline_index = token_newline_index(parser);
19803 parser_lex(parser);
19804
19805 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19806 }
19808 pm_node_list_t current_block_exits = { 0 };
19809 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19810
19811 size_t opening_newline_index = token_newline_index(parser);
19812 parser_lex(parser);
19813 pm_token_t module_keyword = parser->previous;
19814
19815 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19816 pm_token_t name;
19817
19818 // If we can recover from a syntax error that occurred while parsing
19819 // the name of the module, then we'll handle that here.
19820 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19821 pop_block_exits(parser, previous_block_exits);
19822 pm_node_list_free(&current_block_exits);
19823
19824 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19825 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19826 }
19827
19828 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19829 pm_token_t double_colon = parser->previous;
19830
19831 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19832 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19833 }
19834
19835 // Here we retrieve the name of the module. If it wasn't a constant,
19836 // then it's possible that `module foo` was passed, which is a
19837 // syntax error. We handle that here as well.
19838 name = parser->previous;
19839 if (name.type != PM_TOKEN_CONSTANT) {
19840 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19841 }
19842
19843 pm_parser_scope_push(parser, true);
19844 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19845 pm_node_t *statements = NULL;
19846
19848 pm_accepts_block_stack_push(parser, true);
19849 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19850 pm_accepts_block_stack_pop(parser);
19851 }
19852
19854 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19855 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19856 } else {
19857 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19858 }
19859
19860 pm_constant_id_list_t locals;
19861 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19862
19863 pm_parser_scope_pop(parser);
19864 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19865
19866 if (context_def_p(parser)) {
19867 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19868 }
19869
19870 pop_block_exits(parser, previous_block_exits);
19871 pm_node_list_free(&current_block_exits);
19872
19873 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19874 }
19876 parser_lex(parser);
19877 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19878 case PM_TOKEN_KEYWORD_REDO: {
19879 parser_lex(parser);
19880
19881 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19882 if (!parser->partial_script) parse_block_exit(parser, node);
19883
19884 return node;
19885 }
19887 parser_lex(parser);
19888
19889 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19890 parse_retry(parser, node);
19891
19892 return node;
19893 }
19895 parser_lex(parser);
19896 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19898 parser_lex(parser);
19899 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19901 size_t opening_newline_index = token_newline_index(parser);
19902
19903 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19904 pm_do_loop_stack_push(parser, true);
19905
19906 parser_lex(parser);
19907 pm_token_t keyword = parser->previous;
19908 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19909
19910 pm_do_loop_stack_pop(parser);
19911 context_pop(parser);
19912
19913 pm_token_t do_keyword;
19914 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19915 do_keyword = parser->previous;
19916 } else {
19917 do_keyword = not_provided(parser);
19918 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19919 }
19920
19921 pm_statements_node_t *statements = NULL;
19922 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19923 pm_accepts_block_stack_push(parser, true);
19924 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19925 pm_accepts_block_stack_pop(parser);
19926 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19927 }
19928
19929 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19930 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19931
19932 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19933 }
19935 size_t opening_newline_index = token_newline_index(parser);
19936
19937 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19938 pm_do_loop_stack_push(parser, true);
19939
19940 parser_lex(parser);
19941 pm_token_t keyword = parser->previous;
19942 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19943
19944 pm_do_loop_stack_pop(parser);
19945 context_pop(parser);
19946
19947 pm_token_t do_keyword;
19948 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19949 do_keyword = parser->previous;
19950 } else {
19951 do_keyword = not_provided(parser);
19952 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19953 }
19954
19955 pm_statements_node_t *statements = NULL;
19956 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19957 pm_accepts_block_stack_push(parser, true);
19958 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19959 pm_accepts_block_stack_pop(parser);
19960 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19961 }
19962
19963 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19964 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19965
19966 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19967 }
19969 parser_lex(parser);
19970 pm_token_t opening = parser->previous;
19971 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19972
19973 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19974 accept1(parser, PM_TOKEN_WORDS_SEP);
19975 if (match1(parser, PM_TOKEN_STRING_END)) break;
19976
19977 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19978 pm_token_t opening = not_provided(parser);
19979 pm_token_t closing = not_provided(parser);
19980 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19981 }
19982
19983 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19984 }
19985
19986 pm_token_t closing = parser->current;
19987 if (match1(parser, PM_TOKEN_EOF)) {
19988 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19989 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19990 } else {
19991 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19992 }
19993 pm_array_node_close_set(array, &closing);
19994
19995 return (pm_node_t *) array;
19996 }
19998 parser_lex(parser);
19999 pm_token_t opening = parser->previous;
20000 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20001
20002 // This is the current node that we are parsing that will be added to the
20003 // list of elements.
20004 pm_node_t *current = NULL;
20005
20006 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20007 switch (parser->current.type) {
20008 case PM_TOKEN_WORDS_SEP: {
20009 if (current == NULL) {
20010 // If we hit a separator before we have any content, then we don't
20011 // need to do anything.
20012 } else {
20013 // If we hit a separator after we've hit content, then we need to
20014 // append that content to the list and reset the current node.
20015 pm_array_node_elements_append(array, current);
20016 current = NULL;
20017 }
20018
20019 parser_lex(parser);
20020 break;
20021 }
20023 pm_token_t opening = not_provided(parser);
20024 pm_token_t closing = not_provided(parser);
20025
20026 if (current == NULL) {
20027 // If we hit content and the current node is NULL, then this is
20028 // the first string content we've seen. In that case we're going
20029 // to create a new symbol node and set that to the current.
20030 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
20031 parser_lex(parser);
20032 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20033 // If we hit string content and the current node is an
20034 // interpolated symbol, then we need to append the string content
20035 // to the list of child nodes.
20036 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20037 parser_lex(parser);
20038
20039 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
20040 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20041 // If we hit string content and the current node is a symbol node,
20042 // then we need to convert the current node into an interpolated
20043 // symbol and add the string content to the list of child nodes.
20044 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20045 pm_token_t bounds = not_provided(parser);
20046
20047 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
20048 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
20049 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
20050 parser_lex(parser);
20051
20052 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20053 pm_interpolated_symbol_node_append(interpolated, first_string);
20054 pm_interpolated_symbol_node_append(interpolated, second_string);
20055
20056 xfree(current);
20057 current = (pm_node_t *) interpolated;
20058 } else {
20059 assert(false && "unreachable");
20060 }
20061
20062 break;
20063 }
20064 case PM_TOKEN_EMBVAR: {
20065 bool start_location_set = false;
20066 if (current == NULL) {
20067 // If we hit an embedded variable and the current node is NULL,
20068 // then this is the start of a new symbol. We'll set the current
20069 // node to a new interpolated symbol.
20070 pm_token_t opening = not_provided(parser);
20071 pm_token_t closing = not_provided(parser);
20072 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20073 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20074 // If we hit an embedded variable and the current node is a symbol
20075 // node, then we'll create an interpolated symbol and add the
20076 // current node, converted to a string node, to its list of parts.
20077 pm_token_t opening = not_provided(parser);
20078 pm_token_t closing = not_provided(parser);
20079 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20080
20081 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20082 pm_interpolated_symbol_node_append(interpolated, current);
20083 interpolated->base.location.start = current->location.start;
20084 start_location_set = true;
20085 current = (pm_node_t *) interpolated;
20086 } else {
20087 // If we hit an embedded variable and the current node is an
20088 // interpolated symbol, then we'll just add the embedded variable.
20089 }
20090
20091 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20092 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20093 if (!start_location_set) {
20094 current->location.start = part->location.start;
20095 }
20096 break;
20097 }
20099 bool start_location_set = false;
20100 if (current == NULL) {
20101 // If we hit an embedded expression and the current node is NULL,
20102 // then this is the start of a new symbol. We'll set the current
20103 // node to a new interpolated symbol.
20104 pm_token_t opening = not_provided(parser);
20105 pm_token_t closing = not_provided(parser);
20106 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20107 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20108 // If we hit an embedded expression and the current node is a
20109 // symbol node, then we'll create an interpolated symbol and add
20110 // the current node, converted to a string node, to its list of
20111 // parts.
20112 pm_token_t opening = not_provided(parser);
20113 pm_token_t closing = not_provided(parser);
20114 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20115
20116 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20117 pm_interpolated_symbol_node_append(interpolated, current);
20118 interpolated->base.location.start = current->location.start;
20119 start_location_set = true;
20120 current = (pm_node_t *) interpolated;
20121 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20122 // If we hit an embedded expression and the current node is an
20123 // interpolated symbol, then we'll just continue on.
20124 } else {
20125 assert(false && "unreachable");
20126 }
20127
20128 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20129 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20130 if (!start_location_set) {
20131 current->location.start = part->location.start;
20132 }
20133 break;
20134 }
20135 default:
20136 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20137 parser_lex(parser);
20138 break;
20139 }
20140 }
20141
20142 // If we have a current node, then we need to append it to the list.
20143 if (current) {
20144 pm_array_node_elements_append(array, current);
20145 }
20146
20147 pm_token_t closing = parser->current;
20148 if (match1(parser, PM_TOKEN_EOF)) {
20149 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20150 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20151 } else {
20152 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20153 }
20154 pm_array_node_close_set(array, &closing);
20155
20156 return (pm_node_t *) array;
20157 }
20159 parser_lex(parser);
20160 pm_token_t opening = parser->previous;
20161 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20162
20163 // skip all leading whitespaces
20164 accept1(parser, PM_TOKEN_WORDS_SEP);
20165
20166 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20167 accept1(parser, PM_TOKEN_WORDS_SEP);
20168 if (match1(parser, PM_TOKEN_STRING_END)) break;
20169
20170 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20171 pm_token_t opening = not_provided(parser);
20172 pm_token_t closing = not_provided(parser);
20173
20174 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20175 pm_array_node_elements_append(array, string);
20176 }
20177
20178 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20179 }
20180
20181 pm_token_t closing = parser->current;
20182 if (match1(parser, PM_TOKEN_EOF)) {
20183 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20184 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20185 } else {
20186 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20187 }
20188
20189 pm_array_node_close_set(array, &closing);
20190 return (pm_node_t *) array;
20191 }
20193 parser_lex(parser);
20194 pm_token_t opening = parser->previous;
20195 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20196
20197 // This is the current node that we are parsing that will be added
20198 // to the list of elements.
20199 pm_node_t *current = NULL;
20200
20201 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20202 switch (parser->current.type) {
20203 case PM_TOKEN_WORDS_SEP: {
20204 // Reset the explicit encoding if we hit a separator
20205 // since each element can have its own encoding.
20206 parser->explicit_encoding = NULL;
20207
20208 if (current == NULL) {
20209 // If we hit a separator before we have any content,
20210 // then we don't need to do anything.
20211 } else {
20212 // If we hit a separator after we've hit content,
20213 // then we need to append that content to the list
20214 // and reset the current node.
20215 pm_array_node_elements_append(array, current);
20216 current = NULL;
20217 }
20218
20219 parser_lex(parser);
20220 break;
20221 }
20223 pm_token_t opening = not_provided(parser);
20224 pm_token_t closing = not_provided(parser);
20225
20226 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20227 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20228 parser_lex(parser);
20229
20230 if (current == NULL) {
20231 // If we hit content and the current node is NULL,
20232 // then this is the first string content we've seen.
20233 // In that case we're going to create a new string
20234 // node and set that to the current.
20235 current = string;
20236 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20237 // If we hit string content and the current node is
20238 // an interpolated string, then we need to append
20239 // the string content to the list of child nodes.
20240 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20241 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20242 // If we hit string content and the current node is
20243 // a string node, then we need to convert the
20244 // current node into an interpolated string and add
20245 // the string content to the list of child nodes.
20246 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20247 pm_interpolated_string_node_append(interpolated, current);
20248 pm_interpolated_string_node_append(interpolated, string);
20249 current = (pm_node_t *) interpolated;
20250 } else {
20251 assert(false && "unreachable");
20252 }
20253
20254 break;
20255 }
20256 case PM_TOKEN_EMBVAR: {
20257 if (current == NULL) {
20258 // If we hit an embedded variable and the current
20259 // node is NULL, then this is the start of a new
20260 // string. We'll set the current node to a new
20261 // interpolated string.
20262 pm_token_t opening = not_provided(parser);
20263 pm_token_t closing = not_provided(parser);
20264 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20265 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20266 // If we hit an embedded variable and the current
20267 // node is a string node, then we'll convert the
20268 // current into an interpolated string and add the
20269 // string node to the list of parts.
20270 pm_token_t opening = not_provided(parser);
20271 pm_token_t closing = not_provided(parser);
20272 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20273 pm_interpolated_string_node_append(interpolated, current);
20274 current = (pm_node_t *) interpolated;
20275 } else {
20276 // If we hit an embedded variable and the current
20277 // node is an interpolated string, then we'll just
20278 // add the embedded variable.
20279 }
20280
20281 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20282 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20283 break;
20284 }
20286 if (current == NULL) {
20287 // If we hit an embedded expression and the current
20288 // node is NULL, then this is the start of a new
20289 // string. We'll set the current node to a new
20290 // interpolated string.
20291 pm_token_t opening = not_provided(parser);
20292 pm_token_t closing = not_provided(parser);
20293 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20294 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20295 // If we hit an embedded expression and the current
20296 // node is a string node, then we'll convert the
20297 // current into an interpolated string and add the
20298 // string node to the list of parts.
20299 pm_token_t opening = not_provided(parser);
20300 pm_token_t closing = not_provided(parser);
20301 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20302 pm_interpolated_string_node_append(interpolated, current);
20303 current = (pm_node_t *) interpolated;
20304 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20305 // If we hit an embedded expression and the current
20306 // node is an interpolated string, then we'll just
20307 // continue on.
20308 } else {
20309 assert(false && "unreachable");
20310 }
20311
20312 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20313 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20314 break;
20315 }
20316 default:
20317 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20318 parser_lex(parser);
20319 break;
20320 }
20321 }
20322
20323 // If we have a current node, then we need to append it to the list.
20324 if (current) {
20325 pm_array_node_elements_append(array, current);
20326 }
20327
20328 pm_token_t closing = parser->current;
20329 if (match1(parser, PM_TOKEN_EOF)) {
20330 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20331 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20332 } else {
20333 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20334 }
20335
20336 pm_array_node_close_set(array, &closing);
20337 return (pm_node_t *) array;
20338 }
20339 case PM_TOKEN_REGEXP_BEGIN: {
20340 pm_token_t opening = parser->current;
20341 parser_lex(parser);
20342
20343 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20344 // If we get here, then we have an end immediately after a start. In
20345 // that case we'll create an empty content token and return an
20346 // uninterpolated regular expression.
20347 pm_token_t content = (pm_token_t) {
20349 .start = parser->previous.end,
20350 .end = parser->previous.end
20351 };
20352
20353 parser_lex(parser);
20354
20355 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20357
20358 return node;
20359 }
20360
20362
20363 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20364 // In this case we've hit string content so we know the regular
20365 // expression at least has something in it. We'll need to check if the
20366 // following token is the end (in which case we can return a plain
20367 // regular expression) or if it's not then it has interpolation.
20368 pm_string_t unescaped = parser->current_string;
20369 pm_token_t content = parser->current;
20370 bool ascii_only = parser->current_regular_expression_ascii_only;
20371 parser_lex(parser);
20372
20373 // If we hit an end, then we can create a regular expression
20374 // node without interpolation, which can be represented more
20375 // succinctly and more easily compiled.
20376 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20377 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20378
20379 // If we're not immediately followed by a =~, then we want
20380 // to parse all of the errors at this point. If it is
20381 // followed by a =~, then it will get parsed higher up while
20382 // parsing the named captures as well.
20383 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20384 parse_regular_expression_errors(parser, node);
20385 }
20386
20387 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20388 return (pm_node_t *) node;
20389 }
20390
20391 // If we get here, then we have interpolation so we'll need to create
20392 // a regular expression node with interpolation.
20393 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20394
20395 pm_token_t opening = not_provided(parser);
20396 pm_token_t closing = not_provided(parser);
20397 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20398
20399 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20400 // This is extremely strange, but the first string part of a
20401 // regular expression will always be tagged as binary if we
20402 // are in a US-ASCII file, no matter its contents.
20403 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20404 }
20405
20406 pm_interpolated_regular_expression_node_append(interpolated, part);
20407 } else {
20408 // If the first part of the body of the regular expression is not a
20409 // string content, then we have interpolation and we need to create an
20410 // interpolated regular expression node.
20411 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20412 }
20413
20414 // Now that we're here and we have interpolation, we'll parse all of the
20415 // parts into the list.
20416 pm_node_t *part;
20417 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20418 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20419 pm_interpolated_regular_expression_node_append(interpolated, part);
20420 }
20421 }
20422
20423 pm_token_t closing = parser->current;
20424 if (match1(parser, PM_TOKEN_EOF)) {
20425 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20426 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20427 } else {
20428 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20429 }
20430
20431 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20432 return (pm_node_t *) interpolated;
20433 }
20434 case PM_TOKEN_BACKTICK:
20436 parser_lex(parser);
20437 pm_token_t opening = parser->previous;
20438
20439 // When we get here, we don't know if this string is going to have
20440 // interpolation or not, even though it is allowed. Still, we want to be
20441 // able to return a string node without interpolation if we can since
20442 // it'll be faster.
20443 if (match1(parser, PM_TOKEN_STRING_END)) {
20444 // If we get here, then we have an end immediately after a start. In
20445 // that case we'll create an empty content token and return an
20446 // uninterpolated string.
20447 pm_token_t content = (pm_token_t) {
20449 .start = parser->previous.end,
20450 .end = parser->previous.end
20451 };
20452
20453 parser_lex(parser);
20454 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20455 }
20456
20458
20459 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20460 // In this case we've hit string content so we know the string
20461 // at least has something in it. We'll need to check if the
20462 // following token is the end (in which case we can return a
20463 // plain string) or if it's not then it has interpolation.
20464 pm_string_t unescaped = parser->current_string;
20465 pm_token_t content = parser->current;
20466 parser_lex(parser);
20467
20468 if (match1(parser, PM_TOKEN_STRING_END)) {
20469 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20470 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20471 parser_lex(parser);
20472 return node;
20473 }
20474
20475 // If we get here, then we have interpolation so we'll need to
20476 // create a string node with interpolation.
20477 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20478
20479 pm_token_t opening = not_provided(parser);
20480 pm_token_t closing = not_provided(parser);
20481
20482 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20483 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20484
20485 pm_interpolated_xstring_node_append(node, part);
20486 } else {
20487 // If the first part of the body of the string is not a string
20488 // content, then we have interpolation and we need to create an
20489 // interpolated string node.
20490 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20491 }
20492
20493 pm_node_t *part;
20494 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20495 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20496 pm_interpolated_xstring_node_append(node, part);
20497 }
20498 }
20499
20500 pm_token_t closing = parser->current;
20501 if (match1(parser, PM_TOKEN_EOF)) {
20502 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20503 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20504 } else {
20505 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20506 }
20507 pm_interpolated_xstring_node_closing_set(node, &closing);
20508
20509 return (pm_node_t *) node;
20510 }
20511 case PM_TOKEN_USTAR: {
20512 parser_lex(parser);
20513
20514 // * operators at the beginning of expressions are only valid in the
20515 // context of a multiple assignment. We enforce that here. We'll
20516 // still lex past it though and create a missing node place.
20517 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20518 pm_parser_err_prefix(parser, diag_id);
20519 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20520 }
20521
20522 pm_token_t operator = parser->previous;
20523 pm_node_t *name = NULL;
20524
20525 if (token_begins_expression_p(parser->current.type)) {
20526 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20527 }
20528
20529 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20530
20531 if (match1(parser, PM_TOKEN_COMMA)) {
20532 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20533 } else {
20534 return parse_target_validate(parser, splat, true);
20535 }
20536 }
20537 case PM_TOKEN_BANG: {
20538 if (binding_power > PM_BINDING_POWER_UNARY) {
20539 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20540 }
20541
20542 parser_lex(parser);
20543
20544 pm_token_t operator = parser->previous;
20545 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20546 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20547
20548 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20549 return (pm_node_t *) node;
20550 }
20551 case PM_TOKEN_TILDE: {
20552 if (binding_power > PM_BINDING_POWER_UNARY) {
20553 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20554 }
20555 parser_lex(parser);
20556
20557 pm_token_t operator = parser->previous;
20558 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20559 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20560
20561 return (pm_node_t *) node;
20562 }
20563 case PM_TOKEN_UMINUS: {
20564 if (binding_power > PM_BINDING_POWER_UNARY) {
20565 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20566 }
20567 parser_lex(parser);
20568
20569 pm_token_t operator = parser->previous;
20570 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20571 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20572
20573 return (pm_node_t *) node;
20574 }
20575 case PM_TOKEN_UMINUS_NUM: {
20576 parser_lex(parser);
20577
20578 pm_token_t operator = parser->previous;
20579 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20580
20581 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20582 pm_token_t exponent_operator = parser->previous;
20583 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20584 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20585 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20586 } else {
20587 switch (PM_NODE_TYPE(node)) {
20588 case PM_INTEGER_NODE:
20589 case PM_FLOAT_NODE:
20590 case PM_RATIONAL_NODE:
20591 case PM_IMAGINARY_NODE:
20592 parse_negative_numeric(node);
20593 break;
20594 default:
20595 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20596 break;
20597 }
20598 }
20599
20600 return node;
20601 }
20603 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20605
20606 size_t opening_newline_index = token_newline_index(parser);
20607 pm_accepts_block_stack_push(parser, true);
20608 parser_lex(parser);
20609
20610 pm_token_t operator = parser->previous;
20611 pm_parser_scope_push(parser, false);
20612
20613 pm_block_parameters_node_t *block_parameters;
20614
20615 switch (parser->current.type) {
20617 pm_token_t opening = parser->current;
20618 parser_lex(parser);
20619
20620 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20621 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20622 } else {
20623 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20624 }
20625
20626 accept1(parser, PM_TOKEN_NEWLINE);
20627 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20628
20629 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20630 break;
20631 }
20632 case PM_CASE_PARAMETER: {
20633 pm_accepts_block_stack_push(parser, false);
20634 pm_token_t opening = not_provided(parser);
20635 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20636 pm_accepts_block_stack_pop(parser);
20637 break;
20638 }
20639 default: {
20640 block_parameters = NULL;
20641 break;
20642 }
20643 }
20644
20645 pm_token_t opening;
20646 pm_node_t *body = NULL;
20647 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20648
20649 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20650 opening = parser->previous;
20651
20652 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20653 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20654 }
20655
20656 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20657 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20658 } else {
20659 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20660 opening = parser->previous;
20661
20663 pm_accepts_block_stack_push(parser, true);
20664 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20665 pm_accepts_block_stack_pop(parser);
20666 }
20667
20668 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20669 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20670 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20671 } else {
20672 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20673 }
20674
20675 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20676 }
20677
20678 pm_constant_id_list_t locals;
20679 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20680 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20681
20682 pm_parser_scope_pop(parser);
20683 pm_accepts_block_stack_pop(parser);
20684
20685 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20686 }
20687 case PM_TOKEN_UPLUS: {
20688 if (binding_power > PM_BINDING_POWER_UNARY) {
20689 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20690 }
20691 parser_lex(parser);
20692
20693 pm_token_t operator = parser->previous;
20694 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20695 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20696
20697 return (pm_node_t *) node;
20698 }
20700 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20701 case PM_TOKEN_SYMBOL_BEGIN: {
20702 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20703 parser_lex(parser);
20704
20705 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20706 }
20707 default: {
20708 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20709
20710 if (recoverable != PM_CONTEXT_NONE) {
20711 parser->recovering = true;
20712
20713 // If the given error is not the generic one, then we'll add it
20714 // here because it will provide more context in addition to the
20715 // recoverable error that we will also add.
20716 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20717 pm_parser_err_prefix(parser, diag_id);
20718 }
20719
20720 // If we get here, then we are assuming this token is closing a
20721 // parent context, so we'll indicate that to the user so that
20722 // they know how we behaved.
20723 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20724 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20725 // We're going to make a special case here, because "cannot
20726 // parse expression" is pretty generic, and we know here that we
20727 // have an unexpected token.
20728 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20729 } else {
20730 pm_parser_err_prefix(parser, diag_id);
20731 }
20732
20733 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20734 }
20735 }
20736}
20737
20747static pm_node_t *
20748parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20749 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20750
20751 // Contradicting binding powers, the right-hand-side value of the assignment
20752 // allows the `rescue` modifier.
20753 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20754 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20755
20756 pm_token_t rescue = parser->current;
20757 parser_lex(parser);
20758
20759 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20760 context_pop(parser);
20761
20762 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20763 }
20764
20765 return value;
20766}
20767
20772static void
20773parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20774 switch (PM_NODE_TYPE(node)) {
20775 case PM_BEGIN_NODE: {
20776 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20777 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20778 break;
20779 }
20782 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20783 break;
20784 }
20785 case PM_PARENTHESES_NODE: {
20786 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20787 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20788 break;
20789 }
20790 case PM_STATEMENTS_NODE: {
20791 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20792 const pm_node_t *statement;
20793
20794 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20795 parse_assignment_value_local(parser, statement);
20796 }
20797 break;
20798 }
20799 default:
20800 break;
20801 }
20802}
20803
20816static pm_node_t *
20817parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20818 bool permitted = true;
20819 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20820
20821 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20822 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20823
20824 parse_assignment_value_local(parser, value);
20825 bool single_value = true;
20826
20827 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20828 single_value = false;
20829
20830 pm_token_t opening = not_provided(parser);
20831 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20832
20833 pm_array_node_elements_append(array, value);
20834 value = (pm_node_t *) array;
20835
20836 while (accept1(parser, PM_TOKEN_COMMA)) {
20837 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20838
20839 pm_array_node_elements_append(array, element);
20840 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20841
20842 parse_assignment_value_local(parser, element);
20843 }
20844 }
20845
20846 // Contradicting binding powers, the right-hand-side value of the assignment
20847 // allows the `rescue` modifier.
20848 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20849 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20850
20851 pm_token_t rescue = parser->current;
20852 parser_lex(parser);
20853
20854 bool accepts_command_call_inner = false;
20855
20856 // RHS can accept command call iff the value is a call with arguments
20857 // but without parenthesis.
20858 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20859 pm_call_node_t *call_node = (pm_call_node_t *) value;
20860 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20861 accepts_command_call_inner = true;
20862 }
20863 }
20864
20865 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20866 context_pop(parser);
20867
20868 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20869 }
20870
20871 return value;
20872}
20873
20881static void
20882parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20883 if (call_node->arguments != NULL) {
20884 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20885 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20886 call_node->arguments = NULL;
20887 }
20888
20889 if (call_node->block != NULL) {
20890 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20891 pm_node_destroy(parser, (pm_node_t *) call_node->block);
20892 call_node->block = NULL;
20893 }
20894}
20895
20920
20921static inline const uint8_t *
20922pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20923 cursor++;
20924
20925 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20926 uint8_t value = escape_hexadecimal_digit(*cursor);
20927 cursor++;
20928
20929 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20930 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20931 cursor++;
20932 }
20933
20934 pm_buffer_append_byte(unescaped, value);
20935 } else {
20936 pm_buffer_append_string(unescaped, "\\x", 2);
20937 }
20938
20939 return cursor;
20940}
20941
20942static inline const uint8_t *
20943pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20944 uint8_t value = (uint8_t) (*cursor - '0');
20945 cursor++;
20946
20947 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20948 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20949 cursor++;
20950
20951 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20952 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20953 cursor++;
20954 }
20955 }
20956
20957 pm_buffer_append_byte(unescaped, value);
20958 return cursor;
20959}
20960
20961static inline const uint8_t *
20962pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20963 const uint8_t *start = cursor - 1;
20964 cursor++;
20965
20966 if (cursor >= end) {
20967 pm_buffer_append_string(unescaped, "\\u", 2);
20968 return cursor;
20969 }
20970
20971 if (*cursor != '{') {
20972 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20973 uint32_t value = escape_unicode(parser, cursor, length);
20974
20975 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20976 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20977 }
20978
20979 return cursor + length;
20980 }
20981
20982 cursor++;
20983 for (;;) {
20984 while (cursor < end && *cursor == ' ') cursor++;
20985
20986 if (cursor >= end) break;
20987 if (*cursor == '}') {
20988 cursor++;
20989 break;
20990 }
20991
20992 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20993 uint32_t value = escape_unicode(parser, cursor, length);
20994
20995 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20996 cursor += length;
20997 }
20998
20999 return cursor;
21000}
21001
21002static void
21003pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
21004 const uint8_t *end = source + length;
21005 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
21006
21007 for (;;) {
21008 if (++cursor >= end) {
21009 pm_buffer_append_byte(unescaped, '\\');
21010 return;
21011 }
21012
21013 switch (*cursor) {
21014 case 'x':
21015 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
21016 break;
21017 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
21018 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
21019 break;
21020 case 'u':
21021 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
21022 break;
21023 default:
21024 pm_buffer_append_byte(unescaped, '\\');
21025 break;
21026 }
21027
21028 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
21029 if (next_cursor == NULL) break;
21030
21031 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
21032 cursor = next_cursor;
21033 }
21034
21035 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
21036}
21037
21042static void
21043parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
21045
21046 pm_parser_t *parser = callback_data->parser;
21047 pm_call_node_t *call = callback_data->call;
21048 pm_constant_id_list_t *names = &callback_data->names;
21049
21050 const uint8_t *source = pm_string_source(capture);
21051 size_t length = pm_string_length(capture);
21052 pm_buffer_t unescaped = { 0 };
21053
21054 // First, we need to handle escapes within the name of the capture group.
21055 // This is because regular expressions have three different representations
21056 // in prism. The first is the plain source code. The second is the
21057 // representation that will be sent to the regular expression engine, which
21058 // is the value of the "unescaped" field. This is poorly named, because it
21059 // actually still contains escapes, just a subset of them that the regular
21060 // expression engine knows how to handle. The third representation is fully
21061 // unescaped, which is what we need.
21062 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
21063 if (PRISM_UNLIKELY(cursor != NULL)) {
21064 pm_named_capture_escape(parser, &unescaped, source, length, cursor);
21065 source = (const uint8_t *) pm_buffer_value(&unescaped);
21066 length = pm_buffer_length(&unescaped);
21067 }
21068
21069 pm_location_t location;
21070 pm_constant_id_t name;
21071
21072 // If the name of the capture group isn't a valid identifier, we do
21073 // not add it to the local table.
21074 if (!pm_slice_is_valid_local(parser, source, source + length)) {
21075 pm_buffer_free(&unescaped);
21076 return;
21077 }
21078
21079 if (callback_data->shared) {
21080 // If the unescaped string is a slice of the source, then we can
21081 // copy the names directly. The pointers will line up.
21082 location = (pm_location_t) { .start = source, .end = source + length };
21083 name = pm_parser_constant_id_location(parser, location.start, location.end);
21084 } else {
21085 // Otherwise, the name is a slice of the malloc-ed owned string,
21086 // in which case we need to copy it out into a new string.
21087 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
21088
21089 void *memory = xmalloc(length);
21090 if (memory == NULL) abort();
21091
21092 memcpy(memory, source, length);
21093 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
21094 }
21095
21096 // Add this name to the list of constants if it is valid, not duplicated,
21097 // and not a keyword.
21098 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
21099 pm_constant_id_list_append(names, name);
21100
21101 int depth;
21102 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
21103 // If the local is not already a local but it is a keyword, then we
21104 // do not want to add a capture for this.
21105 if (pm_local_is_keyword((const char *) source, length)) {
21106 pm_buffer_free(&unescaped);
21107 return;
21108 }
21109
21110 // If the identifier is not already a local, then we will add it to
21111 // the local table.
21112 pm_parser_local_add(parser, name, location.start, location.end, 0);
21113 }
21114
21115 // Here we lazily create the MatchWriteNode since we know we're
21116 // about to add a target.
21117 if (callback_data->match == NULL) {
21118 callback_data->match = pm_match_write_node_create(parser, call);
21119 }
21120
21121 // Next, create the local variable target and add it to the list of
21122 // targets for the match.
21123 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
21124 pm_node_list_append(&callback_data->match->targets, target);
21125 }
21126
21127 pm_buffer_free(&unescaped);
21128}
21129
21134static pm_node_t *
21135parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
21137 .parser = parser,
21138 .call = call,
21139 .names = { 0 },
21140 .shared = content->type == PM_STRING_SHARED
21141 };
21142
21144 .parser = parser,
21145 .start = call->receiver->location.start,
21146 .end = call->receiver->location.end,
21147 .shared = content->type == PM_STRING_SHARED
21148 };
21149
21150 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
21151 pm_constant_id_list_free(&callback_data.names);
21152
21153 if (callback_data.match != NULL) {
21154 return (pm_node_t *) callback_data.match;
21155 } else {
21156 return (pm_node_t *) call;
21157 }
21158}
21159
21160static inline pm_node_t *
21161parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
21162 pm_token_t token = parser->current;
21163
21164 switch (token.type) {
21165 case PM_TOKEN_EQUAL: {
21166 switch (PM_NODE_TYPE(node)) {
21167 case PM_CALL_NODE: {
21168 // If we have no arguments to the call node and we need this
21169 // to be a target then this is either a method call or a
21170 // local variable write. This _must_ happen before the value
21171 // is parsed because it could be referenced in the value.
21172 pm_call_node_t *call_node = (pm_call_node_t *) node;
21174 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
21175 }
21176 }
21178 case PM_CASE_WRITABLE: {
21179 parser_lex(parser);
21180 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21181
21182 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21183 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21184 }
21185
21186 return parse_write(parser, node, &token, value);
21187 }
21188 case PM_SPLAT_NODE: {
21189 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21190 pm_multi_target_node_targets_append(parser, multi_target, node);
21191
21192 parser_lex(parser);
21193 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21194 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
21195 }
21197 case PM_FALSE_NODE:
21200 case PM_NIL_NODE:
21201 case PM_SELF_NODE:
21202 case PM_TRUE_NODE: {
21203 // In these special cases, we have specific error messages
21204 // and we will replace them with local variable writes.
21205 parser_lex(parser);
21206 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21207 return parse_unwriteable_write(parser, node, &token, value);
21208 }
21209 default:
21210 // In this case we have an = sign, but we don't know what
21211 // it's for. We need to treat it as an error. We'll mark it
21212 // as an error and skip past it.
21213 parser_lex(parser);
21214 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21215 return node;
21216 }
21217 }
21219 switch (PM_NODE_TYPE(node)) {
21222 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21225 parser_lex(parser);
21226
21227 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21228 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21229
21230 pm_node_destroy(parser, node);
21231 return result;
21232 }
21234 parser_lex(parser);
21235
21236 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21237 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21238
21239 pm_node_destroy(parser, node);
21240 return result;
21241 }
21242 case PM_CONSTANT_PATH_NODE: {
21243 parser_lex(parser);
21244
21245 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21246 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21247
21248 return parse_shareable_constant_write(parser, write);
21249 }
21250 case PM_CONSTANT_READ_NODE: {
21251 parser_lex(parser);
21252
21253 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21254 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21255
21256 pm_node_destroy(parser, node);
21257 return parse_shareable_constant_write(parser, write);
21258 }
21260 parser_lex(parser);
21261
21262 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21263 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21264
21265 pm_node_destroy(parser, node);
21266 return result;
21267 }
21269 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21270 parser_lex(parser);
21271
21272 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21273 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21274
21275 parse_target_implicit_parameter(parser, node);
21276 pm_node_destroy(parser, node);
21277 return result;
21278 }
21280 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21281 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21282 parse_target_implicit_parameter(parser, node);
21283 }
21284
21286 parser_lex(parser);
21287
21288 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21289 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21290
21291 pm_node_destroy(parser, node);
21292 return result;
21293 }
21294 case PM_CALL_NODE: {
21295 pm_call_node_t *cast = (pm_call_node_t *) node;
21296
21297 // If we have a vcall (a method with no arguments and no
21298 // receiver that could have been a local variable) then we
21299 // will transform it into a local variable write.
21301 pm_location_t *message_loc = &cast->message_loc;
21302 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21303
21304 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21305 parser_lex(parser);
21306
21307 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21308 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21309
21310 pm_node_destroy(parser, (pm_node_t *) cast);
21311 return result;
21312 }
21313
21314 // Move past the token here so that we have already added
21315 // the local variable by this point.
21316 parser_lex(parser);
21317
21318 // If there is no call operator and the message is "[]" then
21319 // this is an aref expression, and we can transform it into
21320 // an aset expression.
21321 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21322 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21323 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21324 }
21325
21326 // If this node cannot be writable, then we have an error.
21327 if (pm_call_node_writable_p(parser, cast)) {
21328 parse_write_name(parser, &cast->name);
21329 } else {
21330 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21331 }
21332
21333 parse_call_operator_write(parser, cast, &token);
21334 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21335 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21336 }
21337 case PM_MULTI_WRITE_NODE: {
21338 parser_lex(parser);
21339 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21340 return node;
21341 }
21342 default:
21343 parser_lex(parser);
21344
21345 // In this case we have an &&= sign, but we don't know what it's for.
21346 // We need to treat it as an error. For now, we'll mark it as an error
21347 // and just skip right past it.
21348 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21349 return node;
21350 }
21351 }
21353 switch (PM_NODE_TYPE(node)) {
21356 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21359 parser_lex(parser);
21360
21361 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21362 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21363
21364 pm_node_destroy(parser, node);
21365 return result;
21366 }
21368 parser_lex(parser);
21369
21370 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21371 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21372
21373 pm_node_destroy(parser, node);
21374 return result;
21375 }
21376 case PM_CONSTANT_PATH_NODE: {
21377 parser_lex(parser);
21378
21379 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21380 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21381
21382 return parse_shareable_constant_write(parser, write);
21383 }
21384 case PM_CONSTANT_READ_NODE: {
21385 parser_lex(parser);
21386
21387 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21388 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21389
21390 pm_node_destroy(parser, node);
21391 return parse_shareable_constant_write(parser, write);
21392 }
21394 parser_lex(parser);
21395
21396 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21397 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21398
21399 pm_node_destroy(parser, node);
21400 return result;
21401 }
21403 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21404 parser_lex(parser);
21405
21406 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21407 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21408
21409 parse_target_implicit_parameter(parser, node);
21410 pm_node_destroy(parser, node);
21411 return result;
21412 }
21414 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21415 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21416 parse_target_implicit_parameter(parser, node);
21417 }
21418
21420 parser_lex(parser);
21421
21422 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21423 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21424
21425 pm_node_destroy(parser, node);
21426 return result;
21427 }
21428 case PM_CALL_NODE: {
21429 pm_call_node_t *cast = (pm_call_node_t *) node;
21430
21431 // If we have a vcall (a method with no arguments and no
21432 // receiver that could have been a local variable) then we
21433 // will transform it into a local variable write.
21435 pm_location_t *message_loc = &cast->message_loc;
21436 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21437
21438 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21439 parser_lex(parser);
21440
21441 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21442 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21443
21444 pm_node_destroy(parser, (pm_node_t *) cast);
21445 return result;
21446 }
21447
21448 // Move past the token here so that we have already added
21449 // the local variable by this point.
21450 parser_lex(parser);
21451
21452 // If there is no call operator and the message is "[]" then
21453 // this is an aref expression, and we can transform it into
21454 // an aset expression.
21455 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21456 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21457 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21458 }
21459
21460 // If this node cannot be writable, then we have an error.
21461 if (pm_call_node_writable_p(parser, cast)) {
21462 parse_write_name(parser, &cast->name);
21463 } else {
21464 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21465 }
21466
21467 parse_call_operator_write(parser, cast, &token);
21468 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21469 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21470 }
21471 case PM_MULTI_WRITE_NODE: {
21472 parser_lex(parser);
21473 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21474 return node;
21475 }
21476 default:
21477 parser_lex(parser);
21478
21479 // In this case we have an ||= sign, but we don't know what it's for.
21480 // We need to treat it as an error. For now, we'll mark it as an error
21481 // and just skip right past it.
21482 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21483 return node;
21484 }
21485 }
21497 switch (PM_NODE_TYPE(node)) {
21500 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21503 parser_lex(parser);
21504
21505 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21506 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21507
21508 pm_node_destroy(parser, node);
21509 return result;
21510 }
21512 parser_lex(parser);
21513
21514 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21515 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21516
21517 pm_node_destroy(parser, node);
21518 return result;
21519 }
21520 case PM_CONSTANT_PATH_NODE: {
21521 parser_lex(parser);
21522
21523 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21524 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21525
21526 return parse_shareable_constant_write(parser, write);
21527 }
21528 case PM_CONSTANT_READ_NODE: {
21529 parser_lex(parser);
21530
21531 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21532 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21533
21534 pm_node_destroy(parser, node);
21535 return parse_shareable_constant_write(parser, write);
21536 }
21538 parser_lex(parser);
21539
21540 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21541 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21542
21543 pm_node_destroy(parser, node);
21544 return result;
21545 }
21547 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21548 parser_lex(parser);
21549
21550 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21551 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21552
21553 parse_target_implicit_parameter(parser, node);
21554 pm_node_destroy(parser, node);
21555 return result;
21556 }
21558 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21559 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21560 parse_target_implicit_parameter(parser, node);
21561 }
21562
21564 parser_lex(parser);
21565
21566 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21567 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21568
21569 pm_node_destroy(parser, node);
21570 return result;
21571 }
21572 case PM_CALL_NODE: {
21573 parser_lex(parser);
21574 pm_call_node_t *cast = (pm_call_node_t *) node;
21575
21576 // If we have a vcall (a method with no arguments and no
21577 // receiver that could have been a local variable) then we
21578 // will transform it into a local variable write.
21580 pm_location_t *message_loc = &cast->message_loc;
21581 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21582
21583 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21584 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21585 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21586
21587 pm_node_destroy(parser, (pm_node_t *) cast);
21588 return result;
21589 }
21590
21591 // If there is no call operator and the message is "[]" then
21592 // this is an aref expression, and we can transform it into
21593 // an aset expression.
21594 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21595 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21596 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21597 }
21598
21599 // If this node cannot be writable, then we have an error.
21600 if (pm_call_node_writable_p(parser, cast)) {
21601 parse_write_name(parser, &cast->name);
21602 } else {
21603 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21604 }
21605
21606 parse_call_operator_write(parser, cast, &token);
21607 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21608 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21609 }
21610 case PM_MULTI_WRITE_NODE: {
21611 parser_lex(parser);
21612 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21613 return node;
21614 }
21615 default:
21616 parser_lex(parser);
21617
21618 // In this case we have an operator but we don't know what it's for.
21619 // We need to treat it as an error. For now, we'll mark it as an error
21620 // and just skip right past it.
21621 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21622 return node;
21623 }
21624 }
21626 case PM_TOKEN_KEYWORD_AND: {
21627 parser_lex(parser);
21628
21629 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21630 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21631 }
21633 case PM_TOKEN_PIPE_PIPE: {
21634 parser_lex(parser);
21635
21636 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21637 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21638 }
21639 case PM_TOKEN_EQUAL_TILDE: {
21640 // Note that we _must_ parse the value before adding the local
21641 // variables in order to properly mirror the behavior of Ruby. For
21642 // example,
21643 //
21644 // /(?<foo>bar)/ =~ foo
21645 //
21646 // In this case, `foo` should be a method call and not a local yet.
21647 parser_lex(parser);
21648 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21649
21650 // By default, we're going to create a call node and then return it.
21651 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21652 pm_node_t *result = (pm_node_t *) call;
21653
21654 // If the receiver of this =~ is a regular expression node, then we
21655 // need to introduce local variables for it based on its named
21656 // capture groups.
21658 // It's possible to have an interpolated regular expression node
21659 // that only contains strings. This is because it can be split
21660 // up by a heredoc. In this case we need to concat the unescaped
21661 // strings together and then parse them as a regular expression.
21663
21664 bool interpolated = false;
21665 size_t total_length = 0;
21666
21667 pm_node_t *part;
21668 PM_NODE_LIST_FOREACH(parts, index, part) {
21669 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21670 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21671 } else {
21672 interpolated = true;
21673 break;
21674 }
21675 }
21676
21677 if (!interpolated && total_length > 0) {
21678 void *memory = xmalloc(total_length);
21679 if (!memory) abort();
21680
21681 uint8_t *cursor = memory;
21682 PM_NODE_LIST_FOREACH(parts, index, part) {
21683 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21684 size_t length = pm_string_length(unescaped);
21685
21686 memcpy(cursor, pm_string_source(unescaped), length);
21687 cursor += length;
21688 }
21689
21690 pm_string_t owned;
21691 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21692
21693 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21694 pm_string_free(&owned);
21695 }
21696 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21697 // If we have a regular expression node, then we can just parse
21698 // the named captures directly off the unescaped string.
21699 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21700 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21701 }
21702
21703 return result;
21704 }
21706 case PM_TOKEN_USTAR:
21708 // The only times this will occur are when we are in an error state,
21709 // but we'll put them in here so that errors can propagate.
21715 case PM_TOKEN_CARET:
21716 case PM_TOKEN_PIPE:
21717 case PM_TOKEN_AMPERSAND:
21719 case PM_TOKEN_LESS_LESS:
21720 case PM_TOKEN_MINUS:
21721 case PM_TOKEN_PLUS:
21722 case PM_TOKEN_PERCENT:
21723 case PM_TOKEN_SLASH:
21724 case PM_TOKEN_STAR:
21725 case PM_TOKEN_STAR_STAR: {
21726 parser_lex(parser);
21727 pm_token_t operator = parser->previous;
21728 switch (PM_NODE_TYPE(node)) {
21732 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21733 }
21734 break;
21735 }
21736 case PM_AND_NODE: {
21737 pm_and_node_t *cast = (pm_and_node_t *) node;
21739 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21740 }
21741 break;
21742 }
21743 case PM_OR_NODE: {
21744 pm_or_node_t *cast = (pm_or_node_t *) node;
21746 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21747 }
21748 break;
21749 }
21750 default:
21751 break;
21752 }
21753
21754 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21755 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21756 }
21757 case PM_TOKEN_GREATER:
21759 case PM_TOKEN_LESS:
21760 case PM_TOKEN_LESS_EQUAL: {
21761 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21762 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21763 }
21764
21765 parser_lex(parser);
21766 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21767 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21768 }
21770 case PM_TOKEN_DOT: {
21771 parser_lex(parser);
21772 pm_token_t operator = parser->previous;
21773 pm_arguments_t arguments = { 0 };
21774
21775 // This if statement handles the foo.() syntax.
21776 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21777 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21778 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21779 }
21780
21781 switch (PM_NODE_TYPE(node)) {
21785 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21786 }
21787 break;
21788 }
21789 case PM_AND_NODE: {
21790 pm_and_node_t *cast = (pm_and_node_t *) node;
21792 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21793 }
21794 break;
21795 }
21796 case PM_OR_NODE: {
21797 pm_or_node_t *cast = (pm_or_node_t *) node;
21799 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21800 }
21801 break;
21802 }
21803 default:
21804 break;
21805 }
21806
21807 pm_token_t message;
21808
21809 switch (parser->current.type) {
21810 case PM_CASE_OPERATOR:
21811 case PM_CASE_KEYWORD:
21812 case PM_TOKEN_CONSTANT:
21814 case PM_TOKEN_METHOD_NAME: {
21815 parser_lex(parser);
21816 message = parser->previous;
21817 break;
21818 }
21819 default: {
21820 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21821 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21822 }
21823 }
21824
21825 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21826 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21827
21828 if (
21829 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21830 arguments.arguments == NULL &&
21831 arguments.opening_loc.start == NULL &&
21832 match1(parser, PM_TOKEN_COMMA)
21833 ) {
21834 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21835 } else {
21836 return (pm_node_t *) call;
21837 }
21838 }
21839 case PM_TOKEN_DOT_DOT:
21840 case PM_TOKEN_DOT_DOT_DOT: {
21841 parser_lex(parser);
21842
21843 pm_node_t *right = NULL;
21844 if (token_begins_expression_p(parser->current.type)) {
21845 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21846 }
21847
21848 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21849 }
21851 pm_token_t keyword = parser->current;
21852 parser_lex(parser);
21853
21854 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21855 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21856 }
21858 pm_token_t keyword = parser->current;
21859 parser_lex(parser);
21860
21861 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21862 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21863 }
21865 parser_lex(parser);
21866 pm_statements_node_t *statements = pm_statements_node_create(parser);
21867 pm_statements_node_body_append(parser, statements, node, true);
21868
21869 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21870 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21871 }
21873 parser_lex(parser);
21874 pm_statements_node_t *statements = pm_statements_node_create(parser);
21875 pm_statements_node_body_append(parser, statements, node, true);
21876
21877 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21878 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21879 }
21881 context_push(parser, PM_CONTEXT_TERNARY);
21882 pm_node_list_t current_block_exits = { 0 };
21883 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21884
21885 pm_token_t qmark = parser->current;
21886 parser_lex(parser);
21887
21888 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21889
21890 if (parser->recovering) {
21891 // If parsing the true expression of this ternary resulted in a syntax
21892 // error that we can recover from, then we're going to put missing nodes
21893 // and tokens into the remaining places. We want to be sure to do this
21894 // before the `expect` function call to make sure it doesn't
21895 // accidentally move past a ':' token that occurs after the syntax
21896 // error.
21897 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21898 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21899
21900 context_pop(parser);
21901 pop_block_exits(parser, previous_block_exits);
21902 pm_node_list_free(&current_block_exits);
21903
21904 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21905 }
21906
21907 accept1(parser, PM_TOKEN_NEWLINE);
21908 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21909
21910 pm_token_t colon = parser->previous;
21911 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21912
21913 context_pop(parser);
21914 pop_block_exits(parser, previous_block_exits);
21915 pm_node_list_free(&current_block_exits);
21916
21917 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21918 }
21919 case PM_TOKEN_COLON_COLON: {
21920 parser_lex(parser);
21921 pm_token_t delimiter = parser->previous;
21922
21923 switch (parser->current.type) {
21924 case PM_TOKEN_CONSTANT: {
21925 parser_lex(parser);
21926 pm_node_t *path;
21927
21928 if (
21929 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21930 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21931 ) {
21932 // If we have a constant immediately following a '::' operator, then
21933 // this can either be a constant path or a method call, depending on
21934 // what follows the constant.
21935 //
21936 // If we have parentheses, then this is a method call. That would
21937 // look like Foo::Bar().
21938 pm_token_t message = parser->previous;
21939 pm_arguments_t arguments = { 0 };
21940
21941 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21942 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21943 } else {
21944 // Otherwise, this is a constant path. That would look like Foo::Bar.
21945 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21946 }
21947
21948 // If this is followed by a comma then it is a multiple assignment.
21949 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21950 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21951 }
21952
21953 return path;
21954 }
21955 case PM_CASE_OPERATOR:
21956 case PM_CASE_KEYWORD:
21958 case PM_TOKEN_METHOD_NAME: {
21959 parser_lex(parser);
21960 pm_token_t message = parser->previous;
21961
21962 // If we have an identifier following a '::' operator, then it is for
21963 // sure a method call.
21964 pm_arguments_t arguments = { 0 };
21965 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21966 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21967
21968 // If this is followed by a comma then it is a multiple assignment.
21969 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21970 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21971 }
21972
21973 return (pm_node_t *) call;
21974 }
21976 // If we have a parenthesis following a '::' operator, then it is the
21977 // method call shorthand. That would look like Foo::(bar).
21978 pm_arguments_t arguments = { 0 };
21979 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21980
21981 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21982 }
21983 default: {
21984 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21985 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21986 }
21987 }
21988 }
21990 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21991 parser_lex(parser);
21992 accept1(parser, PM_TOKEN_NEWLINE);
21993
21994 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21995 context_pop(parser);
21996
21997 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
21998 }
21999 case PM_TOKEN_BRACKET_LEFT: {
22000 parser_lex(parser);
22001
22002 pm_arguments_t arguments = { 0 };
22003 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22004
22005 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
22006 pm_accepts_block_stack_push(parser, true);
22007 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
22008 pm_accepts_block_stack_pop(parser);
22009 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
22010 }
22011
22012 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22013
22014 // If we have a comma after the closing bracket then this is a multiple
22015 // assignment and we should parse the targets.
22016 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22017 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
22018 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22019 }
22020
22021 // If we're at the end of the arguments, we can now check if there is a
22022 // block node that starts with a {. If there is, then we can parse it and
22023 // add it to the arguments.
22024 pm_block_node_t *block = NULL;
22025 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
22026 block = parse_block(parser, (uint16_t) (depth + 1));
22027 pm_arguments_validate_block(parser, &arguments, block);
22028 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
22029 block = parse_block(parser, (uint16_t) (depth + 1));
22030 }
22031
22032 if (block != NULL) {
22033 if (arguments.block != NULL) {
22034 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
22035 if (arguments.arguments == NULL) {
22036 arguments.arguments = pm_arguments_node_create(parser);
22037 }
22038 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
22039 }
22040
22041 arguments.block = (pm_node_t *) block;
22042 }
22043
22044 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
22045 }
22046 case PM_TOKEN_KEYWORD_IN: {
22047 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22048 parser->pattern_matching_newlines = true;
22049
22050 pm_token_t operator = parser->current;
22051 parser->command_start = false;
22052 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22053 parser_lex(parser);
22054
22055 pm_constant_id_list_t captures = { 0 };
22056 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
22057
22058 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22059 pm_constant_id_list_free(&captures);
22060
22061 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
22062 }
22064 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22065 parser->pattern_matching_newlines = true;
22066
22067 pm_token_t operator = parser->current;
22068 parser->command_start = false;
22069 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22070 parser_lex(parser);
22071
22072 pm_constant_id_list_t captures = { 0 };
22073 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
22074
22075 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22076 pm_constant_id_list_free(&captures);
22077
22078 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
22079 }
22080 default:
22081 assert(false && "unreachable");
22082 return NULL;
22083 }
22084}
22085
22086#undef PM_PARSE_PATTERN_SINGLE
22087#undef PM_PARSE_PATTERN_TOP
22088#undef PM_PARSE_PATTERN_MULTI
22089
22094static inline bool
22095pm_call_node_command_p(const pm_call_node_t *node) {
22096 return (
22097 (node->opening_loc.start == NULL) &&
22098 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
22099 (node->arguments != NULL || node->block != NULL)
22100 );
22101}
22102
/**
 * Parse an expression: first a prefix expression through
 * parse_expression_prefix, then, for as long as the current token is a binary
 * operator whose left binding power is at least `binding_power`, an infix
 * expression through parse_expression_infix. The resulting node is returned.
 *
 * When `depth` has reached PRISM_DEPTH_MAXIMUM, a PM_ERR_NESTING_TOO_DEEP
 * error is recorded and a missing node is returned without parsing anything.
 *
 * `accepts_command_call`, `accepts_label` and `diag_id` are forwarded to
 * parse_expression_prefix. `accepts_command_call` is also forwarded to
 * parse_expression_infix and is cleared inside the loop once the parsed node
 * can no longer continue a method chain.
 *
 * NOTE(review): every line of this listing carries a line-number prefix, and
 * the numbering skips 22125-22129, 22173, 22180-22185, 22222 and 22277. The
 * lines that held those numbers are not shown, so some `case` labels and one
 * `if` header appear to be absent. Restore them from the original file before
 * relying on this text.
 */
22111static pm_node_t *
22112parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
    // Stop descending once the nesting limit has been reached.
22113 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
22114 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
22115 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
22116 }
22117
22118 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
22119
22120 switch (PM_NODE_TYPE(node)) {
22121 case PM_MISSING_NODE:
22122 // If we found a syntax error, then the type of node returned by
22123 // parse_expression_prefix is going to be a missing node.
22124 return node;
    // NOTE(review): numbers 22125-22129 are skipped here; further case
    // labels sharing the arm below are presumably among them.
22130 case PM_UNDEF_NODE:
22131 // These expressions are statements, and cannot be followed by
22132 // operators (except modifiers).
22133 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22134 return node;
22135 }
22136 break;
22137 case PM_CALL_NODE:
22138 // If we have a call node, then we need to check if it looks like a
22139 // method call without parentheses that contains arguments. If it
22140 // does, then it has different rules for parsing infix operators,
22141 // namely that it only accepts composition (and/or) and modifiers
22142 // (if/unless/etc.).
22143 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
22144 return node;
22145 }
22146 break;
22147 case PM_SYMBOL_NODE:
22148 // If we have a symbol node that is being parsed as a label, then we
22149 // need to immediately return, because there should never be an
22150 // infix operator following this node.
22151 if (pm_symbol_node_label_p(node)) {
22152 return node;
22153 }
22154 break;
22155 default:
22156 break;
22157 }
22158
22159 // Otherwise we'll look and see if the next token can be parsed as an infix
22160 // operator. If it can, then we'll parse it using parse_expression_infix.
22161 pm_binding_powers_t current_binding_powers;
22162 pm_token_type_t current_token_type;
22163
    // The comma expressions refresh both locals from the current token on
    // every iteration; only the final `&&` expression decides whether the
    // loop continues.
22164 while (
22165 current_token_type = parser->current.type,
22166 current_binding_powers = pm_binding_powers[current_token_type],
22167 binding_power <= current_binding_powers.left &&
22168 current_binding_powers.binary
22169 ) {
22170 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
22171
22172 switch (PM_NODE_TYPE(node)) {
    // NOTE(review): number 22173 is skipped; the case label for this arm is
    // not visible.
22174 // Multi-write nodes are statements, and cannot be followed by
22175 // operators except modifiers.
22176 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22177 return node;
22178 }
22179 break;
    // NOTE(review): numbers 22180-22185 are skipped; the case labels for this
    // arm are not visible.
22186 // These expressions are statements, by virtue of the right-hand
22187 // side of their write being an implicit array.
22188 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22189 return node;
22190 }
22191 break;
22192 case PM_CALL_NODE:
22193 // These expressions are also statements, by virtue of the
22194 // right-hand side of the expression (i.e., the last argument to
22195 // the call node) being an implicit array.
22196 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22197 return node;
22198 }
22199 break;
22200 default:
22201 break;
22202 }
22203
22204 // If the operator is nonassoc and we should not be able to parse the
22205 // upcoming infix operator, break.
22206 if (current_binding_powers.nonassoc) {
22207 // If this is a non-assoc operator and we are about to parse the
22208 // exact same operator, then we need to add an error.
22209 if (match1(parser, current_token_type)) {
22210 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22211 break;
22212 }
22213
22214 // If this is an endless range, then we need to reject a couple of
22215 // additional operators because it violates the normal operator
22216 // precedence rules. Those patterns are:
22217 //
22218 // 1.. & 2
22219 // 1.. * 2
22220 //
22221 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
    // NOTE(review): number 22222 is skipped; the condition guarding the error
    // below is not visible, which also leaves the braces unbalanced as shown.
22223 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22224 break;
22225 }
22226
22227 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22228 break;
22229 }
22230 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22231 break;
22232 }
22233 }
22234
22235 if (accepts_command_call) {
22236 // A command-style method call is only accepted on method chains.
22237 // Thus, we check whether the parsed node can continue method chains.
22238 // The method chain can continue if the parsed node is one of the following five kinds:
22239 // (1) index access: foo[1]
22240 // (2) attribute access: foo.bar
22241 // (3) method call with parenthesis: foo.bar(1)
22242 // (4) method call with a block: foo.bar do end
22243 // (5) constant path: foo::Bar
22244 switch (node->type) {
22245 case PM_CALL_NODE: {
22246 pm_call_node_t *cast = (pm_call_node_t *)node;
22247 if (
22248 // (1) foo[1]
22249 !(
22250 cast->call_operator_loc.start == NULL &&
22251 cast->message_loc.start != NULL &&
22252 cast->message_loc.start[0] == '[' &&
22253 cast->message_loc.end[-1] == ']'
22254 ) &&
22255 // (2) foo.bar
22256 !(
22257 cast->call_operator_loc.start != NULL &&
22258 cast->arguments == NULL &&
22259 cast->block == NULL &&
22260 cast->opening_loc.start == NULL
22261 ) &&
22262 // (3) foo.bar(1)
22263 !(
22264 cast->call_operator_loc.start != NULL &&
22265 cast->opening_loc.start != NULL
22266 ) &&
22267 // (4) foo.bar do end
22268 !(
22269 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22270 )
22271 ) {
22272 accepts_command_call = false;
22273 }
22274 break;
22275 }
22276 // (5) foo::Bar
    // NOTE(review): number 22277 is skipped; the case label for the constant
    // path arm is not visible.
22278 break;
22279 default:
22280 accepts_command_call = false;
22281 break;
22282 }
22283 }
22284 }
22285
22286 return node;
22287}
22288
22293static pm_statements_node_t *
22294wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22295 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22296 if (statements == NULL) {
22297 statements = pm_statements_node_create(parser);
22298 }
22299
22300 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22301 pm_arguments_node_arguments_append(
22302 arguments,
22303 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22304 );
22305
22306 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22307 parser,
22308 arguments,
22309 pm_parser_constant_id_constant(parser, "print", 5)
22310 ), true);
22311 }
22312
22313 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22314 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22315 if (statements == NULL) {
22316 statements = pm_statements_node_create(parser);
22317 }
22318
22319 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22320 pm_arguments_node_arguments_append(
22321 arguments,
22322 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22323 );
22324
22325 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22326 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22327
22328 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22329 parser,
22330 pm_parser_constant_id_constant(parser, "$F", 2),
22331 (pm_node_t *) call
22332 );
22333
22334 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22335 }
22336
22337 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22338 pm_arguments_node_arguments_append(
22339 arguments,
22340 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22341 );
22342
22343 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22344 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22345 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22346 parser,
22347 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22348 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22349 (pm_node_t *) pm_true_node_synthesized_create(parser)
22350 ));
22351
22352 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22353 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22354 }
22355
22356 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22357 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22358 parser,
22359 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22360 statements
22361 ), true);
22362
22363 statements = wrapped_statements;
22364 }
22365
22366 return statements;
22367}
22368
/**
 * Parse the input as a whole program and return the node built by
 * pm_program_node_create.
 *
 * A top-level scope is pushed when the parser has no current scope. The first
 * token is then lexed and the statements are parsed in the PM_CONTEXT_MAIN
 * context. The locals of the current scope are ordered into `locals` and the
 * scope is popped. When no statements were parsed, an empty statements node
 * located at parser->start is substituted.
 *
 * NOTE(review): every line of this listing carries a line-number prefix, and
 * number 22401 is skipped. The `if` header that pairs with the `} else {`
 * below is therefore not visible. Restore it from the original file before
 * relying on this text.
 */
22372static pm_node_t *
22373parse_program(pm_parser_t *parser) {
22374 // If the current scope is NULL, then we want to push a new top level scope.
22375 // The current scope could exist in the event that we are parsing an eval
22376 // and the user has passed into scopes that already exist.
22377 if (parser->current_scope == NULL) {
22378 pm_parser_scope_push(parser, true);
22379 }
22380
22381 pm_node_list_t current_block_exits = { 0 };
22382 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22383
22384 parser_lex(parser);
22385 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22386
22387 if (statements != NULL && !parser->parsing_eval) {
22388 // If we have statements, then the top-level statement should be
22389 // explicitly checked as well. We have to do this here because
22390 // everywhere else we check all but the last statement.
22391 assert(statements->body.size > 0);
22392 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22393 }
22394
    // `locals` is handed to pm_locals_order without being initialized here;
    // presumably that call fills it in completely. TODO confirm.
22395 pm_constant_id_list_t locals;
22396 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22397 pm_parser_scope_pop(parser);
22398
22399 // At the top level, see if we need to wrap the statements in a program
22400 // node with a while loop based on the options.
    // NOTE(review): the condition selecting between the two branches below is
    // on the skipped line 22401. As shown, current_block_exits is freed only
    // in the else branch; confirm whether the wrap_statements branch should
    // free it too.
22402 statements = wrap_statements(parser, statements);
22403 } else {
22404 flush_block_exits(parser, previous_block_exits);
22405 pm_node_list_free(&current_block_exits);
22406 }
22407
22408 // If this is an empty file, then we're still going to parse all of the
22409 // statements in order to gather up all of the comments and such. Here we'll
22410 // correct the location information.
22411 if (statements == NULL) {
22412 statements = pm_statements_node_create(parser);
22413 pm_statements_node_location_set(statements, parser->start, parser->start);
22414 }
22415
22416 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22417}
22418
22419/******************************************************************************/
22420/* External functions */
22421/******************************************************************************/
22422
/**
 * Find the first occurrence of the NUL-terminated string `little` that lies
 * entirely within the first `big_length` bytes of `big`. `big` does not need
 * to be NUL-terminated.
 *
 * Returns a pointer to the start of the match, or NULL when there is none. An
 * empty `little` matches at `big`, in the same way strstr() treats an empty
 * needle.
 *
 * No byte at or beyond `big + big_length` is ever read, and a match that would
 * extend past that limit is not reported.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle trivially matches at the start of the haystack.
    if (little_length == 0) return big;

    // A needle longer than the haystack cannot match. Checking this first
    // also keeps the subtraction below from wrapping around.
    if (little_length > big_length) return NULL;

    // The last position at which a complete match still fits in the haystack.
    const char *last = big + (big_length - little_length);

    for (; big <= last; big++) {
        // Compare the first byte before paying for a full memcmp.
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22442
22443#ifdef _WIN32
22444#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22445#else
22451static void
22452pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22453 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22454 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22455 }
22456}
22457#endif
22458
22463static void
22464pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22465 const char *switches = pm_strnstr(engine, " -", length);
22466 if (switches == NULL) return;
22467
22468 pm_options_t next_options = *options;
22469 options->shebang_callback(
22470 &next_options,
22471 (const uint8_t *) (switches + 1),
22472 length - ((size_t) (switches - engine)) - 1,
22473 options->shebang_callback_data
22474 );
22475
22476 size_t encoding_length;
22477 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22478 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22479 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22480 }
22481
22482 parser->command_line = next_options.command_line;
22483 parser->frozen_string_literal = next_options.frozen_string_literal;
22484}
22485
22490pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22491 assert(source != NULL);
22492
22493 *parser = (pm_parser_t) {
22494 .node_id = 0,
22495 .lex_state = PM_LEX_STATE_BEG,
22496 .enclosure_nesting = 0,
22497 .lambda_enclosure_nesting = -1,
22498 .brace_nesting = 0,
22499 .do_loop_stack = 0,
22500 .accepts_block_stack = 0,
22501 .lex_modes = {
22502 .index = 0,
22503 .stack = {{ .mode = PM_LEX_DEFAULT }},
22504 .current = &parser->lex_modes.stack[0],
22505 },
22506 .start = source,
22507 .end = source + size,
22508 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22509 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22510 .next_start = NULL,
22511 .heredoc_end = NULL,
22512 .data_loc = { .start = NULL, .end = NULL },
22513 .comment_list = { 0 },
22514 .magic_comment_list = { 0 },
22515 .warning_list = { 0 },
22516 .error_list = { 0 },
22517 .current_scope = NULL,
22518 .current_context = NULL,
22519 .encoding = PM_ENCODING_UTF_8_ENTRY,
22520 .encoding_changed_callback = NULL,
22521 .encoding_comment_start = source,
22522 .lex_callback = NULL,
22523 .filepath = { 0 },
22524 .constant_pool = { 0 },
22525 .newline_list = { 0 },
22526 .integer_base = 0,
22527 .current_string = PM_STRING_EMPTY,
22528 .start_line = 1,
22529 .explicit_encoding = NULL,
22530 .command_line = 0,
22531 .parsing_eval = false,
22532 .partial_script = false,
22533 .command_start = true,
22534 .recovering = false,
22535 .encoding_locked = false,
22536 .encoding_changed = false,
22537 .pattern_matching_newlines = false,
22538 .in_keyword_arg = false,
22539 .current_block_exits = NULL,
22540 .semantic_token_seen = false,
22541 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22542 .current_regular_expression_ascii_only = false,
22543 .warn_mismatched_indentation = true
22544 };
22545
22546 // Initialize the constant pool. We're going to completely guess as to the
22547 // number of constants that we'll need based on the size of the input. The
22548 // ratio we chose here is actually less arbitrary than you might think.
22549 //
22550 // We took ~50K Ruby files and measured the size of the file versus the
22551 // number of constants that were found in those files. Then we found the
22552 // average and standard deviation of the ratios of constants/bytesize. Then
22553 // we added 1.34 standard deviations to the average to get a ratio that
22554 // would fit 75% of the files (for a two-tailed distribution). This works
22555 // because there was about a 0.77 correlation and the distribution was
22556 // roughly normal.
22557 //
22558 // This ratio will need to change if we add more constants to the constant
22559 // pool for another node type.
22560 uint32_t constant_size = ((uint32_t) size) / 95;
22561 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22562
22563 // Initialize the newline list. Similar to the constant pool, we're going to
22564 // guess at the number of newlines that we'll need based on the size of the
22565 // input.
22566 size_t newline_size = size / 22;
22567 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22568
22569 // If options were provided to this parse, establish them here.
22570 if (options != NULL) {
22571 // filepath option
22572 parser->filepath = options->filepath;
22573
22574 // line option
22575 parser->start_line = options->line;
22576
22577 // encoding option
22578 size_t encoding_length = pm_string_length(&options->encoding);
22579 if (encoding_length > 0) {
22580 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22581 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22582 }
22583
22584 // encoding_locked option
22585 parser->encoding_locked = options->encoding_locked;
22586
22587 // frozen_string_literal option
22589
22590 // command_line option
22591 parser->command_line = options->command_line;
22592
22593 // version option
22594 parser->version = options->version;
22595
22596 // partial_script
22597 parser->partial_script = options->partial_script;
22598
22599 // scopes option
22600 parser->parsing_eval = options->scopes_count > 0;
22601 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22602
22603 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22604 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22605 pm_parser_scope_push(parser, scope_index == 0);
22606
22607 // Scopes given from the outside are not allowed to have numbered
22608 // parameters.
22609 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22610
22611 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22612 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22613
22614 const uint8_t *source = pm_string_source(local);
22615 size_t length = pm_string_length(local);
22616
22617 void *allocated = xmalloc(length);
22618 if (allocated == NULL) continue;
22619
22620 memcpy(allocated, source, length);
22621 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22622 }
22623 }
22624 }
22625
22626 pm_accepts_block_stack_push(parser, true);
22627
22628 // Skip past the UTF-8 BOM if it exists.
22629 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22630 parser->current.end += 3;
22631 parser->encoding_comment_start += 3;
22632
22633 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22635 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22636 }
22637 }
22638
22639 // If the -x command line flag is set, or the first shebang of the file does
22640 // not include "ruby", then we'll search for a shebang that does include
22641 // "ruby" and start parsing from there.
22642 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22643
22644 // If the first two bytes of the source are a shebang, then we will do a bit
22645 // of extra processing.
22646 //
22647 // First, we'll indicate that the encoding comment is at the end of the
22648 // shebang. This means that when a shebang is present the encoding comment
22649 // can begin on the second line.
22650 //
22651 // Second, we will check if the shebang includes "ruby". If it does, then we
22652 // will start parsing from there. We will also potentially warn the
22653 // user if there is a carriage return at the end of the shebang. We will
22654 // also potentially call the shebang callback if this is the main script to
22655 // allow the caller to parse the shebang and find any command-line options.
22656 // If the shebang does not include "ruby" and this is the main script being
22657 // parsed, then we will start searching the file for a shebang that does
22658 // contain "ruby" as if -x were passed on the command line.
22659 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22660 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22661
22662 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22663 const char *engine;
22664
22665 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22666 if (newline != NULL) {
22667 parser->encoding_comment_start = newline + 1;
22668
22669 if (options == NULL || options->main_script) {
22670 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22671 }
22672 }
22673
22674 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22675 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22676 }
22677
22678 search_shebang = false;
22679 } else if (options->main_script && !parser->parsing_eval) {
22680 search_shebang = true;
22681 }
22682 }
22683
22684 // Here we're going to find the first shebang that includes "ruby" and start
22685 // parsing from there.
22686 if (search_shebang) {
22687 // If a shebang that includes "ruby" is not found, then we're going to add
22688 // a load error to the list of errors on the parser.
22689 bool found_shebang = false;
22690
22691 // This is going to point to the start of each line as we check it.
22692 // We'll maintain a moving window looking at each line as they come.
22693 const uint8_t *cursor = parser->start;
22694
22695 // The newline pointer points to the end of the current line that we're
22696 // considering. If it is NULL, then we're at the end of the file.
22697 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22698
22699 while (newline != NULL) {
22700 pm_newline_list_append(&parser->newline_list, newline);
22701
22702 cursor = newline + 1;
22703 newline = next_newline(cursor, parser->end - cursor);
22704
22705 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22706 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22707 const char *engine;
22708 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22709 found_shebang = true;
22710
22711 if (newline != NULL) {
22712 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22713 parser->encoding_comment_start = newline + 1;
22714 }
22715
22716 if (options != NULL && options->shebang_callback != NULL) {
22717 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22718 }
22719
22720 break;
22721 }
22722 }
22723 }
22724
22725 if (found_shebang) {
22726 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22727 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22728 } else {
22729 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22730 pm_newline_list_clear(&parser->newline_list);
22731 }
22732 }
22733
22734 // The encoding comment can start after any amount of inline whitespace, so
22735 // here we'll advance it to the first non-inline-whitespace character so
22736 // that it is ready for future comparisons.
22737 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22738}
22739
22745pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
22746 parser->encoding_changed_callback = callback;
22747}
22748
22752static inline void
22753pm_comment_list_free(pm_list_t *list) {
22754 pm_list_node_t *node, *next;
22755
22756 for (node = list->head; node != NULL; node = next) {
22757 next = node->next;
22758
22759 pm_comment_t *comment = (pm_comment_t *) node;
22760 xfree(comment);
22761 }
22762}
22763
22767static inline void
22768pm_magic_comment_list_free(pm_list_t *list) {
22769 pm_list_node_t *node, *next;
22770
22771 for (node = list->head; node != NULL; node = next) {
22772 next = node->next;
22773
22776 }
22777}
22778
22783pm_parser_free(pm_parser_t *parser) {
22784 pm_string_free(&parser->filepath);
22785 pm_diagnostic_list_free(&parser->error_list);
22786 pm_diagnostic_list_free(&parser->warning_list);
22787 pm_comment_list_free(&parser->comment_list);
22788 pm_magic_comment_list_free(&parser->magic_comment_list);
22789 pm_constant_pool_free(&parser->constant_pool);
22790 pm_newline_list_free(&parser->newline_list);
22791
22792 while (parser->current_scope != NULL) {
22793 // Normally, popping the scope doesn't free the locals since it is
22794 // assumed that ownership has transferred to the AST. However if we have
22795 // scopes while we're freeing the parser, it's likely they came from
22796 // eval scopes and we need to free them explicitly here.
22797 pm_parser_scope_pop(parser);
22798 }
22799
22800 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22801 lex_mode_pop(parser);
22802 }
22803}
22804
22809pm_parse(pm_parser_t *parser) {
22810 return parse_program(parser);
22811}
22812
22818static bool
22819pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
22820#define LINE_SIZE 4096
22821 char line[LINE_SIZE];
22822
22823 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22824 size_t length = LINE_SIZE;
22825 while (length > 0 && line[length - 1] == '\n') length--;
22826
22827 if (length == LINE_SIZE) {
22828 // If we read a line that is the maximum size and it doesn't end
22829 // with a newline, then we'll just append it to the buffer and
22830 // continue reading.
22831 length--;
22832 pm_buffer_append_string(buffer, line, length);
22833 continue;
22834 }
22835
22836 // Append the line to the buffer.
22837 length--;
22838 pm_buffer_append_string(buffer, line, length);
22839
22840 // Check if the line matches the __END__ marker. If it does, then stop
22841 // reading and return false. In most circumstances, this means we should
22842 // stop reading from the stream so that the DATA constant can pick it
22843 // up.
22844 switch (length) {
22845 case 7:
22846 if (strncmp(line, "__END__", 7) == 0) return false;
22847 break;
22848 case 8:
22849 if (strncmp(line, "__END__\n", 8) == 0) return false;
22850 break;
22851 case 9:
22852 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22853 break;
22854 }
22855 }
22856
22857 return true;
22858#undef LINE_SIZE
22859}
22860
22870static bool
22871pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22872 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22873
22874 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22875 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22876 return true;
22877 }
22878 }
22879
22880 return false;
22881}
22882
22890pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
22891 pm_buffer_init(buffer);
22892
22893 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
22894 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22895 pm_node_t *node = pm_parse(parser);
22896
22897 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22898 pm_node_destroy(parser, node);
22899 eof = pm_parse_stream_read(buffer, stream, stream_fgets);
22900
22901 pm_parser_free(parser);
22902 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22903 node = pm_parse(parser);
22904 }
22905
22906 return node;
22907}
22908
22913pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22914 pm_options_t options = { 0 };
22915 pm_options_read(&options, data);
22916
22917 pm_parser_t parser;
22918 pm_parser_init(&parser, source, size, &options);
22919
22920 pm_node_t *node = pm_parse(&parser);
22921 pm_node_destroy(&parser, node);
22922
22923 bool result = parser.error_list.size == 0;
22924 pm_parser_free(&parser);
22925 pm_options_free(&options);
22926
22927 return result;
22928}
22929
22930#undef PM_CASE_KEYWORD
22931#undef PM_CASE_OPERATOR
22932#undef PM_CASE_WRITABLE
22933#undef PM_STRING_EMPTY
22934#undef PM_LOCATION_NODE_BASE_VALUE
22935#undef PM_LOCATION_NODE_VALUE
22936#undef PM_LOCATION_NULL_VALUE
22937#undef PM_LOCATION_TOKEN_VALUE
22938
22939// We optionally support serializing to a binary string. For systems that don't
22940// want or need this functionality, it can be turned off with the
22941// PRISM_EXCLUDE_SERIALIZATION define.
22942#ifndef PRISM_EXCLUDE_SERIALIZATION
22943
22944static inline void
22945pm_serialize_header(pm_buffer_t *buffer) {
22946 pm_buffer_append_string(buffer, "PRISM", 5);
22947 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22948 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22949 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22950 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22951}
22952
22957pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22958 pm_serialize_header(buffer);
22959 pm_serialize_content(parser, node, buffer);
22960 pm_buffer_append_byte(buffer, '\0');
22961}
22962
22968pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22969 pm_options_t options = { 0 };
22970 pm_options_read(&options, data);
22971
22972 pm_parser_t parser;
22973 pm_parser_init(&parser, source, size, &options);
22974
22975 pm_node_t *node = pm_parse(&parser);
22976
22977 pm_serialize_header(buffer);
22978 pm_serialize_content(&parser, node, buffer);
22979 pm_buffer_append_byte(buffer, '\0');
22980
22981 pm_node_destroy(&parser, node);
22982 pm_parser_free(&parser);
22983 pm_options_free(&options);
22984}
22985
22991pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
22992 pm_parser_t parser;
22993 pm_options_t options = { 0 };
22994 pm_options_read(&options, data);
22995
22996 pm_buffer_t parser_buffer;
22997 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
22998 pm_serialize_header(buffer);
22999 pm_serialize_content(&parser, node, buffer);
23000 pm_buffer_append_byte(buffer, '\0');
23001
23002 pm_node_destroy(&parser, node);
23003 pm_buffer_free(&parser_buffer);
23004 pm_parser_free(&parser);
23005 pm_options_free(&options);
23006}
23007
23012pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23013 pm_options_t options = { 0 };
23014 pm_options_read(&options, data);
23015
23016 pm_parser_t parser;
23017 pm_parser_init(&parser, source, size, &options);
23018
23019 pm_node_t *node = pm_parse(&parser);
23020 pm_serialize_header(buffer);
23021 pm_serialize_encoding(parser.encoding, buffer);
23022 pm_buffer_append_varsint(buffer, parser.start_line);
23023 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
23024
23025 pm_node_destroy(&parser, node);
23026 pm_parser_free(&parser);
23027 pm_options_free(&options);
23028}
23029
23030#endif
23031
23032/******************************************************************************/
23033/* Slice queries for the Ruby API */
23034/******************************************************************************/
23035
/** The classification that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** Returned when the given encoding name could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice is not a valid identifier of any kind. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier not starting in uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier starting in uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
23053
23057pm_slice_type_t
23058pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
23059 // first, get the right encoding object
23060 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
23061 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
23062
23063 // check that there is at least one character
23064 if (length == 0) return PM_SLICE_TYPE_NONE;
23065
23066 size_t width;
23067 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
23068 // valid because alphabetical
23069 } else if (*source == '_') {
23070 // valid because underscore
23071 width = 1;
23072 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
23073 // valid because multibyte
23074 } else {
23075 // invalid because no match
23076 return PM_SLICE_TYPE_NONE;
23077 }
23078
23079 // determine the type of the slice based on the first character
23080 const uint8_t *end = source + length;
23081 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
23082
23083 // next, iterate through all of the bytes of the string to ensure that they
23084 // are all valid identifier characters
23085 source += width;
23086
23087 while (source < end) {
23088 if ((width = encoding->alnum_char(source, end - source)) != 0) {
23089 // valid because alphanumeric
23090 source += width;
23091 } else if (*source == '_') {
23092 // valid because underscore
23093 source++;
23094 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
23095 // valid because multibyte
23096 source += width;
23097 } else {
23098 // invalid because no match
23099 break;
23100 }
23101 }
23102
23103 // accept a ! or ? at the end of the slice as a method name
23104 if (*source == '!' || *source == '?' || *source == '=') {
23105 source++;
23106 result = PM_SLICE_TYPE_METHOD_NAME;
23107 }
23108
23109 // valid if we are at the end of the slice
23110 return source == end ? result : PM_SLICE_TYPE_NONE;
23111}
23112
23117pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
23118 switch (pm_slice_type(source, length, encoding_name)) {
23119 case PM_SLICE_TYPE_ERROR:
23120 return PM_STRING_QUERY_ERROR;
23121 case PM_SLICE_TYPE_NONE:
23122 case PM_SLICE_TYPE_CONSTANT:
23123 case PM_SLICE_TYPE_METHOD_NAME:
23124 return PM_STRING_QUERY_FALSE;
23125 case PM_SLICE_TYPE_LOCAL:
23126 return PM_STRING_QUERY_TRUE;
23127 }
23128
23129 assert(false && "unreachable");
23130 return PM_STRING_QUERY_FALSE;
23131}
23132
23137pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
23138 switch (pm_slice_type(source, length, encoding_name)) {
23139 case PM_SLICE_TYPE_ERROR:
23140 return PM_STRING_QUERY_ERROR;
23141 case PM_SLICE_TYPE_NONE:
23142 case PM_SLICE_TYPE_LOCAL:
23143 case PM_SLICE_TYPE_METHOD_NAME:
23144 return PM_STRING_QUERY_FALSE;
23145 case PM_SLICE_TYPE_CONSTANT:
23146 return PM_STRING_QUERY_TRUE;
23147 }
23148
23149 assert(false && "unreachable");
23150 return PM_STRING_QUERY_FALSE;
23151}
23152
23157pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
23158#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
23159#define C1(c) (*source == c)
23160#define C2(s) (memcmp(source, s, 2) == 0)
23161#define C3(s) (memcmp(source, s, 3) == 0)
23162
23163 switch (pm_slice_type(source, length, encoding_name)) {
23164 case PM_SLICE_TYPE_ERROR:
23165 return PM_STRING_QUERY_ERROR;
23166 case PM_SLICE_TYPE_NONE:
23167 break;
23168 case PM_SLICE_TYPE_LOCAL:
23169 // numbered parameters are not valid method names
23170 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
23171 case PM_SLICE_TYPE_CONSTANT:
23172 // all constants are valid method names
23173 case PM_SLICE_TYPE_METHOD_NAME:
23174 // all method names are valid method names
23175 return PM_STRING_QUERY_TRUE;
23176 }
23177
23178 switch (length) {
23179 case 1:
23180 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
23181 case 2:
23182 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
23183 case 3:
23184 return B(C3("===") || C3("<=>") || C3("[]="));
23185 default:
23186 return PM_STRING_QUERY_FALSE;
23187 }
23188
23189#undef B
23190#undef C1
23191#undef C2
23192#undef C3
23193}
@ PM_RANGE_FLAGS_EXCLUDE_END
... operator
Definition ast.h:7870
@ PM_DEFINED_NODE
DefinedNode.
Definition ast.h:709
@ PM_PRE_EXECUTION_NODE
PreExecutionNode.
Definition ast.h:931
@ PM_RETRY_NODE
RetryNode.
Definition ast.h:964
@ PM_REDO_NODE
RedoNode.
Definition ast.h:943
@ PM_CONSTANT_PATH_WRITE_NODE
ConstantPathWriteNode.
Definition ast.h:694
@ PM_SOURCE_LINE_NODE
SourceLineNode.
Definition ast.h:985
@ PM_UNLESS_NODE
UnlessNode.
Definition ast.h:1009
@ PM_CALL_NODE
CallNode.
Definition ast.h:628
@ PM_NIL_NODE
NilNode.
Definition ast.h:895
@ PM_GLOBAL_VARIABLE_READ_NODE
GlobalVariableReadNode.
Definition ast.h:757
@ PM_RATIONAL_NODE
RationalNode.
Definition ast.h:940
@ PM_FIND_PATTERN_NODE
FindPatternNode.
Definition ast.h:727
@ PM_ARRAY_NODE
ArrayNode.
Definition ast.h:589
@ PM_CONSTANT_PATH_TARGET_NODE
ConstantPathTargetNode.
Definition ast.h:691
@ PM_OR_NODE
OrNode.
Definition ast.h:913
@ PM_MULTI_WRITE_NODE
MultiWriteNode.
Definition ast.h:889
@ PM_IF_NODE
IfNode.
Definition ast.h:772
@ PM_INTERPOLATED_STRING_NODE
InterpolatedStringNode.
Definition ast.h:826
@ PM_FALSE_NODE
FalseNode.
Definition ast.h:724
@ PM_HASH_NODE
HashNode.
Definition ast.h:766
@ PM_MATCH_PREDICATE_NODE
MatchPredicateNode.
Definition ast.h:871
@ PM_X_STRING_NODE
XStringNode.
Definition ast.h:1021
@ PM_GLOBAL_VARIABLE_TARGET_NODE
GlobalVariableTargetNode.
Definition ast.h:760
@ PM_AND_NODE
AndNode.
Definition ast.h:583
@ PM_CONSTANT_TARGET_NODE
ConstantTargetNode.
Definition ast.h:700
@ PM_IT_LOCAL_VARIABLE_READ_NODE
ItLocalVariableReadNode.
Definition ast.h:835
@ PM_SOURCE_FILE_NODE
SourceFileNode.
Definition ast.h:982
@ PM_NO_KEYWORDS_PARAMETER_NODE
NoKeywordsParameterNode.
Definition ast.h:898
@ PM_MULTI_TARGET_NODE
MultiTargetNode.
Definition ast.h:886
@ PM_SPLAT_NODE
SplatNode.
Definition ast.h:988
@ PM_CLASS_VARIABLE_READ_NODE
ClassVariableReadNode.
Definition ast.h:661
@ PM_ELSE_NODE
ElseNode.
Definition ast.h:712
@ PM_INTERPOLATED_MATCH_LAST_LINE_NODE
InterpolatedMatchLastLineNode.
Definition ast.h:820
@ PM_SYMBOL_NODE
SymbolNode.
Definition ast.h:1000
@ PM_RESCUE_MODIFIER_NODE
RescueModifierNode.
Definition ast.h:955
@ PM_ALIAS_METHOD_NODE
AliasMethodNode.
Definition ast.h:577
@ PM_MATCH_REQUIRED_NODE
MatchRequiredNode.
Definition ast.h:874
@ PM_BACK_REFERENCE_READ_NODE
BackReferenceReadNode.
Definition ast.h:601
@ PM_BLOCK_ARGUMENT_NODE
BlockArgumentNode.
Definition ast.h:607
@ PM_MISSING_NODE
MissingNode.
Definition ast.h:880
@ PM_SELF_NODE
SelfNode.
Definition ast.h:970
@ PM_TRUE_NODE
TrueNode.
Definition ast.h:1003
@ PM_ASSOC_SPLAT_NODE
AssocSplatNode.
Definition ast.h:598
@ PM_RANGE_NODE
RangeNode.
Definition ast.h:937
@ PM_LOCAL_VARIABLE_READ_NODE
LocalVariableReadNode.
Definition ast.h:859
@ PM_NEXT_NODE
NextNode.
Definition ast.h:892
@ PM_REGULAR_EXPRESSION_NODE
RegularExpressionNode.
Definition ast.h:946
@ PM_CONSTANT_WRITE_NODE
ConstantWriteNode.
Definition ast.h:703
@ PM_HASH_PATTERN_NODE
HashPatternNode.
Definition ast.h:769
@ PM_UNDEF_NODE
UndefNode.
Definition ast.h:1006
@ PM_ENSURE_NODE
EnsureNode.
Definition ast.h:721
@ PM_LOCAL_VARIABLE_WRITE_NODE
LocalVariableWriteNode.
Definition ast.h:865
@ PM_KEYWORD_HASH_NODE
KeywordHashNode.
Definition ast.h:841
@ PM_PARENTHESES_NODE
ParenthesesNode.
Definition ast.h:919
@ PM_CLASS_VARIABLE_WRITE_NODE
ClassVariableWriteNode.
Definition ast.h:667
@ PM_POST_EXECUTION_NODE
PostExecutionNode.
Definition ast.h:928
@ PM_RETURN_NODE
ReturnNode.
Definition ast.h:967
@ PM_ARRAY_PATTERN_NODE
ArrayPatternNode.
Definition ast.h:592
@ PM_MATCH_LAST_LINE_NODE
MatchLastLineNode.
Definition ast.h:868
@ PM_CONSTANT_PATH_NODE
ConstantPathNode.
Definition ast.h:682
@ PM_INTERPOLATED_SYMBOL_NODE
InterpolatedSymbolNode.
Definition ast.h:829
@ PM_CLASS_VARIABLE_TARGET_NODE
ClassVariableTargetNode.
Definition ast.h:664
@ PM_BREAK_NODE
BreakNode.
Definition ast.h:622
@ PM_IMAGINARY_NODE
ImaginaryNode.
Definition ast.h:775
@ PM_CONSTANT_READ_NODE
ConstantReadNode.
Definition ast.h:697
@ PM_GLOBAL_VARIABLE_WRITE_NODE
GlobalVariableWriteNode.
Definition ast.h:763
@ PM_SOURCE_ENCODING_NODE
SourceEncodingNode.
Definition ast.h:979
@ PM_BEGIN_NODE
BeginNode.
Definition ast.h:604
@ PM_INSTANCE_VARIABLE_READ_NODE
InstanceVariableReadNode.
Definition ast.h:808
@ PM_FLIP_FLOP_NODE
FlipFlopNode.
Definition ast.h:730
@ PM_INSTANCE_VARIABLE_WRITE_NODE
InstanceVariableWriteNode.
Definition ast.h:814
@ PM_INSTANCE_VARIABLE_TARGET_NODE
InstanceVariableTargetNode.
Definition ast.h:811
@ PM_CASE_NODE
CaseNode.
Definition ast.h:646
@ PM_FLOAT_NODE
FloatNode.
Definition ast.h:733
@ PM_ASSOC_NODE
AssocNode.
Definition ast.h:595
@ PM_INTEGER_NODE
IntegerNode.
Definition ast.h:817
@ PM_LOCAL_VARIABLE_TARGET_NODE
LocalVariableTargetNode.
Definition ast.h:862
@ PM_STRING_NODE
StringNode.
Definition ast.h:994
@ PM_ALIAS_GLOBAL_VARIABLE_NODE
AliasGlobalVariableNode.
Definition ast.h:574
@ PM_NUMBERED_REFERENCE_READ_NODE
NumberedReferenceReadNode.
Definition ast.h:904
@ PM_STATEMENTS_NODE
StatementsNode.
Definition ast.h:991
@ PM_BLOCK_NODE
BlockNode.
Definition ast.h:613
@ PM_INTERPOLATED_REGULAR_EXPRESSION_NODE
InterpolatedRegularExpressionNode.
Definition ast.h:823
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE
We store the flags enum in every node in the tree.
Definition ast.h:1046
@ PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition ast.h:7953
@ PM_STRING_FLAGS_FROZEN
frozen by virtue of a frozen_string_literal: true comment or --enable-frozen-string-literal
Definition ast.h:7936
@ PM_STRING_FLAGS_FORCED_BINARY_ENCODING
internal bytes forced the encoding to binary
Definition ast.h:7933
@ PM_STRING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:7930
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING
if the arguments contain forwarding
Definition ast.h:7754
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS
if the arguments contain keywords
Definition ast.h:7757
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT
if the arguments contain a keyword splat
Definition ast.h:7760
#define PM_NODE_FLAG_P(node, flag)
Return true if the given flag is set on the given node.
Definition ast.h:1063
#define PM_NODE_TYPE_P(node, type)
Return true if the type of the given node matches the given type.
Definition ast.h:1058
#define PM_NODE_TYPE(node)
Cast the type to an enum to allow the compiler to provide exhaustiveness checking.
Definition ast.h:1053
@ PM_INTEGER_BASE_FLAGS_HEXADECIMAL
0x prefix
Definition ast.h:7819
@ PM_INTEGER_BASE_FLAGS_OCTAL
0o or 0 prefix
Definition ast.h:7816
@ PM_INTEGER_BASE_FLAGS_DECIMAL
0d or no prefix
Definition ast.h:7813
@ PM_INTEGER_BASE_FLAGS_BINARY
0b prefix
Definition ast.h:7810
enum pm_token_type pm_token_type_t
This enum represents every type of token in the Ruby source.
@ PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS
parentheses that contain multiple potentially void statements
Definition ast.h:7862
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
When we're serializing to Java, we want to skip serializing the location fields as they won't be used...
Definition ast.h:7962
@ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
&.
Definition ast.h:7782
@ PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE
a call that is an attribute write, so the value being written should be returned
Definition ast.h:7788
@ PM_CALL_NODE_FLAGS_VARIABLE_CALL
a call that could have been a local variable
Definition ast.h:7785
@ PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition ast.h:7908
@ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
x - ignores whitespace and allows comments in regular expressions
Definition ast.h:7881
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition ast.h:1040
@ PM_TOKEN_STAR_STAR
**
Definition ast.h:469
@ PM_TOKEN_DOT_DOT_DOT
the ... range operator or forwarding parameter
Definition ast.h:124
@ PM_TOKEN_MINUS_EQUAL
-=
Definition ast.h:385
@ PM_TOKEN_IGNORED_NEWLINE
an ignored newline
Definition ast.h:196
@ PM_TOKEN_BANG_EQUAL
!=
Definition ast.h:64
@ PM_TOKEN_KEYWORD___FILE__
FILE
Definition ast.h:349
@ PM_TOKEN_KEYWORD_WHEN
when
Definition ast.h:334
@ PM_TOKEN_FLOAT
a floating point number
Definition ast.h:160
@ PM_TOKEN_PLUS_EQUAL
+=
Definition ast.h:442
@ PM_TOKEN_DOT_DOT
the .
Definition ast.h:121
@ PM_TOKEN_UDOT_DOT
unary .
Definition ast.h:496
@ PM_TOKEN_AMPERSAND_DOT
&.
Definition ast.h:49
@ PM_TOKEN_NEWLINE
a newline character outside of other tokens
Definition ast.h:391
@ PM_TOKEN_NUMBERED_REFERENCE
a numbered reference to a capture group in the previous regular expression match
Definition ast.h:394
@ PM_TOKEN_AMPERSAND
&
Definition ast.h:40
@ PM_TOKEN_KEYWORD_YIELD
yield
Definition ast.h:343
@ PM_TOKEN_KEYWORD_END
end
Definition ast.h:253
@ PM_TOKEN_LAMBDA_BEGIN
{
Definition ast.h:361
@ PM_TOKEN_KEYWORD_UNTIL_MODIFIER
until in the modifier form
Definition ast.h:331
@ PM_TOKEN_EQUAL_EQUAL_EQUAL
===
Definition ast.h:151
@ PM_TOKEN_INTEGER_RATIONAL
an integer with a rational suffix
Definition ast.h:208
@ PM_TOKEN_USTAR
unary *
Definition ast.h:511
@ PM_TOKEN_TILDE
~ or ~@
Definition ast.h:487
@ PM_TOKEN_KEYWORD___ENCODING__
ENCODING
Definition ast.h:346
@ PM_TOKEN_REGEXP_END
the end of a regular expression
Definition ast.h:451
@ PM_TOKEN_KEYWORD_UNTIL
until
Definition ast.h:328
@ PM_TOKEN_COMMA
,
Definition ast.h:109
@ PM_TOKEN_MAXIMUM
The maximum token value.
Definition ast.h:523
@ PM_TOKEN_GREATER
Definition ast.h:175
@ PM_TOKEN_INTEGER
an integer (any base)
Definition ast.h:202
@ PM_TOKEN_SLASH_EQUAL
/=
Definition ast.h:460
@ PM_TOKEN_UMINUS_NUM
-@ for a number
Definition ast.h:505
@ PM_TOKEN_EMBVAR
Definition ast.h:142
@ PM_TOKEN_KEYWORD_UNLESS_MODIFIER
unless in the modifier form
Definition ast.h:325
@ PM_TOKEN_INTEGER_RATIONAL_IMAGINARY
an integer with a rational and imaginary suffix
Definition ast.h:211
@ PM_TOKEN_FLOAT_RATIONAL_IMAGINARY
a floating pointer number with a rational and imaginary suffix
Definition ast.h:169
@ PM_TOKEN_BRACKET_LEFT_RIGHT
[]
Definition ast.h:82
@ PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
&&=
Definition ast.h:46
@ PM_TOKEN_KEYWORD_CLASS
class
Definition ast.h:232
@ PM_TOKEN_KEYWORD_BEGIN
begin
Definition ast.h:220
@ PM_TOKEN_NOT_PROVIDED
a token that was not present but it is okay
Definition ast.h:37
@ PM_TOKEN_USTAR_STAR
unary **
Definition ast.h:514
@ PM_TOKEN_GREATER_GREATER_EQUAL
‍>=
Definition ast.h:184
@ PM_TOKEN_PERCENT_EQUAL
%=
Definition ast.h:409
@ PM_TOKEN_PERCENT
%
Definition ast.h:406
@ PM_TOKEN_KEYWORD_IN
in
Definition ast.h:274
@ PM_TOKEN_BANG
! or !@
Definition ast.h:61
@ PM_TOKEN_KEYWORD_NOT
not
Definition ast.h:286
@ PM_TOKEN_BRACKET_LEFT_ARRAY
[ for the beginning of an array
Definition ast.h:79
@ PM_TOKEN_HEREDOC_END
the end of a heredoc
Definition ast.h:187
@ PM_TOKEN_HEREDOC_START
the start of a heredoc
Definition ast.h:190
@ PM_TOKEN_KEYWORD_DEFINED
defined?
Definition ast.h:238
@ PM_TOKEN_UCOLON_COLON
unary ::
Definition ast.h:493
@ PM_TOKEN_LABEL_END
the end of a label
Definition ast.h:358
@ PM_TOKEN_EQUAL_GREATER
=>
Definition ast.h:154
@ PM_TOKEN_KEYWORD_UNLESS
unless
Definition ast.h:322
@ PM_TOKEN_KEYWORD_ENSURE
ensure
Definition ast.h:259
@ PM_TOKEN_AMPERSAND_EQUAL
&=
Definition ast.h:52
@ PM_TOKEN_EQUAL_EQUAL
==
Definition ast.h:148
@ PM_TOKEN_UPLUS
+@
Definition ast.h:508
@ PM_TOKEN_FLOAT_IMAGINARY
a floating pointer number with an imaginary suffix
Definition ast.h:163
@ PM_TOKEN_KEYWORD_BEGIN_UPCASE
BEGIN.
Definition ast.h:223
@ PM_TOKEN_LESS_EQUAL_GREATER
<=>
Definition ast.h:370
@ PM_TOKEN_KEYWORD_RESCUE_MODIFIER
rescue in the modifier form
Definition ast.h:298
@ PM_TOKEN_MISSING
a token that was expected but not found
Definition ast.h:34
@ PM_TOKEN_MINUS_GREATER
->
Definition ast.h:388
@ PM_TOKEN_KEYWORD_FALSE
false
Definition ast.h:262
@ PM_TOKEN_PIPE_PIPE_EQUAL
||=
Definition ast.h:436
@ PM_TOKEN_KEYWORD_IF
if
Definition ast.h:268
@ PM_TOKEN_EMBEXPR_BEGIN
#{
Definition ast.h:136
@ PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES
( for a parentheses node
Definition ast.h:400
@ PM_TOKEN_EMBDOC_END
=end
Definition ast.h:130
@ PM_TOKEN_KEYWORD_ELSE
else
Definition ast.h:247
@ PM_TOKEN_BACK_REFERENCE
a back reference
Definition ast.h:58
@ PM_TOKEN_BRACKET_LEFT
[
Definition ast.h:76
@ PM_TOKEN_EOF
final token in the file
Definition ast.h:31
@ PM_TOKEN_PIPE_PIPE
||
Definition ast.h:433
@ PM_TOKEN_KEYWORD_NIL
nil
Definition ast.h:283
@ PM_TOKEN_PERCENT_UPPER_W
%W
Definition ast.h:424
@ PM_TOKEN_KEYWORD_RETURN
return
Definition ast.h:304
@ PM_TOKEN_CLASS_VARIABLE
a class variable
Definition ast.h:100
@ PM_TOKEN_PIPE
|
Definition ast.h:427
@ PM_TOKEN_PARENTHESIS_LEFT
(
Definition ast.h:397
@ PM_TOKEN_BANG_TILDE
!~
Definition ast.h:67
@ PM_TOKEN_DOT
the .
Definition ast.h:118
@ PM_TOKEN_PARENTHESIS_RIGHT
)
Definition ast.h:403
@ PM_TOKEN_KEYWORD_RESCUE
rescue
Definition ast.h:295
@ PM_TOKEN_INSTANCE_VARIABLE
an instance variable
Definition ast.h:199
@ PM_TOKEN_PIPE_EQUAL
|=
Definition ast.h:430
@ PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL
[]=
Definition ast.h:85
@ PM_TOKEN_UAMPERSAND
unary &
Definition ast.h:490
@ PM_TOKEN_MINUS
-
Definition ast.h:382
@ PM_TOKEN_CONSTANT
a constant
Definition ast.h:115
@ PM_TOKEN_IDENTIFIER
an identifier
Definition ast.h:193
@ PM_TOKEN_EMBDOC_BEGIN
=begin
Definition ast.h:127
@ PM_TOKEN_STAR_EQUAL
*=
Definition ast.h:466
@ PM_TOKEN_KEYWORD_OR
or
Definition ast.h:289
@ PM_TOKEN_KEYWORD_AND
and
Definition ast.h:217
@ PM_TOKEN_LESS
<
Definition ast.h:364
@ PM_TOKEN_KEYWORD_BREAK
break
Definition ast.h:226
@ PM_TOKEN_PERCENT_LOWER_W
%w
Definition ast.h:415
@ PM_TOKEN_SYMBOL_BEGIN
the beginning of a symbol
Definition ast.h:484
@ PM_TOKEN_METHOD_NAME
a method name
Definition ast.h:379
@ PM_TOKEN_KEYWORD_CASE
case
Definition ast.h:229
@ PM_TOKEN_WORDS_SEP
a separator between words in a list
Definition ast.h:517
@ PM_TOKEN_FLOAT_RATIONAL
a floating point number with a rational suffix
Definition ast.h:166
@ PM_TOKEN_LESS_LESS_EQUAL
<<=
Definition ast.h:376
@ PM_TOKEN_EMBDOC_LINE
a line inside of embedded documentation
Definition ast.h:133
@ PM_TOKEN_KEYWORD_SUPER
super
Definition ast.h:310
@ PM_TOKEN_KEYWORD_DO
do
Definition ast.h:241
@ PM_TOKEN_KEYWORD_REDO
redo
Definition ast.h:292
@ PM_TOKEN_EQUAL_TILDE
=~
Definition ast.h:157
@ PM_TOKEN_EMBEXPR_END
}
Definition ast.h:139
@ PM_TOKEN_KEYWORD_END_UPCASE
END.
Definition ast.h:256
@ PM_TOKEN_KEYWORD___LINE__
__LINE__
Definition ast.h:352
@ PM_TOKEN_STRING_END
the end of a string
Definition ast.h:481
@ PM_TOKEN_STRING_CONTENT
the contents of a string
Definition ast.h:478
@ PM_TOKEN_BRACE_LEFT
{
Definition ast.h:70
@ PM_TOKEN_COLON_COLON
::
Definition ast.h:106
@ PM_TOKEN_GREATER_GREATER
>>
Definition ast.h:181
@ PM_TOKEN_PERCENT_LOWER_X
%x
Definition ast.h:418
@ PM_TOKEN_KEYWORD_SELF
self
Definition ast.h:307
@ PM_TOKEN_PERCENT_LOWER_I
%i
Definition ast.h:412
@ PM_TOKEN_KEYWORD_ALIAS
alias
Definition ast.h:214
@ PM_TOKEN_GLOBAL_VARIABLE
a global variable
Definition ast.h:172
@ PM_TOKEN_KEYWORD_IF_MODIFIER
if in the modifier form
Definition ast.h:271
@ PM_TOKEN_SLASH
/
Definition ast.h:457
@ PM_TOKEN_KEYWORD_RETRY
retry
Definition ast.h:301
@ PM_TOKEN_COLON
:
Definition ast.h:103
@ PM_TOKEN_KEYWORD_UNDEF
undef
Definition ast.h:319
@ PM_TOKEN_BRACKET_RIGHT
]
Definition ast.h:88
@ PM_TOKEN_KEYWORD_FOR
for
Definition ast.h:265
@ PM_TOKEN_KEYWORD_THEN
then
Definition ast.h:313
@ PM_TOKEN_QUESTION_MARK
?
Definition ast.h:445
@ PM_TOKEN___END__
marker for the point in the file at which the parser should stop
Definition ast.h:520
@ PM_TOKEN_KEYWORD_WHILE
while
Definition ast.h:337
@ PM_TOKEN_EQUAL
=
Definition ast.h:145
@ PM_TOKEN_KEYWORD_DEF
def
Definition ast.h:235
@ PM_TOKEN_UDOT_DOT_DOT
unary ... operator
Definition ast.h:499
@ PM_TOKEN_STAR
*
Definition ast.h:463
@ PM_TOKEN_KEYWORD_WHILE_MODIFIER
while in the modifier form
Definition ast.h:340
@ PM_TOKEN_KEYWORD_TRUE
true
Definition ast.h:316
@ PM_TOKEN_BRACE_RIGHT
}
Definition ast.h:73
@ PM_TOKEN_SEMICOLON
;
Definition ast.h:454
@ PM_TOKEN_REGEXP_BEGIN
the beginning of a regular expression
Definition ast.h:448
@ PM_TOKEN_CARET
^
Definition ast.h:91
@ PM_TOKEN_PERCENT_UPPER_I
%I
Definition ast.h:421
@ PM_TOKEN_KEYWORD_DO_LOOP
do keyword for a predicate in a while, until, or for loop
Definition ast.h:244
@ PM_TOKEN_KEYWORD_MODULE
module
Definition ast.h:277
@ PM_TOKEN_PLUS
+
Definition ast.h:439
@ PM_TOKEN_KEYWORD_NEXT
next
Definition ast.h:280
@ PM_TOKEN_BACKTICK
`
Definition ast.h:55
@ PM_TOKEN_INTEGER_IMAGINARY
an integer with an imaginary suffix
Definition ast.h:205
@ PM_TOKEN_LABEL
a label
Definition ast.h:355
@ PM_TOKEN_STAR_STAR_EQUAL
**=
Definition ast.h:472
@ PM_TOKEN_CHARACTER_LITERAL
a character literal
Definition ast.h:97
@ PM_TOKEN_AMPERSAND_AMPERSAND
&&
Definition ast.h:43
@ PM_TOKEN_UMINUS
-@
Definition ast.h:502
@ PM_TOKEN_LESS_LESS
<<
Definition ast.h:373
@ PM_TOKEN_GREATER_EQUAL
>=
Definition ast.h:178
@ PM_TOKEN_COMMENT
a comment
Definition ast.h:112
@ PM_TOKEN_CARET_EQUAL
^=
Definition ast.h:94
@ PM_TOKEN_KEYWORD_ELSIF
elsif
Definition ast.h:250
@ PM_TOKEN_STRING_BEGIN
the beginning of a string
Definition ast.h:475
@ PM_TOKEN_LESS_EQUAL
<=
Definition ast.h:367
@ PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:7799
@ PM_LOOP_FLAGS_BEGIN_MODIFIER
a loop after a begin statement, so the body is executed first before the condition
Definition ast.h:7846
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:29
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:213
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:219
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:566
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:245
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2139
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition prism.h:88
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2116
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2046
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:362
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:18009
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:18011
const uint8_t * start
The start of the regular expression.
Definition prism.c:18014
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:18025
const uint8_t * end
The end of the regular expression.
Definition prism.c:18017
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20900
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20911
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20902
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20908
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20905
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20918
AndNode.
Definition ast.h:1263
struct pm_node * left
AndNode::left.
Definition ast.h:1279
struct pm_node * right
AndNode::right.
Definition ast.h:1292
ArgumentsNode.
Definition ast.h:1324
pm_node_t base
The embedded base node.
Definition ast.h:1326
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1337
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
ArrayNode.
Definition ast.h:1355
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1365
ArrayPatternNode.
Definition ast.h:1416
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1424
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1464
pm_node_t base
The embedded base node.
Definition ast.h:1418
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1474
AssocNode.
Definition ast.h:1489
struct pm_node * value
AssocNode::value.
Definition ast.h:1521
struct pm_node * key
AssocNode::key.
Definition ast.h:1508
BeginNode.
Definition ast.h:1615
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1668
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1648
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1638
pm_node_t base
The embedded base node.
Definition ast.h:1617
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1658
This struct represents a set of binding powers used for a given token.
Definition prism.c:12963
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12971
pm_binding_power_t left
The left binding power.
Definition prism.c:12965
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12977
pm_binding_power_t right
The right binding power.
Definition prism.c:12968
BlockLocalVariableNode.
Definition ast.h:1734
BlockNode.
Definition ast.h:1762
BlockParameterNode.
Definition ast.h:1838
BlockParametersNode.
Definition ast.h:1892
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2119
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2180
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2200
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2138
pm_constant_id_t name
CallNode::name.
Definition ast.h:2161
pm_node_t base
The embedded base node.
Definition ast.h:2121
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2151
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2171
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2190
struct pm_node * block
CallNode::block.
Definition ast.h:2210
CaseMatchNode.
Definition ast.h:2545
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2568
CaseNode.
Definition ast.h:2615
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2638
ClassVariableReadNode.
Definition ast.h:2880
ClassVariableTargetNode.
Definition ast.h:2909
ClassVariableWriteNode.
Definition ast.h:2932
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:466
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition ast.h:3146
ConstantPathTargetNode.
Definition ast.h:3284
ConstantReadNode.
Definition ast.h:3379
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3408
ConstantWriteNode.
Definition ast.h:3431
This is a node in a linked list of contexts.
Definition parser.h:439
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:359
ElseNode.
Definition ast.h:3610
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3623
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3708
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3721
FindPatternNode.
Definition ast.h:3765
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3773
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3793
pm_node_t base
The embedded base node.
Definition ast.h:3767
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3798
FlipFlopNode.
Definition ast.h:3816
FloatNode.
Definition ast.h:3849
double value
FloatNode::value.
Definition ast.h:3859
pm_node_t base
The embedded base node.
Definition ast.h:3851
ForwardingParameterNode.
Definition ast.h:3985
GlobalVariableReadNode.
Definition ast.h:4145
GlobalVariableTargetNode.
Definition ast.h:4174
GlobalVariableWriteNode.
Definition ast.h:4197
HashNode.
Definition ast.h:4259
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4285
HashPatternNode.
Definition ast.h:4313
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4336
pm_node_t base
The embedded base node.
Definition ast.h:4315
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4341
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4321
All of the information that needs to be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4362
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4422
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4441
ImaginaryNode.
Definition ast.h:4468
InstanceVariableReadNode.
Definition ast.h:4958
InstanceVariableTargetNode.
Definition ast.h:4987
InstanceVariableWriteNode.
Definition ast.h:5010
IntegerNode.
Definition ast.h:5078
pm_integer_t value
IntegerNode::value.
Definition ast.h:5088
pm_node_t base
The embedded base node.
Definition ast.h:5080
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5116
InterpolatedRegularExpressionNode.
Definition ast.h:5162
InterpolatedStringNode.
Definition ast.h:5199
pm_node_t base
The embedded base node.
Definition ast.h:5201
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5207
InterpolatedSymbolNode.
Definition ast.h:5232
pm_node_t base
The embedded base node.
Definition ast.h:5234
InterpolatedXStringNode.
Definition ast.h:5265
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5273
pm_node_t base
The embedded base node.
Definition ast.h:5267
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5278
KeywordHashNode.
Definition ast.h:5337
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
enum pm_lex_mode::@92 mode
The type of this lex mode.
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
union pm_lex_mode::@93 as
The data associated with this type of lex mode.
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
LocalVariableReadNode.
Definition ast.h:5579
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5610
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5597
LocalVariableTargetNode.
Definition ast.h:5625
LocalVariableWriteNode.
Definition ast.h:5653
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5680
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5667
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:545
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:547
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:550
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
MatchLastLineNode.
Definition ast.h:5745
MatchWriteNode.
Definition ast.h:5849
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:5862
MultiTargetNode.
Definition ast.h:5945
pm_node_t base
The embedded base node.
Definition ast.h:5947
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6003
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:5963
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6013
MultiWriteNode.
Definition ast.h:6028
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:558
size_t size
The number of nodes in the list.
Definition ast.h:560
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:566
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1069
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1074
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1080
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1092
OptionalParameterNode.
Definition ast.h:6301
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:98
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:147
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:109
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:163
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:170
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:124
int32_t line
The line within the file that the parse starts on.
Definition options.h:118
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:103
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:156
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:180
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:129
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:112
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:144
OrNode.
Definition ast.h:6339
struct pm_node * left
OrNode::left.
Definition ast.h:6355
struct pm_node * right
OrNode::right.
Definition ast.h:6368
ParametersNode.
Definition ast.h:6394
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6412
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6432
pm_node_t base
The embedded base node.
Definition ast.h:6396
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6427
ParenthesesNode.
Definition ast.h:6450
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6458
This struct represents the overall parser.
Definition parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:909
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
struct pm_parser::@98 lex_modes
A stack of lex modes.
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:646
RangeNode.
Definition ast.h:6656
struct pm_node * right
RangeNode::right.
Definition ast.h:6686
struct pm_node * left
RangeNode::left.
Definition ast.h:6672
RationalNode.
Definition ast.h:6714
pm_node_t base
The embedded base node.
Definition ast.h:6716
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6726
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:10389
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10394
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10391
RegularExpressionNode.
Definition ast.h:6781
pm_node_t base
The embedded base node.
Definition ast.h:6783
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:6804
RequiredParameterNode.
Definition ast.h:6855
RescueModifierNode.
Definition ast.h:6878
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:6896
RescueNode.
Definition ast.h:6916
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:6954
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:6944
pm_node_t base
The embedded base node.
Definition ast.h:6918
This struct represents a node in a linked list of scopes.
Definition parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
SplatNode.
Definition ast.h:7216
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7229
StatementsNode.
Definition ast.h:7244
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7252
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7279
pm_node_t base
The embedded base node.
Definition ast.h:7281
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7302
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7297
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7287
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@99 type
The type of the string.
SymbolNode.
Definition ast.h:7371
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7384
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7394
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:10363
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10368
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10374
This struct represents a token in the Ruby source.
Definition ast.h:530
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:538
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:535
pm_token_type_t type
The type of the token.
Definition ast.h:532
UndefNode.
Definition ast.h:7427
UnlessNode.
Definition ast.h:7458
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7508
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7518
WhenNode.
Definition ast.h:7594
XStringNode.
Definition ast.h:7685