Ruby 4.0.0dev (2025-12-08 revision 4f900c35bc01e12b948703b9e4b4f5d0e803f073)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * Presumably the number of columns a tab character counts for; the consumers
 * of this constant are outside this chunk -- TODO confirm.
 */
#define PM_TAB_WHITESPACE_SIZE 8

/**
 * Generic minimum/maximum helpers. Each argument can be evaluated twice, so
 * callers must not pass expressions that have side effects.
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
20
21/******************************************************************************/
22/* Helpful AST-related macros */
23/******************************************************************************/
24
/** Short aliases for PM_NODE_FLAGS and PM_NODE_UPCAST. */
#define FL PM_NODE_FLAGS
#define UP PM_NODE_UPCAST

/** The start and end pointers of a token, given a pointer to that token. */
#define PM_TOKEN_START(token_) ((token_)->start)
#define PM_TOKEN_END(token_) ((token_)->end)

/** The start and end pointers of a node's location, given a node pointer. */
#define PM_NODE_START(node_) (UP(node_)->location.start)
#define PM_NODE_END(node_) (UP(node_)->location.end)

/** An empty location positioned at the parser's start pointer. */
#define PM_LOCATION_NULL_VALUE(parser_) \
    ((pm_location_t) { .start = (parser_)->start, .end = (parser_)->start })

/** A location spanning the given token. */
#define PM_LOCATION_TOKEN_VALUE(token_) \
    ((pm_location_t) { .start = PM_TOKEN_START(token_), .end = PM_TOKEN_END(token_) })

/** A location spanning the given node. */
#define PM_LOCATION_NODE_VALUE(node_) \
    ((pm_location_t) { .start = PM_NODE_START(node_), .end = PM_NODE_END(node_) })

/**
 * A location spanning the given token, or a zeroed location when the token
 * has the type PM_TOKEN_NOT_PROVIDED.
 */
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token_) \
    ((token_)->type == PM_TOKEN_NOT_PROVIDED ? ((pm_location_t) { 0 }) : PM_LOCATION_TOKEN_VALUE(token_))
38
39/******************************************************************************/
40/* Lex mode manipulations */
41/******************************************************************************/
42
/**
 * Return the incrementor byte for the given opening delimiter. For the
 * bracket-like openers `(`, `[`, `{`, and `<` this is the delimiter itself;
 * for every other byte it is `\0`.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
59
/**
 * Return the terminator byte for the given opening delimiter. Bracket-like
 * openers map to their closing counterpart; any other byte terminates itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
79
85static bool
86lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
87 lex_mode.prev = parser->lex_modes.current;
88 parser->lex_modes.index++;
89
90 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
92 if (parser->lex_modes.current == NULL) return false;
93
94 *parser->lex_modes.current = lex_mode;
95 } else {
96 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
97 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
98 }
99
100 return true;
101}
102
106static inline bool
107lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
108 uint8_t incrementor = lex_mode_incrementor(delimiter);
109 uint8_t terminator = lex_mode_terminator(delimiter);
110
111 pm_lex_mode_t lex_mode = {
112 .mode = PM_LEX_LIST,
113 .as.list = {
114 .nesting = 0,
115 .interpolation = interpolation,
116 .incrementor = incrementor,
117 .terminator = terminator
118 }
119 };
120
121 // These are the places where we need to split up the content of the list.
122 // We'll use strpbrk to find the first of these characters.
123 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
124 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
125 size_t index = 7;
126
127 // Now we'll add the terminator to the list of breakpoints. If the
128 // terminator is not already a NULL byte, add it to the list.
129 if (terminator != '\0') {
130 breakpoints[index++] = terminator;
131 }
132
133 // If interpolation is allowed, then we're going to check for the #
134 // character. Otherwise we'll only look for escapes and the terminator.
135 if (interpolation) {
136 breakpoints[index++] = '#';
137 }
138
139 // If there is an incrementor, then we'll check for that as well.
140 if (incrementor != '\0') {
141 breakpoints[index++] = incrementor;
142 }
143
144 parser->explicit_encoding = NULL;
145 return lex_mode_push(parser, lex_mode);
146}
147
153static inline bool
154lex_mode_push_list_eof(pm_parser_t *parser) {
155 return lex_mode_push_list(parser, false, '\0');
156}
157
161static inline bool
162lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
163 pm_lex_mode_t lex_mode = {
164 .mode = PM_LEX_REGEXP,
165 .as.regexp = {
166 .nesting = 0,
167 .incrementor = incrementor,
168 .terminator = terminator
169 }
170 };
171
172 // These are the places where we need to split up the content of the
173 // regular expression. We'll use strpbrk to find the first of these
174 // characters.
175 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
176 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
177 size_t index = 4;
178
179 // First we'll add the terminator.
180 if (terminator != '\0') {
181 breakpoints[index++] = terminator;
182 }
183
184 // Next, if there is an incrementor, then we'll check for that as well.
185 if (incrementor != '\0') {
186 breakpoints[index++] = incrementor;
187 }
188
189 parser->explicit_encoding = NULL;
190 return lex_mode_push(parser, lex_mode);
191}
192
196static inline bool
197lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
198 pm_lex_mode_t lex_mode = {
199 .mode = PM_LEX_STRING,
200 .as.string = {
201 .nesting = 0,
202 .interpolation = interpolation,
203 .label_allowed = label_allowed,
204 .incrementor = incrementor,
205 .terminator = terminator
206 }
207 };
208
209 // These are the places where we need to split up the content of the
210 // string. We'll use strpbrk to find the first of these characters.
211 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
212 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
213 size_t index = 3;
214
215 // Now add in the terminator. If the terminator is not already a NULL byte,
216 // then we'll add it.
217 if (terminator != '\0') {
218 breakpoints[index++] = terminator;
219 }
220
221 // If interpolation is allowed, then we're going to check for the #
222 // character. Otherwise we'll only look for escapes and the terminator.
223 if (interpolation) {
224 breakpoints[index++] = '#';
225 }
226
227 // If we have an incrementor, then we'll add that in as a breakpoint as
228 // well.
229 if (incrementor != '\0') {
230 breakpoints[index++] = incrementor;
231 }
232
233 parser->explicit_encoding = NULL;
234 return lex_mode_push(parser, lex_mode);
235}
236
242static inline bool
243lex_mode_push_string_eof(pm_parser_t *parser) {
244 return lex_mode_push_string(parser, false, false, '\0', '\0');
245}
246
252static void
253lex_mode_pop(pm_parser_t *parser) {
254 if (parser->lex_modes.index == 0) {
255 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
256 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
257 parser->lex_modes.index--;
258 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
259 } else {
260 parser->lex_modes.index--;
261 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
262 xfree(parser->lex_modes.current);
263 parser->lex_modes.current = prev;
264 }
265}
266
270static inline bool
271lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
272 return parser->lex_state & state;
273}
274
/**
 * The possible results of lex_state_ignored_p.
 */
typedef enum {
    /** Returned when neither of the conditions below holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** Returned for the BEG/CLASS/FNAME/DOT states when LABELED is not set. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** Returned when the state, ignoring LABEL, is exactly ARG|LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
280
281static inline pm_ignored_newline_type_t
282lex_state_ignored_p(pm_parser_t *parser) {
283 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
284
285 if (ignored) {
286 return PM_IGNORED_NEWLINE_ALL;
287 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
288 return PM_IGNORED_NEWLINE_PATTERN;
289 } else {
290 return PM_IGNORED_NEWLINE_NONE;
291 }
292}
293
294static inline bool
295lex_state_beg_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
297}
298
299static inline bool
300lex_state_arg_p(pm_parser_t *parser) {
301 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
302}
303
304static inline bool
305lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
306 if (parser->current.end >= parser->end) {
307 return false;
308 }
309 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
310}
311
312static inline bool
313lex_state_end_p(pm_parser_t *parser) {
314 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
315}
316
320static inline bool
321lex_state_operator_p(pm_parser_t *parser) {
322 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
323}
324
329static inline void
330lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
331 parser->lex_state = state;
332}
333
#ifndef PM_DEBUG_LOGGING
/**
 * Debug logging of lex state changes is off unless PM_DEBUG_LOGGING has
 * already been defined.
 */
#define PM_DEBUG_LOGGING 0
#endif

#if PM_DEBUG_LOGGING
/**
 * Print the parser's current lex state to stderr, either as `NONE` or as a
 * `|`-separated list of the names of the flags that are set.
 */
PRISM_ATTRIBUTE_UNUSED static void
debug_state(pm_parser_t *parser) {
    fprintf(stderr, "STATE: ");

    if (parser->lex_state == PM_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

    // Pair each flag with its own spelling, in the order they are reported.
#define STATE_ENTRY(state) { state, #state }
    static const struct {
        pm_lex_state_t state;
        const char *name;
    } entries[] = {
        STATE_ENTRY(PM_LEX_STATE_BEG),
        STATE_ENTRY(PM_LEX_STATE_END),
        STATE_ENTRY(PM_LEX_STATE_ENDARG),
        STATE_ENTRY(PM_LEX_STATE_ENDFN),
        STATE_ENTRY(PM_LEX_STATE_ARG),
        STATE_ENTRY(PM_LEX_STATE_CMDARG),
        STATE_ENTRY(PM_LEX_STATE_MID),
        STATE_ENTRY(PM_LEX_STATE_FNAME),
        STATE_ENTRY(PM_LEX_STATE_DOT),
        STATE_ENTRY(PM_LEX_STATE_CLASS),
        STATE_ENTRY(PM_LEX_STATE_LABEL),
        STATE_ENTRY(PM_LEX_STATE_LABELED),
        STATE_ENTRY(PM_LEX_STATE_FITEM)
    };
#undef STATE_ENTRY

    bool printed = false;
    for (size_t index = 0; index < sizeof(entries) / sizeof(entries[0]); index++) {
        if (parser->lex_state & entries[index].state) {
            if (printed) fprintf(stderr, "|");
            fprintf(stderr, "%s", entries[index].name);
            printed = true;
        }
    }

    fprintf(stderr, "\n");
}

/**
 * Set the lex state while logging the caller, the previous state, and the
 * new state to stderr.
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);

    // The macro below is not defined yet, so this calls the real function.
    lex_state_set(parser, state);

    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

/** From here on, every lex_state_set call is logged with its call site. */
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
391
392/******************************************************************************/
393/* Command-line macro helpers */
394/******************************************************************************/
395
/**
 * Evaluate to a non-zero value when the given option bit is set in the
 * parser's command_line field.
 */
#define PM_PARSER_COMMAND_LINE_OPTION(parser_, option_) \
    ((parser_)->command_line & (option_))

/** Test the parser's command_line field for PM_OPTIONS_COMMAND_LINE_A. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser_) \
    PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_A)

/** Test the parser's command_line field for PM_OPTIONS_COMMAND_LINE_E. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser_) \
    PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_E)

/** Test the parser's command_line field for PM_OPTIONS_COMMAND_LINE_L. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser_) \
    PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_L)

/** Test the parser's command_line field for PM_OPTIONS_COMMAND_LINE_N. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser_) \
    PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_N)

/** Test the parser's command_line field for PM_OPTIONS_COMMAND_LINE_P. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser_) \
    PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_P)

/** Test the parser's command_line field for PM_OPTIONS_COMMAND_LINE_X. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser_) \
    PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_X)
416
417/******************************************************************************/
418/* Diagnostic-related functions */
419/******************************************************************************/
420
424static inline void
425pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
426 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
427}
428
/**
 * Append an error with the given diagnostic id, covering the given byte
 * range, to the parser's error list. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format. The parser argument is parenthesized so
 * that any pointer-valued expression can be passed safely.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
434
439static inline void
440pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
441 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
442}
443
/**
 * Append a formatted error covering the location pointed to by `location`.
 * The location expression is evaluated twice.
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (location)->start, \
        (location)->end, \
        diag_id, \
        __VA_ARGS__ \
    )
450
455static inline void
456pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
457 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
458}
459
/**
 * Append a formatted error covering the given node's location. The node
 * expression is evaluated twice.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )
466
/**
 * Append a formatted error covering the given node's location, passing the
 * node's source length (as an int) and its start pointer as the format
 * arguments. The node expression is evaluated several times.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT( \
        parser, \
        node, \
        diag_id, \
        (int) ((node)->location.end - (node)->location.start), \
        (const char *) (node)->location.start \
    )
473
478static inline void
479pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
480 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
481}
482
487static inline void
488pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
489 pm_parser_err(parser, token->start, token->end, diag_id);
490}
491
/**
 * Append a formatted error covering the given token. Unlike
 * pm_parser_err_token, the token is passed by value (not by pointer) and is
 * evaluated twice.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )
498
/**
 * Append a formatted error covering the given token, passing the token's
 * source length (as an int) and its start pointer as the format arguments.
 * The token is passed by value and is evaluated several times.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
505
509static inline void
510pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
511 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
512}
513
518static inline void
519pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
520 pm_parser_warn(parser, token->start, token->end, diag_id);
521}
522
527static inline void
528pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
529 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
530}
531
/**
 * Append a warning with the given diagnostic id, covering the given byte
 * range, to the parser's warning list. The variadic arguments are forwarded
 * to pm_diagnostic_list_append_format. The parser argument is parenthesized
 * so that any pointer-valued expression can be passed safely.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
537
/**
 * Append a formatted warning covering the given token. The token is passed
 * by value (not by pointer) and is evaluated twice.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )
544
/**
 * Append a formatted warning covering the given token, passing the token's
 * source length (as an int) and its start pointer as the format arguments.
 * The token is passed by value and is evaluated several times.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
551
/**
 * Append a formatted warning covering the given node's location. The node
 * expression is evaluated twice.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )
558
564static void
565pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
566 PM_PARSER_ERR_FORMAT(
567 parser,
568 ident_start,
569 ident_start + ident_length,
570 PM_ERR_HEREDOC_TERM,
571 (int) ident_length,
572 (const char *) ident_start
573 );
574}
575
576/******************************************************************************/
577/* Scope-related functions */
578/******************************************************************************/
579
583static bool
584pm_parser_scope_push(pm_parser_t *parser, bool closed) {
585 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
586 if (scope == NULL) return false;
587
588 *scope = (pm_scope_t) {
589 .previous = parser->current_scope,
590 .locals = { 0 },
591 .parameters = PM_SCOPE_PARAMETERS_NONE,
592 .implicit_parameters = { 0 },
593 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
594 .closed = closed
595 };
596
597 parser->current_scope = scope;
598 return true;
599}
600
605static bool
606pm_parser_scope_toplevel_p(pm_parser_t *parser) {
607 pm_scope_t *scope = parser->current_scope;
608
609 do {
610 if (scope->previous == NULL) return true;
611 if (scope->closed) return false;
612 } while ((scope = scope->previous) != NULL);
613
614 assert(false && "unreachable");
615 return true;
616}
617
621static pm_scope_t *
622pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
623 pm_scope_t *scope = parser->current_scope;
624
625 while (depth-- > 0) {
626 assert(scope != NULL);
627 scope = scope->previous;
628 }
629
630 return scope;
631}
632
/**
 * The possible results of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** The enclosing closed scope declares the parameter, with no conflict. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /**
     * The enclosing closed scope declares the parameter, but an open scope
     * nested inside it declares it as well.
     */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** No closed scope declaring the parameter was found. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
638
639static pm_scope_forwarding_param_check_result_t
640pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
641 pm_scope_t *scope = parser->current_scope;
642 bool conflict = false;
643
644 while (scope != NULL) {
645 if (scope->parameters & mask) {
646 if (scope->closed) {
647 if (conflict) {
648 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
649 } else {
650 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
651 }
652 }
653
654 conflict = true;
655 }
656
657 if (scope->closed) break;
658 scope = scope->previous;
659 }
660
661 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
662}
663
664static void
665pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
666 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
668 // Pass.
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
672 break;
673 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
674 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
675 break;
676 }
677}
678
679static void
680pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
681 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
683 // Pass.
684 break;
685 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
686 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
687 break;
688 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
689 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
690 break;
691 }
692}
693
694static void
695pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
696 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
697 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
698 // Pass.
699 break;
700 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
701 // This shouldn't happen, because ... is not allowed in the
702 // declaration of blocks. If we get here, we assume we already have
703 // an error for this.
704 break;
705 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
706 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
707 break;
708 }
709}
710
711static void
712pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
713 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
714 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
715 // Pass.
716 break;
717 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
718 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
719 break;
720 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
721 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
722 break;
723 }
724}
725
730pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
731 return parser->current_scope->shareable_constant;
732}
733
738static void
739pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
740 pm_scope_t *scope = parser->current_scope;
741
742 do {
743 scope->shareable_constant = shareable_constant;
744 } while (!scope->closed && (scope = scope->previous) != NULL);
745}
746
747/******************************************************************************/
748/* Local variable-related functions */
749/******************************************************************************/
750
/**
 * A locals table whose capacity is below this value is kept as a flat list
 * that is scanned linearly; at or above it, the table is an open-addressing
 * hash table.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
755
756static void
757pm_locals_free(pm_locals_t *locals) {
758 if (locals->capacity > 0) {
759 xfree(locals->locals);
760 }
761}
762
767static uint32_t
768pm_locals_hash(pm_constant_id_t name) {
769 name = ((name >> 16) ^ name) * 0x45d9f3b;
770 name = ((name >> 16) ^ name) * 0x45d9f3b;
771 name = (name >> 16) ^ name;
772 return name;
773}
774
779static void
780pm_locals_resize(pm_locals_t *locals) {
781 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
782 assert(next_capacity > locals->capacity);
783
784 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
785 if (next_locals == NULL) abort();
786
787 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
788 if (locals->size > 0) {
789 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
790 }
791 } else {
792 // If we just switched from a list to a hash, then we need to fill in
793 // the hash values of all of the locals.
794 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
795 uint32_t mask = next_capacity - 1;
796
797 for (uint32_t index = 0; index < locals->capacity; index++) {
798 pm_local_t *local = &locals->locals[index];
799
800 if (local->name != PM_CONSTANT_ID_UNSET) {
801 if (hash_needed) local->hash = pm_locals_hash(local->name);
802
803 uint32_t hash = local->hash;
804 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
805 next_locals[hash & mask] = *local;
806 }
807 }
808 }
809
810 pm_locals_free(locals);
811 locals->locals = next_locals;
812 locals->capacity = next_capacity;
813}
814
830static bool
831pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
832 if (locals->size >= (locals->capacity / 4 * 3)) {
833 pm_locals_resize(locals);
834 }
835
836 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
837 for (uint32_t index = 0; index < locals->capacity; index++) {
838 pm_local_t *local = &locals->locals[index];
839
840 if (local->name == PM_CONSTANT_ID_UNSET) {
841 *local = (pm_local_t) {
842 .name = name,
843 .location = { .start = start, .end = end },
844 .index = locals->size++,
845 .reads = reads,
846 .hash = 0
847 };
848 return true;
849 } else if (local->name == name) {
850 return false;
851 }
852 }
853 } else {
854 uint32_t mask = locals->capacity - 1;
855 uint32_t hash = pm_locals_hash(name);
856 uint32_t initial_hash = hash;
857
858 do {
859 pm_local_t *local = &locals->locals[hash & mask];
860
861 if (local->name == PM_CONSTANT_ID_UNSET) {
862 *local = (pm_local_t) {
863 .name = name,
864 .location = { .start = start, .end = end },
865 .index = locals->size++,
866 .reads = reads,
867 .hash = initial_hash
868 };
869 return true;
870 } else if (local->name == name) {
871 return false;
872 } else {
873 hash++;
874 }
875 } while ((hash & mask) != initial_hash);
876 }
877
878 assert(false && "unreachable");
879 return true;
880}
881
886static uint32_t
887pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
888 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
889 for (uint32_t index = 0; index < locals->size; index++) {
890 pm_local_t *local = &locals->locals[index];
891 if (local->name == name) return index;
892 }
893 } else {
894 uint32_t mask = locals->capacity - 1;
895 uint32_t hash = pm_locals_hash(name);
896 uint32_t initial_hash = hash & mask;
897
898 do {
899 pm_local_t *local = &locals->locals[hash & mask];
900
901 if (local->name == PM_CONSTANT_ID_UNSET) {
902 return UINT32_MAX;
903 } else if (local->name == name) {
904 return hash & mask;
905 } else {
906 hash++;
907 }
908 } while ((hash & mask) != initial_hash);
909 }
910
911 return UINT32_MAX;
912}
913
918static void
919pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
920 uint32_t index = pm_locals_find(locals, name);
921 assert(index != UINT32_MAX);
922
923 pm_local_t *local = &locals->locals[index];
924 assert(local->reads < UINT32_MAX);
925
926 local->reads++;
927}
928
933static void
934pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
935 uint32_t index = pm_locals_find(locals, name);
936 assert(index != UINT32_MAX);
937
938 pm_local_t *local = &locals->locals[index];
939 assert(local->reads > 0);
940
941 local->reads--;
942}
943
947static uint32_t
948pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
949 uint32_t index = pm_locals_find(locals, name);
950 assert(index != UINT32_MAX);
951
952 return locals->locals[index].reads;
953}
954
963static void
964pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
965 pm_constant_id_list_init_capacity(list, locals->size);
966
967 // If we're still below the threshold for switching to a hash, then we only
968 // need to loop over the locals until we hit the size because the locals are
969 // stored in a list.
970 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
971
972 // We will only warn for unused variables if we're not at the top level, or
973 // if we're parsing a file outside of eval or -e.
974 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
975
976 for (uint32_t index = 0; index < capacity; index++) {
977 pm_local_t *local = &locals->locals[index];
978
979 if (local->name != PM_CONSTANT_ID_UNSET) {
980 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
981
982 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
983 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
984
985 if (constant->length >= 1 && *constant->start != '_') {
986 PM_PARSER_WARN_FORMAT(
987 parser,
988 local->location.start,
989 local->location.end,
990 PM_WARN_UNUSED_LOCAL_VARIABLE,
991 (int) constant->length,
992 (const char *) constant->start
993 );
994 }
995 }
996 }
997 }
998}
999
1000/******************************************************************************/
1001/* Node-related functions */
1002/******************************************************************************/
1003
1007static inline pm_constant_id_t
1008pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1009 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
1010}
1011
1015static inline pm_constant_id_t
1016pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1017 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1018}
1019
1023static inline pm_constant_id_t
1024pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1025 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1026}
1027
1031static inline pm_constant_id_t
1032pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1033 return pm_parser_constant_id_location(parser, token->start, token->end);
1034}
1035
1040static inline pm_constant_id_t
1041pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1042 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1043}
1044
1050static pm_node_t *
1051pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1052 pm_node_t *void_node = NULL;
1053
1054 while (node != NULL) {
1055 switch (PM_NODE_TYPE(node)) {
1056 case PM_RETURN_NODE:
1057 case PM_BREAK_NODE:
1058 case PM_NEXT_NODE:
1059 case PM_REDO_NODE:
1060 case PM_RETRY_NODE:
1061 case PM_MATCH_REQUIRED_NODE:
1062 return void_node != NULL ? void_node : node;
1063 case PM_MATCH_PREDICATE_NODE:
1064 return NULL;
1065 case PM_BEGIN_NODE: {
1066 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1067
1068 if (cast->ensure_clause != NULL) {
1069 if (cast->rescue_clause != NULL) {
1070 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1071 if (vn != NULL) return vn;
1072 }
1073
1074 if (cast->statements != NULL) {
1075 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1076 if (vn != NULL) return vn;
1077 }
1078
1079 node = UP(cast->ensure_clause);
1080 } else if (cast->rescue_clause != NULL) {
1081 if (cast->statements == NULL) return NULL;
1082
1083 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1084 if (vn == NULL) return NULL;
1085 if (void_node == NULL) void_node = vn;
1086
1087 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1088 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1089 if (vn == NULL) {
1090 void_node = NULL;
1091 break;
1092 }
1093 if (void_node == NULL) {
1094 void_node = vn;
1095 }
1096 }
1097
1098 if (cast->else_clause != NULL) {
1099 node = UP(cast->else_clause);
1100 } else {
1101 return void_node;
1102 }
1103 } else {
1104 node = UP(cast->statements);
1105 }
1106
1107 break;
1108 }
1109 case PM_ENSURE_NODE: {
1110 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1111 node = UP(cast->statements);
1112 break;
1113 }
1114 case PM_PARENTHESES_NODE: {
1116 node = UP(cast->body);
1117 break;
1118 }
1119 case PM_STATEMENTS_NODE: {
1121 node = cast->body.nodes[cast->body.size - 1];
1122 break;
1123 }
1124 case PM_IF_NODE: {
1125 pm_if_node_t *cast = (pm_if_node_t *) node;
1126 if (cast->statements == NULL || cast->subsequent == NULL) {
1127 return NULL;
1128 }
1129 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1130 if (vn == NULL) {
1131 return NULL;
1132 }
1133 if (void_node == NULL) {
1134 void_node = vn;
1135 }
1136 node = cast->subsequent;
1137 break;
1138 }
1139 case PM_UNLESS_NODE: {
1140 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1141 if (cast->statements == NULL || cast->else_clause == NULL) {
1142 return NULL;
1143 }
1144 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1145 if (vn == NULL) {
1146 return NULL;
1147 }
1148 if (void_node == NULL) {
1149 void_node = vn;
1150 }
1151 node = UP(cast->else_clause);
1152 break;
1153 }
1154 case PM_ELSE_NODE: {
1155 pm_else_node_t *cast = (pm_else_node_t *) node;
1156 node = UP(cast->statements);
1157 break;
1158 }
1159 case PM_AND_NODE: {
1160 pm_and_node_t *cast = (pm_and_node_t *) node;
1161 node = cast->left;
1162 break;
1163 }
1164 case PM_OR_NODE: {
1165 pm_or_node_t *cast = (pm_or_node_t *) node;
1166 node = cast->left;
1167 break;
1168 }
1169 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1171
1172 pm_scope_t *scope = parser->current_scope;
1173 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1174
1175 pm_locals_read(&scope->locals, cast->name);
1176 return NULL;
1177 }
1178 default:
1179 return NULL;
1180 }
1181 }
1182
1183 return NULL;
1184}
1185
1186static inline void
1187pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1188 pm_node_t *void_node = pm_check_value_expression(parser, node);
1189 if (void_node != NULL) {
1190 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1191 }
1192}
1193
1197static void
1198pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1199 const char *type = NULL;
1200 int length = 0;
1201
1202 switch (PM_NODE_TYPE(node)) {
1203 case PM_BACK_REFERENCE_READ_NODE:
1204 case PM_CLASS_VARIABLE_READ_NODE:
1205 case PM_GLOBAL_VARIABLE_READ_NODE:
1206 case PM_INSTANCE_VARIABLE_READ_NODE:
1207 case PM_LOCAL_VARIABLE_READ_NODE:
1208 case PM_NUMBERED_REFERENCE_READ_NODE:
1209 type = "a variable";
1210 length = 10;
1211 break;
1212 case PM_CALL_NODE: {
1213 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1214 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1215
1216 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1217 switch (message->length) {
1218 case 1:
1219 switch (message->start[0]) {
1220 case '+':
1221 case '-':
1222 case '*':
1223 case '/':
1224 case '%':
1225 case '|':
1226 case '^':
1227 case '&':
1228 case '>':
1229 case '<':
1230 type = (const char *) message->start;
1231 length = 1;
1232 break;
1233 }
1234 break;
1235 case 2:
1236 switch (message->start[1]) {
1237 case '=':
1238 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1239 type = (const char *) message->start;
1240 length = 2;
1241 }
1242 break;
1243 case '@':
1244 if (message->start[0] == '+' || message->start[0] == '-') {
1245 type = (const char *) message->start;
1246 length = 2;
1247 }
1248 break;
1249 case '*':
1250 if (message->start[0] == '*') {
1251 type = (const char *) message->start;
1252 length = 2;
1253 }
1254 break;
1255 }
1256 break;
1257 case 3:
1258 if (memcmp(message->start, "<=>", 3) == 0) {
1259 type = "<=>";
1260 length = 3;
1261 }
1262 break;
1263 }
1264
1265 break;
1266 }
1267 case PM_CONSTANT_PATH_NODE:
1268 type = "::";
1269 length = 2;
1270 break;
1271 case PM_CONSTANT_READ_NODE:
1272 type = "a constant";
1273 length = 10;
1274 break;
1275 case PM_DEFINED_NODE:
1276 type = "defined?";
1277 length = 8;
1278 break;
1279 case PM_FALSE_NODE:
1280 type = "false";
1281 length = 5;
1282 break;
1283 case PM_FLOAT_NODE:
1284 case PM_IMAGINARY_NODE:
1285 case PM_INTEGER_NODE:
1286 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1287 case PM_INTERPOLATED_STRING_NODE:
1288 case PM_RATIONAL_NODE:
1289 case PM_REGULAR_EXPRESSION_NODE:
1290 case PM_SOURCE_ENCODING_NODE:
1291 case PM_SOURCE_FILE_NODE:
1292 case PM_SOURCE_LINE_NODE:
1293 case PM_STRING_NODE:
1294 case PM_SYMBOL_NODE:
1295 type = "a literal";
1296 length = 9;
1297 break;
1298 case PM_NIL_NODE:
1299 type = "nil";
1300 length = 3;
1301 break;
1302 case PM_RANGE_NODE: {
1303 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1304
1305 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1306 type = "...";
1307 length = 3;
1308 } else {
1309 type = "..";
1310 length = 2;
1311 }
1312
1313 break;
1314 }
1315 case PM_SELF_NODE:
1316 type = "self";
1317 length = 4;
1318 break;
1319 case PM_TRUE_NODE:
1320 type = "true";
1321 length = 4;
1322 break;
1323 default:
1324 break;
1325 }
1326
1327 if (type != NULL) {
1328 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1329 }
1330}
1331
1336static void
1337pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1338 assert(node->body.size > 0);
1339 const size_t size = node->body.size - (last_value ? 1 : 0);
1340 for (size_t index = 0; index < size; index++) {
1341 pm_void_statement_check(parser, node->body.nodes[index]);
1342 }
1343}
1344
/**
 * The context in which a predicate is being checked. It selects the wording
 * used by pm_parser_warn_conditional_predicate_literal.
 */
typedef enum {
    /** Literal warnings describe the node as being in a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** Literal warnings describe the node as being in a "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted for this type. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1355
1359static void
1360pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1361 switch (type) {
1362 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1363 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1364 break;
1365 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1366 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1367 break;
1368 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1369 break;
1370 }
1371}
1372
1377static bool
1378pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1379 switch (PM_NODE_TYPE(node)) {
1380 case PM_ARRAY_NODE: {
1381 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1382
1383 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1384 for (size_t index = 0; index < cast->elements.size; index++) {
1385 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1386 }
1387
1388 return true;
1389 }
1390 case PM_HASH_NODE: {
1391 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1392
1393 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1394 for (size_t index = 0; index < cast->elements.size; index++) {
1395 const pm_node_t *element = cast->elements.nodes[index];
1396 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1397
1398 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1399 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1400 }
1401
1402 return true;
1403 }
1404 case PM_FALSE_NODE:
1405 case PM_FLOAT_NODE:
1406 case PM_IMAGINARY_NODE:
1407 case PM_INTEGER_NODE:
1408 case PM_NIL_NODE:
1409 case PM_RATIONAL_NODE:
1410 case PM_REGULAR_EXPRESSION_NODE:
1411 case PM_SOURCE_ENCODING_NODE:
1412 case PM_SOURCE_FILE_NODE:
1413 case PM_SOURCE_LINE_NODE:
1414 case PM_STRING_NODE:
1415 case PM_SYMBOL_NODE:
1416 case PM_TRUE_NODE:
1417 return true;
1418 default:
1419 return false;
1420 }
1421}
1422
1427static inline void
1428pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1429 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1430 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1431 }
1432}
1433
1446static void
1447pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1448 switch (PM_NODE_TYPE(node)) {
1449 case PM_AND_NODE: {
1450 pm_and_node_t *cast = (pm_and_node_t *) node;
1451 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1452 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1453 break;
1454 }
1455 case PM_OR_NODE: {
1456 pm_or_node_t *cast = (pm_or_node_t *) node;
1457 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1458 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1459 break;
1460 }
1461 case PM_PARENTHESES_NODE: {
1463
1464 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1465 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1466 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1467 }
1468
1469 break;
1470 }
1471 case PM_BEGIN_NODE: {
1472 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1473 if (cast->statements != NULL) {
1474 pm_statements_node_t *statements = cast->statements;
1475 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1476 }
1477 break;
1478 }
1479 case PM_RANGE_NODE: {
1480 pm_range_node_t *cast = (pm_range_node_t *) node;
1481
1482 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1483 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1484
1485 // Here we change the range node into a flip flop node. We can do
1486 // this since the nodes are exactly the same except for the type.
1487 // We're only asserting against the size when we should probably
1488 // assert against the entire layout, but we'll assume tests will
1489 // catch this.
1490 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1491 node->type = PM_FLIP_FLOP_NODE;
1492
1493 break;
1494 }
1495 case PM_REGULAR_EXPRESSION_NODE:
1496 // Here we change the regular expression node into a match last line
1497 // node. We can do this since the nodes are exactly the same except
1498 // for the type.
1500 node->type = PM_MATCH_LAST_LINE_NODE;
1501
1502 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1503 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1504 }
1505
1506 break;
1507 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1508 // Here we change the interpolated regular expression node into an
1509 // interpolated match last line node. We can do this since the nodes
1510 // are exactly the same except for the type.
1512 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1513
1514 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1515 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1516 }
1517
1518 break;
1519 case PM_INTEGER_NODE:
1520 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1521 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1522 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1523 }
1524 } else {
1525 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1526 }
1527 break;
1528 case PM_STRING_NODE:
1529 case PM_SOURCE_FILE_NODE:
1530 case PM_INTERPOLATED_STRING_NODE:
1531 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1532 break;
1533 case PM_SYMBOL_NODE:
1534 case PM_INTERPOLATED_SYMBOL_NODE:
1535 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1536 break;
1537 case PM_SOURCE_LINE_NODE:
1538 case PM_SOURCE_ENCODING_NODE:
1539 case PM_FLOAT_NODE:
1540 case PM_RATIONAL_NODE:
1541 case PM_IMAGINARY_NODE:
1542 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1543 break;
1544 case PM_CLASS_VARIABLE_WRITE_NODE:
1545 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1546 break;
1547 case PM_CONSTANT_WRITE_NODE:
1548 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1549 break;
1550 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1551 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1552 break;
1553 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1554 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1555 break;
1556 case PM_LOCAL_VARIABLE_WRITE_NODE:
1557 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1558 break;
1559 case PM_MULTI_WRITE_NODE:
1560 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1561 break;
1562 default:
1563 break;
1564 }
1565}
1566
1575static inline pm_token_t
1576not_provided(pm_parser_t *parser) {
1577 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1578}
1579
1602
1606static inline const uint8_t *
1607pm_arguments_end(pm_arguments_t *arguments) {
1608 if (arguments->block != NULL) {
1609 const uint8_t *end = arguments->block->location.end;
1610 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1611 end = arguments->closing_loc.end;
1612 }
1613 return end;
1614 }
1615 if (arguments->closing_loc.start != NULL) {
1616 return arguments->closing_loc.end;
1617 }
1618 if (arguments->arguments != NULL) {
1619 return arguments->arguments->base.location.end;
1620 }
1621 return arguments->closing_loc.end;
1622}
1623
1628static void
1629pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1630 // First, check that we have arguments and that we don't have a closing
1631 // location for them.
1632 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1633 return;
1634 }
1635
1636 // Next, check that we don't have a single parentheses argument. This would
1637 // look like:
1638 //
1639 // foo (1) {}
1640 //
1641 // In this case, it's actually okay for the block to be attached to the
1642 // call, even though it looks like it's attached to the argument.
1643 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1644 return;
1645 }
1646
1647 // If we didn't hit a case before this check, then at this point we need to
1648 // add a syntax error.
1649 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1650}
1651
1652/******************************************************************************/
1653/* Basic character checks */
1654/******************************************************************************/
1655
1662static inline size_t
1663char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1664 if (n <= 0) return 0;
1665
1666 if (parser->encoding_changed) {
1667 size_t width;
1668
1669 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1670 return width;
1671 } else if (*b == '_') {
1672 return 1;
1673 } else if (*b >= 0x80) {
1674 return parser->encoding->char_width(b, n);
1675 } else {
1676 return 0;
1677 }
1678 } else if (*b < 0x80) {
1679 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1680 } else {
1681 return pm_encoding_utf_8_char_width(b, n);
1682 }
1683}
1684
1689static inline size_t
1690char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1691 if (n <= 0) {
1692 return 0;
1693 } else if (*b < 0x80) {
1694 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1695 } else {
1696 return pm_encoding_utf_8_char_width(b, n);
1697 }
1698}
1699
1705static inline size_t
1706char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1707 if (n <= 0) {
1708 return 0;
1709 } else if (parser->encoding_changed) {
1710 size_t width;
1711
1712 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1713 return width;
1714 } else if (*b == '_') {
1715 return 1;
1716 } else if (*b >= 0x80) {
1717 return parser->encoding->char_width(b, n);
1718 } else {
1719 return 0;
1720 }
1721 } else {
1722 return char_is_identifier_utf8(b, n);
1723 }
1724}
1725
// This is a perfect hash for the punctuation characters that are allowed in
// global names. It is used to quickly check whether the character following a
// $ is valid in a global name. A character c is stored in word (c / 32 - 1)
// at bit (c % 32).
#define PM_GLOBAL_NAME_BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PM_GLOBAL_NAME_PUNCT(idx) ( \
    PM_GLOBAL_NAME_BIT('~', idx) | PM_GLOBAL_NAME_BIT('*', idx) | PM_GLOBAL_NAME_BIT('$', idx) | \
    PM_GLOBAL_NAME_BIT('?', idx) | PM_GLOBAL_NAME_BIT('!', idx) | PM_GLOBAL_NAME_BIT('@', idx) | \
    PM_GLOBAL_NAME_BIT('/', idx) | PM_GLOBAL_NAME_BIT('\\', idx) | PM_GLOBAL_NAME_BIT(';', idx) | \
    PM_GLOBAL_NAME_BIT(',', idx) | PM_GLOBAL_NAME_BIT('.', idx) | PM_GLOBAL_NAME_BIT('=', idx) | \
    PM_GLOBAL_NAME_BIT(':', idx) | PM_GLOBAL_NAME_BIT('<', idx) | PM_GLOBAL_NAME_BIT('>', idx) | \
    PM_GLOBAL_NAME_BIT('\"', idx) | PM_GLOBAL_NAME_BIT('&', idx) | PM_GLOBAL_NAME_BIT('`', idx) | \
    PM_GLOBAL_NAME_BIT('\'', idx) | PM_GLOBAL_NAME_BIT('+', idx) | PM_GLOBAL_NAME_BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PM_GLOBAL_NAME_PUNCT(0),
    PM_GLOBAL_NAME_PUNCT(1),
    PM_GLOBAL_NAME_PUNCT(2)
};

#undef PM_GLOBAL_NAME_BIT
#undef PM_GLOBAL_NAME_PUNCT

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes at or below 0x20 and bytes above
 * 0x7e are never in the table.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (const unsigned int) b;

    if (code > 0x20 && code <= 0x7e) {
        const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
        return (word >> (code % 32)) & 1;
    }

    return false;
}
1750
1751static inline bool
1752token_is_setter_name(pm_token_t *token) {
1753 return (
1754 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1755 ((token->type == PM_TOKEN_IDENTIFIER) &&
1756 (token->end - token->start >= 2) &&
1757 (token->end[-1] == '='))
1758 );
1759}
1760
/**
 * Return true if the first `length` bytes of `source` spell one of the
 * keywords listed in the table below.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    // Every keyword along with its length, grouped by length.
    static const struct {
        const char *name;
        size_t length;
    } keywords[] = {
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"), KEYWORD("return"),
        KEYWORD("unless"),
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t position = 0; position < count; position++) {
        // Only entries of the requested length are compared, so memcmp never
        // reads more than `length` bytes from either side.
        if (keywords[position].length == length && memcmp(source, keywords[position].name, length) == 0) {
            return true;
        }
    }

    return false;
}
1831
1832/******************************************************************************/
1833/* Node flag handling functions */
1834/******************************************************************************/
1835
1839static inline void
1840pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1841 node->flags |= flag;
1842}
1843
1847static inline void
1848pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1849 node->flags &= (pm_node_flags_t) ~flag;
1850}
1851
1855static inline void
1856pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1857 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1858 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1859 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1860 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1861 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1862 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1863 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1864 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1865
1866 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1867}
1868
1869/******************************************************************************/
1870/* Node creation functions */
1871/******************************************************************************/
1872
1878#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1879
1883static inline pm_node_flags_t
1884pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1885 pm_node_flags_t flags = 0;
1886
1887 if (closing->type == PM_TOKEN_REGEXP_END) {
1888 pm_buffer_t unknown_flags = { 0 };
1889
1890 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1891 switch (*flag) {
1892 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1893 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1894 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1895 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1896
1897 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1898 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1899 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1900 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1901
1902 default: pm_buffer_append_byte(&unknown_flags, *flag);
1903 }
1904 }
1905
1906 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1907 if (unknown_flags_length != 0) {
1908 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1909 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1910 }
1911 pm_buffer_free(&unknown_flags);
1912 }
1913
1914 return flags;
1915}
1916
1917#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1918
1919static pm_statements_node_t *
1920pm_statements_node_create(pm_parser_t *parser);
1921
1922static void
1923pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1924
1925static size_t
1926pm_statements_node_body_length(pm_statements_node_t *node);
1927
1932static inline void *
1933pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1934 void *memory = xcalloc(1, size);
1935 if (memory == NULL) {
1936 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1937 abort();
1938 }
1939 return memory;
1940}
1941
/**
 * Allocate zeroed memory for a node of the given struct type and yield a
 * pointer of that type.
 */
#define PM_NODE_ALLOC(parser_, type_) ((type_ *) pm_node_alloc(parser_, sizeof(type_)))

/**
 * Build the pm_node_t header for a new node with the given type, flags and
 * source bounds. Every expansion increments the parser's node_id counter and
 * uses the new value as the node's identifier.
 */
#define PM_NODE_INIT(parser_, type_, flags_, start_, end_) (pm_node_t) { \
    .type = (type_), \
    .flags = (flags_), \
    .node_id = ++(parser_)->node_id, \
    .location = { .start = (start_), .end = (end_) } \
}

// Header with both bounds left as NULL.
#define PM_NODE_INIT_UNSET(parser_, type_, flags_) \
    PM_NODE_INIT(parser_, type_, flags_, NULL, NULL)

// Header with both bounds at the start of the parser's source.
#define PM_NODE_INIT_BASE(parser_, type_, flags_) \
    PM_NODE_INIT(parser_, type_, flags_, (parser_)->start, (parser_)->start)

// Header spanning a single token.
#define PM_NODE_INIT_TOKEN(parser_, type_, flags_, token_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_TOKEN_END(token_))

// Header spanning a single node.
#define PM_NODE_INIT_NODE(parser_, type_, flags_, node_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_NODE_END(node_))

// Headers spanning from the start of the first token/node given to the end of
// the second.
#define PM_NODE_INIT_TOKENS(parser_, type_, flags_, left_, right_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(left_), PM_TOKEN_END(right_))
#define PM_NODE_INIT_NODES(parser_, type_, flags_, left_, right_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(left_), PM_NODE_END(right_))
#define PM_NODE_INIT_TOKEN_NODE(parser_, type_, flags_, token_, node_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_NODE_END(node_))
#define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_))
1959
1963static pm_missing_node_t *
1964pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1965 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1966
1967 *node = (pm_missing_node_t) {
1968 .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end)
1969 };
1970
1971 return node;
1972}
1973
1977static pm_alias_global_variable_node_t *
1978pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1979 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1980 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1981
1982 *node = (pm_alias_global_variable_node_t) {
1983 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, keyword, old_name),
1984 .new_name = new_name,
1985 .old_name = old_name,
1986 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1987 };
1988
1989 return node;
1990}
1991
1995static pm_alias_method_node_t *
1996pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1997 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1998 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1999
2000 *node = (pm_alias_method_node_t) {
2001 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_METHOD_NODE, 0, keyword, old_name),
2002 .new_name = new_name,
2003 .old_name = old_name,
2004 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2005 };
2006
2007 return node;
2008}
2009
2013static pm_alternation_pattern_node_t *
2014pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2015 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2016
2017 *node = (pm_alternation_pattern_node_t) {
2018 .base = PM_NODE_INIT_NODES(parser, PM_ALTERNATION_PATTERN_NODE, 0, left, right),
2019 .left = left,
2020 .right = right,
2021 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2022 };
2023
2024 return node;
2025}
2026
2030static pm_and_node_t *
2031pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2032 pm_assert_value_expression(parser, left);
2033
2034 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2035
2036 *node = (pm_and_node_t) {
2037 .base = PM_NODE_INIT_NODES(parser, PM_AND_NODE, 0, left, right),
2038 .left = left,
2039 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2040 .right = right
2041 };
2042
2043 return node;
2044}
2045
2049static pm_arguments_node_t *
2050pm_arguments_node_create(pm_parser_t *parser) {
2051 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2052
2053 *node = (pm_arguments_node_t) {
2054 .base = PM_NODE_INIT_BASE(parser, PM_ARGUMENTS_NODE, 0),
2055 .arguments = { 0 }
2056 };
2057
2058 return node;
2059}
2060
2064static size_t
2065pm_arguments_node_size(pm_arguments_node_t *node) {
2066 return node->arguments.size;
2067}
2068
2072static void
2073pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2074 if (pm_arguments_node_size(node) == 0) {
2075 node->base.location.start = argument->location.start;
2076 }
2077
2078 node->base.location.end = argument->location.end;
2079 pm_node_list_append(&node->arguments, argument);
2080
2081 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2082 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2083 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2084 } else {
2085 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2086 }
2087 }
2088}
2089
2093static pm_array_node_t *
2094pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2095 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2096
2097 *node = (pm_array_node_t) {
2098 .base = PM_NODE_INIT_TOKEN(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
2099 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2100 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2101 .elements = { 0 }
2102 };
2103
2104 return node;
2105}
2106
2110static inline void
2111pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2112 if (!node->elements.size && !node->opening_loc.start) {
2113 node->base.location.start = element->location.start;
2114 }
2115
2116 pm_node_list_append(&node->elements, element);
2117 node->base.location.end = element->location.end;
2118
2119 // If the element is not a static literal, then the array is not a static
2120 // literal. Turn that flag off.
2121 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2122 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2123 }
2124
2125 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2126 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2127 }
2128}
2129
2133static void
2134pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2135 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2136 node->base.location.end = closing->end;
2137 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2138}
2139
2144static pm_array_pattern_node_t *
2145pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2146 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2147
2148 *node = (pm_array_pattern_node_t) {
2149 .base = PM_NODE_INIT_NODES(parser, PM_ARRAY_PATTERN_NODE, 0, nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2150 .constant = NULL,
2151 .rest = NULL,
2152 .requireds = { 0 },
2153 .posts = { 0 },
2154 .opening_loc = { 0 },
2155 .closing_loc = { 0 }
2156 };
2157
2158 // For now we're going to just copy over each pointer manually. This could be
2159 // much more efficient, as we could instead resize the node list.
2160 bool found_rest = false;
2161 pm_node_t *child;
2162
2163 PM_NODE_LIST_FOREACH(nodes, index, child) {
2164 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2165 node->rest = child;
2166 found_rest = true;
2167 } else if (found_rest) {
2168 pm_node_list_append(&node->posts, child);
2169 } else {
2170 pm_node_list_append(&node->requireds, child);
2171 }
2172 }
2173
2174 return node;
2175}
2176
2180static pm_array_pattern_node_t *
2181pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2182 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2183
2184 *node = (pm_array_pattern_node_t) {
2185 .base = PM_NODE_INIT_NODE(parser, PM_ARRAY_PATTERN_NODE, 0, rest),
2186 .constant = NULL,
2187 .rest = rest,
2188 .requireds = { 0 },
2189 .posts = { 0 },
2190 .opening_loc = { 0 },
2191 .closing_loc = { 0 }
2192 };
2193
2194 return node;
2195}
2196
2201static pm_array_pattern_node_t *
2202pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2203 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2204
2205 *node = (pm_array_pattern_node_t) {
2206 .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_ARRAY_PATTERN_NODE, 0, constant, closing),
2207 .constant = constant,
2208 .rest = NULL,
2209 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2210 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2211 .requireds = { 0 },
2212 .posts = { 0 }
2213 };
2214
2215 return node;
2216}
2217
2222static pm_array_pattern_node_t *
2223pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2224 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2225
2226 *node = (pm_array_pattern_node_t) {
2227 .base = PM_NODE_INIT_TOKENS(parser, PM_ARRAY_PATTERN_NODE, 0, opening, closing),
2228 .constant = NULL,
2229 .rest = NULL,
2230 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2231 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2232 .requireds = { 0 },
2233 .posts = { 0 }
2234 };
2235
2236 return node;
2237}
2238
2239static inline void
2240pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2241 pm_node_list_append(&node->requireds, inner);
2242}
2243
2247static pm_assoc_node_t *
2248pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2249 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2250 const uint8_t *end;
2251
2252 if (value != NULL && value->location.end > key->location.end) {
2253 end = value->location.end;
2254 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2255 end = operator->end;
2256 } else {
2257 end = key->location.end;
2258 }
2259
2260 // Hash string keys will be frozen, so we can mark them as frozen here so
2261 // that the compiler picks them up and also when we check for static literal
2262 // on the keys it gets factored in.
2263 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2264 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2265 }
2266
2267 // If the key and value of this assoc node are both static literals, then
2268 // we can mark this node as a static literal.
2269 pm_node_flags_t flags = 0;
2270 if (
2271 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2272 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2273 ) {
2274 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2275 }
2276
2277 *node = (pm_assoc_node_t) {
2278 .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, key->location.start, end),
2279 .key = key,
2280 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2281 .value = value
2282 };
2283
2284 return node;
2285}
2286
2290static pm_assoc_splat_node_t *
2291pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2292 assert(operator->type == PM_TOKEN_USTAR_STAR);
2293 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2294
2295 *node = (pm_assoc_splat_node_t) {
2296 .base = (
2297 (value == NULL)
2298 ? PM_NODE_INIT_TOKEN(parser, PM_ASSOC_SPLAT_NODE, 0, operator)
2299 : PM_NODE_INIT_TOKEN_NODE(parser, PM_ASSOC_SPLAT_NODE, 0, operator, value)
2300 ),
2301 .value = value,
2302 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2303 };
2304
2305 return node;
2306}
2307
2311static pm_back_reference_read_node_t *
2312pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2313 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2314 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2315
2316 *node = (pm_back_reference_read_node_t) {
2317 .base = PM_NODE_INIT_TOKEN(parser, PM_BACK_REFERENCE_READ_NODE, 0, name),
2318 .name = pm_parser_constant_id_token(parser, name)
2319 };
2320
2321 return node;
2322}
2323
2327static pm_begin_node_t *
2328pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2329 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2330
2331 *node = (pm_begin_node_t) {
2332 .base = (
2333 (statements == NULL)
2334 ? PM_NODE_INIT_TOKEN(parser, PM_BEGIN_NODE, 0, begin_keyword)
2335 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BEGIN_NODE, 0, begin_keyword, statements)
2336 ),
2337 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2338 .statements = statements,
2339 .end_keyword_loc = { 0 }
2340 };
2341
2342 return node;
2343}
2344
2348static void
2349pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2350 // If the begin keyword doesn't exist, we set the start on the begin_node
2351 if (!node->begin_keyword_loc.start) {
2352 node->base.location.start = rescue_clause->base.location.start;
2353 }
2354 node->base.location.end = rescue_clause->base.location.end;
2355 node->rescue_clause = rescue_clause;
2356}
2357
2361static void
2362pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2363 node->base.location.end = else_clause->base.location.end;
2364 node->else_clause = else_clause;
2365}
2366
2370static void
2371pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2372 node->base.location.end = ensure_clause->base.location.end;
2373 node->ensure_clause = ensure_clause;
2374}
2375
2379static void
2380pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2381 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2382
2383 node->base.location.end = end_keyword->end;
2384 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2385}
2386
2390static pm_block_argument_node_t *
2391pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2392 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2393
2394 *node = (pm_block_argument_node_t) {
2395 .base = (
2396 (expression == NULL)
2397 ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator)
2398 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator, expression)
2399 ),
2400 .expression = expression,
2401 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2402 };
2403
2404 return node;
2405}
2406
2410static pm_block_node_t *
2411pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2412 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2413
2414 *node = (pm_block_node_t) {
2415 .base = PM_NODE_INIT_TOKENS(parser, PM_BLOCK_NODE, 0, opening, closing),
2416 .locals = *locals,
2417 .parameters = parameters,
2418 .body = body,
2419 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2420 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2421 };
2422
2423 return node;
2424}
2425
2429static pm_block_parameter_node_t *
2430pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2431 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2432 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2433
2434 *node = (pm_block_parameter_node_t) {
2435 .base = (
2436 (name->type == PM_TOKEN_NOT_PROVIDED)
2437 ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_PARAMETER_NODE, 0, operator)
2438 : PM_NODE_INIT_TOKENS(parser, PM_BLOCK_PARAMETER_NODE, 0, operator, name)
2439 ),
2440 .name = pm_parser_optional_constant_id_token(parser, name),
2441 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2442 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2443 };
2444
2445 return node;
2446}
2447
2451static pm_block_parameters_node_t *
2452pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2453 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2454
2455 const uint8_t *start;
2456 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2457 start = opening->start;
2458 } else if (parameters != NULL) {
2459 start = parameters->base.location.start;
2460 } else {
2461 start = NULL;
2462 }
2463
2464 const uint8_t *end;
2465 if (parameters != NULL) {
2466 end = parameters->base.location.end;
2467 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2468 end = opening->end;
2469 } else {
2470 end = NULL;
2471 }
2472
2473 *node = (pm_block_parameters_node_t) {
2474 .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, start, end),
2475 .parameters = parameters,
2476 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2477 .closing_loc = { 0 },
2478 .locals = { 0 }
2479 };
2480
2481 return node;
2482}
2483
2487static void
2488pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2489 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2490
2491 node->base.location.end = closing->end;
2492 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2493}
2494
2498static pm_block_local_variable_node_t *
2499pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2500 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2501
2502 *node = (pm_block_local_variable_node_t) {
2503 .base = PM_NODE_INIT_TOKEN(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, name),
2504 .name = pm_parser_constant_id_token(parser, name)
2505 };
2506
2507 return node;
2508}
2509
2513static void
2514pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2515 pm_node_list_append(&node->locals, UP(local));
2516
2517 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2518 node->base.location.end = local->base.location.end;
2519}
2520
2524static pm_break_node_t *
2525pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2526 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2527 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2528
2529 *node = (pm_break_node_t) {
2530 .base = (
2531 (arguments == NULL)
2532 ? PM_NODE_INIT_TOKEN(parser, PM_BREAK_NODE, 0, keyword)
2533 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BREAK_NODE, 0, keyword, arguments)
2534 ),
2535 .arguments = arguments,
2536 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2537 };
2538
2539 return node;
2540}
2541
2542// There are certain flags that we want to use internally but don't want to
2543// expose because they are not relevant beyond parsing. Therefore we'll define
2544// them here and not define them in config.yml/a header file.
2545static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2546
2547static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2548static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2549static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2550
2556static pm_call_node_t *
2557pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2558 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2559
2560 *node = (pm_call_node_t) {
2561 .base = PM_NODE_INIT_BASE(parser, PM_CALL_NODE, flags),
2562 .receiver = NULL,
2563 .call_operator_loc = { 0 },
2564 .message_loc = { 0 },
2565 .opening_loc = { 0 },
2566 .arguments = NULL,
2567 .closing_loc = { 0 },
2568 .equal_loc = { 0 },
2569 .block = NULL,
2570 .name = 0
2571 };
2572
2573 return node;
2574}
2575
2580static inline pm_node_flags_t
2581pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2582 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2583}
2584
2589static pm_call_node_t *
2590pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2591 pm_assert_value_expression(parser, receiver);
2592
2593 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2594 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2595 flags |= PM_CALL_NODE_FLAGS_INDEX;
2596 }
2597
2598 pm_call_node_t *node = pm_call_node_create(parser, flags);
2599
2600 node->base.location.start = receiver->location.start;
2601 node->base.location.end = pm_arguments_end(arguments);
2602
2603 node->receiver = receiver;
2604 node->message_loc.start = arguments->opening_loc.start;
2605 node->message_loc.end = arguments->closing_loc.end;
2606
2607 node->opening_loc = arguments->opening_loc;
2608 node->arguments = arguments->arguments;
2609 node->closing_loc = arguments->closing_loc;
2610 node->block = arguments->block;
2611
2612 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2613 return node;
2614}
2615
2619static pm_call_node_t *
2620pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2621 pm_assert_value_expression(parser, receiver);
2622 pm_assert_value_expression(parser, argument);
2623
2624 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2625
2626 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2627 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2628
2629 node->receiver = receiver;
2630 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2631
2632 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2633 pm_arguments_node_arguments_append(arguments, argument);
2634 node->arguments = arguments;
2635
2636 node->name = pm_parser_constant_id_token(parser, operator);
2637 return node;
2638}
2639
2640static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2641
2645static pm_call_node_t *
2646pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2647 pm_assert_value_expression(parser, receiver);
2648
2649 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2650
2651 node->base.location.start = receiver->location.start;
2652 const uint8_t *end = pm_arguments_end(arguments);
2653 if (end == NULL) {
2654 end = message->end;
2655 }
2656 node->base.location.end = end;
2657
2658 node->receiver = receiver;
2659 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2660 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2661 node->opening_loc = arguments->opening_loc;
2662 node->arguments = arguments->arguments;
2663 node->closing_loc = arguments->closing_loc;
2664 node->block = arguments->block;
2665
2666 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2667 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2668 }
2669
2674 node->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message));
2675 return node;
2676}
2677
2681static pm_call_node_t *
2682pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2683 pm_call_node_t *node = pm_call_node_create(parser, 0);
2684 node->base.location.start = parser->start;
2685 node->base.location.end = parser->end;
2686
2687 node->receiver = receiver;
2688 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2689 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2690 node->arguments = arguments;
2691
2692 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2693 return node;
2694}
2695
2700static pm_call_node_t *
2701pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2702 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2703
2704 node->base.location.start = message->start;
2705 node->base.location.end = pm_arguments_end(arguments);
2706
2707 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2708 node->opening_loc = arguments->opening_loc;
2709 node->arguments = arguments->arguments;
2710 node->closing_loc = arguments->closing_loc;
2711 node->block = arguments->block;
2712
2713 node->name = pm_parser_constant_id_token(parser, message);
2714 return node;
2715}
2716
2721static pm_call_node_t *
2722pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2723 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2724
2725 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2726 node->arguments = arguments;
2727
2728 node->name = name;
2729 return node;
2730}
2731
2735static pm_call_node_t *
2736pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2737 pm_assert_value_expression(parser, receiver);
2738 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2739
2740 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2741
2742 node->base.location.start = message->start;
2743 if (arguments->closing_loc.start != NULL) {
2744 node->base.location.end = arguments->closing_loc.end;
2745 } else {
2746 assert(receiver != NULL);
2747 node->base.location.end = receiver->location.end;
2748 }
2749
2750 node->receiver = receiver;
2751 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2752 node->opening_loc = arguments->opening_loc;
2753 node->arguments = arguments->arguments;
2754 node->closing_loc = arguments->closing_loc;
2755
2756 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2757 return node;
2758}
2759
2763static pm_call_node_t *
2764pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2765 pm_assert_value_expression(parser, receiver);
2766
2767 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2768
2769 node->base.location.start = receiver->location.start;
2770 node->base.location.end = pm_arguments_end(arguments);
2771
2772 node->receiver = receiver;
2773 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2774 node->opening_loc = arguments->opening_loc;
2775 node->arguments = arguments->arguments;
2776 node->closing_loc = arguments->closing_loc;
2777 node->block = arguments->block;
2778
2779 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2780 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2781 }
2782
2783 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2784 return node;
2785}
2786
2790static pm_call_node_t *
2791pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2792 pm_assert_value_expression(parser, receiver);
2793
2794 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2795
2796 node->base.location.start = operator->start;
2797 node->base.location.end = receiver->location.end;
2798
2799 node->receiver = receiver;
2800 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2801
2802 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2803 return node;
2804}
2805
2810static pm_call_node_t *
2811pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2812 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2813
2814 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2815 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2816
2817 node->name = pm_parser_constant_id_token(parser, message);
2818 return node;
2819}
2820
2825static inline bool
2826pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2827 return (
2828 (node->message_loc.start != NULL) &&
2829 (node->message_loc.end[-1] != '!') &&
2830 (node->message_loc.end[-1] != '?') &&
2831 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2832 (node->opening_loc.start == NULL) &&
2833 (node->arguments == NULL) &&
2834 (node->block == NULL)
2835 );
2836}
2837
2841static void
2842pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2843 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2844
2845 if (write_constant->length > 0) {
2846 size_t length = write_constant->length - 1;
2847
2848 void *memory = xmalloc(length);
2849 memcpy(memory, write_constant->start, length);
2850
2851 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2852 } else {
2853 // We can get here if the message was missing because of a syntax error.
2854 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2855 }
2856}
2857
2861static pm_call_and_write_node_t *
2862pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2863 assert(target->block == NULL);
2864 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2865 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2866
2867 *node = (pm_call_and_write_node_t) {
2868 .base = PM_NODE_INIT_NODES(parser, PM_CALL_AND_WRITE_NODE, FL(target), target, value),
2869 .receiver = target->receiver,
2870 .call_operator_loc = target->call_operator_loc,
2871 .message_loc = target->message_loc,
2872 .read_name = 0,
2873 .write_name = target->name,
2874 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2875 .value = value
2876 };
2877
2878 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2879
2880 // Here we're going to free the target, since it is no longer necessary.
2881 // However, we don't want to call `pm_node_destroy` because we want to keep
2882 // around all of its children since we just reused them.
2883 xfree(target);
2884
2885 return node;
2886}
2887
2892static void
2893pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2894 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2895 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2896 pm_node_t *node;
2897 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2898 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2899 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2900 break;
2901 }
2902 }
2903 }
2904
2905 if (block != NULL) {
2906 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2907 }
2908 }
2909}
2910
2914static pm_index_and_write_node_t *
2915pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2916 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2917 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2918
2919 pm_index_arguments_check(parser, target->arguments, target->block);
2920
2921 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2922 *node = (pm_index_and_write_node_t) {
2923 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_AND_WRITE_NODE, FL(target), target, value),
2924 .receiver = target->receiver,
2925 .call_operator_loc = target->call_operator_loc,
2926 .opening_loc = target->opening_loc,
2927 .arguments = target->arguments,
2928 .closing_loc = target->closing_loc,
2929 .block = (pm_block_argument_node_t *) target->block,
2930 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2931 .value = value
2932 };
2933
2934 // Here we're going to free the target, since it is no longer necessary.
2935 // However, we don't want to call `pm_node_destroy` because we want to keep
2936 // around all of its children since we just reused them.
2937 xfree(target);
2938
2939 return node;
2940}
2941
2945static pm_call_operator_write_node_t *
2946pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2947 assert(target->block == NULL);
2948 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
2949
2950 *node = (pm_call_operator_write_node_t) {
2951 .base = PM_NODE_INIT_NODES(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), target, value),
2952 .receiver = target->receiver,
2953 .call_operator_loc = target->call_operator_loc,
2954 .message_loc = target->message_loc,
2955 .read_name = 0,
2956 .write_name = target->name,
2957 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2958 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2959 .value = value
2960 };
2961
2962 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2963
2964 // Here we're going to free the target, since it is no longer necessary.
2965 // However, we don't want to call `pm_node_destroy` because we want to keep
2966 // around all of its children since we just reused them.
2967 xfree(target);
2968
2969 return node;
2970}
2971
2975static pm_index_operator_write_node_t *
2976pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2977 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
2978
2979 pm_index_arguments_check(parser, target->arguments, target->block);
2980
2981 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2982 *node = (pm_index_operator_write_node_t) {
2983 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), target, value),
2984 .receiver = target->receiver,
2985 .call_operator_loc = target->call_operator_loc,
2986 .opening_loc = target->opening_loc,
2987 .arguments = target->arguments,
2988 .closing_loc = target->closing_loc,
2989 .block = (pm_block_argument_node_t *) target->block,
2990 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2991 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2992 .value = value
2993 };
2994
2995 // Here we're going to free the target, since it is no longer necessary.
2996 // However, we don't want to call `pm_node_destroy` because we want to keep
2997 // around all of its children since we just reused them.
2998 xfree(target);
2999
3000 return node;
3001}
3002
3006static pm_call_or_write_node_t *
3007pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3008 assert(target->block == NULL);
3009 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3010 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3011
3012 *node = (pm_call_or_write_node_t) {
3013 .base = PM_NODE_INIT_NODES(parser, PM_CALL_OR_WRITE_NODE, FL(target), target, value),
3014 .receiver = target->receiver,
3015 .call_operator_loc = target->call_operator_loc,
3016 .message_loc = target->message_loc,
3017 .read_name = 0,
3018 .write_name = target->name,
3019 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3020 .value = value
3021 };
3022
3023 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3024
3025 // Here we're going to free the target, since it is no longer necessary.
3026 // However, we don't want to call `pm_node_destroy` because we want to keep
3027 // around all of its children since we just reused them.
3028 xfree(target);
3029
3030 return node;
3031}
3032
3036static pm_index_or_write_node_t *
3037pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3038 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3039 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3040
3041 pm_index_arguments_check(parser, target->arguments, target->block);
3042
3043 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3044 *node = (pm_index_or_write_node_t) {
3045 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OR_WRITE_NODE, FL(target), target, value),
3046 .receiver = target->receiver,
3047 .call_operator_loc = target->call_operator_loc,
3048 .opening_loc = target->opening_loc,
3049 .arguments = target->arguments,
3050 .closing_loc = target->closing_loc,
3051 .block = (pm_block_argument_node_t *) target->block,
3052 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3053 .value = value
3054 };
3055
3056 // Here we're going to free the target, since it is no longer necessary.
3057 // However, we don't want to call `pm_node_destroy` because we want to keep
3058 // around all of its children since we just reused them.
3059 xfree(target);
3060
3061 return node;
3062}
3063
3068static pm_call_target_node_t *
3069pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3070 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3071
3072 *node = (pm_call_target_node_t) {
3073 .base = PM_NODE_INIT_NODE(parser, PM_CALL_TARGET_NODE, FL(target), target),
3074 .receiver = target->receiver,
3075 .call_operator_loc = target->call_operator_loc,
3076 .name = target->name,
3077 .message_loc = target->message_loc
3078 };
3079
3080 // Here we're going to free the target, since it is no longer necessary.
3081 // However, we don't want to call `pm_node_destroy` because we want to keep
3082 // around all of its children since we just reused them.
3083 xfree(target);
3084
3085 return node;
3086}
3087
3092static pm_index_target_node_t *
3093pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3094 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3095
3096 pm_index_arguments_check(parser, target->arguments, target->block);
3097 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3098
3099 *node = (pm_index_target_node_t) {
3100 .base = PM_NODE_INIT_NODE(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, target),
3101 .receiver = target->receiver,
3102 .opening_loc = target->opening_loc,
3103 .arguments = target->arguments,
3104 .closing_loc = target->closing_loc,
3105 .block = (pm_block_argument_node_t *) target->block,
3106 };
3107
3108 // Here we're going to free the target, since it is no longer necessary.
3109 // However, we don't want to call `pm_node_destroy` because we want to keep
3110 // around all of its children since we just reused them.
3111 xfree(target);
3112
3113 return node;
3114}
3115
3119static pm_capture_pattern_node_t *
3120pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3121 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3122
3123 *node = (pm_capture_pattern_node_t) {
3124 .base = PM_NODE_INIT_NODES(parser, PM_CAPTURE_PATTERN_NODE, 0, value, target),
3125 .value = value,
3126 .target = target,
3127 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3128 };
3129
3130 return node;
3131}
3132
3136static pm_case_node_t *
3137pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3138 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3139
3140 *node = (pm_case_node_t) {
3141 .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_NODE, 0, case_keyword, end_keyword),
3142 .predicate = predicate,
3143 .else_clause = NULL,
3144 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3145 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3146 .conditions = { 0 }
3147 };
3148
3149 return node;
3150}
3151
3155static void
3156pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3157 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3158
3159 pm_node_list_append(&node->conditions, condition);
3160 node->base.location.end = condition->location.end;
3161}
3162
3166static void
3167pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3168 node->else_clause = else_clause;
3169 node->base.location.end = else_clause->base.location.end;
3170}
3171
3175static void
3176pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3177 node->base.location.end = end_keyword->end;
3178 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3179}
3180
3184static pm_case_match_node_t *
3185pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3186 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3187
3188 *node = (pm_case_match_node_t) {
3189 .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_MATCH_NODE, 0, case_keyword, end_keyword),
3190 .predicate = predicate,
3191 .else_clause = NULL,
3192 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3193 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3194 .conditions = { 0 }
3195 };
3196
3197 return node;
3198}
3199
3203static void
3204pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3205 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3206
3207 pm_node_list_append(&node->conditions, condition);
3208 node->base.location.end = condition->location.end;
3209}
3210
3214static void
3215pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3216 node->else_clause = else_clause;
3217 node->base.location.end = else_clause->base.location.end;
3218}
3219
3223static void
3224pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3225 node->base.location.end = end_keyword->end;
3226 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3227}
3228
3232static pm_class_node_t *
3233pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3234 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3235
3236 *node = (pm_class_node_t) {
3237 .base = PM_NODE_INIT_TOKENS(parser, PM_CLASS_NODE, 0, class_keyword, end_keyword),
3238 .locals = *locals,
3239 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3240 .constant_path = constant_path,
3241 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3242 .superclass = superclass,
3243 .body = body,
3244 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3245 .name = pm_parser_constant_id_token(parser, name)
3246 };
3247
3248 return node;
3249}
3250
3254static pm_class_variable_and_write_node_t *
3255pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3256 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3257 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3258
3259 *node = (pm_class_variable_and_write_node_t) {
3260 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, target, value),
3261 .name = target->name,
3262 .name_loc = target->base.location,
3263 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3264 .value = value
3265 };
3266
3267 return node;
3268}
3269
3273static pm_class_variable_operator_write_node_t *
3274pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3275 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3276
3277 *node = (pm_class_variable_operator_write_node_t) {
3278 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
3279 .name = target->name,
3280 .name_loc = target->base.location,
3281 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3282 .value = value,
3283 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3284 };
3285
3286 return node;
3287}
3288
3292static pm_class_variable_or_write_node_t *
3293pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3294 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3295 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3296
3297 *node = (pm_class_variable_or_write_node_t) {
3298 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, target, value),
3299 .name = target->name,
3300 .name_loc = target->base.location,
3301 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3302 .value = value
3303 };
3304
3305 return node;
3306}
3307
3311static pm_class_variable_read_node_t *
3312pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3313 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3314 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3315
3316 *node = (pm_class_variable_read_node_t) {
3317 .base = PM_NODE_INIT_TOKEN(parser, PM_CLASS_VARIABLE_READ_NODE, 0, token),
3318 .name = pm_parser_constant_id_token(parser, token)
3319 };
3320
3321 return node;
3322}
3323
3330static inline pm_node_flags_t
3331pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3332 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3333 return flags;
3334 }
3335 return 0;
3336}
3337
3341static pm_class_variable_write_node_t *
3342pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3343 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3344 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3345
3346 *node = (pm_class_variable_write_node_t) {
3347 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, read_node, value),
3348 .name = read_node->name,
3349 .name_loc = PM_LOCATION_NODE_VALUE(UP(read_node)),
3350 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3351 .value = value
3352 };
3353
3354 return node;
3355}
3356
3360static pm_constant_path_and_write_node_t *
3361pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3362 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3363 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3364
3365 *node = (pm_constant_path_and_write_node_t) {
3366 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, target, value),
3367 .target = target,
3368 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3369 .value = value
3370 };
3371
3372 return node;
3373}
3374
3378static pm_constant_path_operator_write_node_t *
3379pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3380 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3381
3382 *node = (pm_constant_path_operator_write_node_t) {
3383 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, target, value),
3384 .target = target,
3385 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3386 .value = value,
3387 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3388 };
3389
3390 return node;
3391}
3392
3396static pm_constant_path_or_write_node_t *
3397pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3398 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3399 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3400
3401 *node = (pm_constant_path_or_write_node_t) {
3402 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, target, value),
3403 .target = target,
3404 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3405 .value = value
3406 };
3407
3408 return node;
3409}
3410
3414static pm_constant_path_node_t *
3415pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3416 pm_assert_value_expression(parser, parent);
3417 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3418
3419 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3420 if (name_token->type == PM_TOKEN_CONSTANT) {
3421 name = pm_parser_constant_id_token(parser, name_token);
3422 }
3423
3424 if (parent == NULL) {
3425 *node = (pm_constant_path_node_t) {
3426 .base = PM_NODE_INIT_TOKENS(parser, PM_CONSTANT_PATH_NODE, 0, delimiter, name_token),
3427 .parent = parent,
3428 .name = name,
3429 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3430 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3431 };
3432 } else {
3433 *node = (pm_constant_path_node_t) {
3434 .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_CONSTANT_PATH_NODE, 0, parent, name_token),
3435 .parent = parent,
3436 .name = name,
3437 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3438 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3439 };
3440 }
3441
3442 return node;
3443}
3444
3448static pm_constant_path_write_node_t *
3449pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3450 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3451 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3452
3453 *node = (pm_constant_path_write_node_t) {
3454 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, target, value),
3455 .target = target,
3456 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3457 .value = value
3458 };
3459
3460 return node;
3461}
3462
3466static pm_constant_and_write_node_t *
3467pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3468 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3469 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3470
3471 *node = (pm_constant_and_write_node_t) {
3472 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_AND_WRITE_NODE, 0, target, value),
3473 .name = target->name,
3474 .name_loc = target->base.location,
3475 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3476 .value = value
3477 };
3478
3479 return node;
3480}
3481
3485static pm_constant_operator_write_node_t *
3486pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3487 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3488
3489 *node = (pm_constant_operator_write_node_t) {
3490 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, target, value),
3491 .name = target->name,
3492 .name_loc = target->base.location,
3493 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3494 .value = value,
3495 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3496 };
3497
3498 return node;
3499}
3500
3504static pm_constant_or_write_node_t *
3505pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3506 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3507 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3508
3509 *node = (pm_constant_or_write_node_t) {
3510 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OR_WRITE_NODE, 0, target, value),
3511 .name = target->name,
3512 .name_loc = target->base.location,
3513 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3514 .value = value
3515 };
3516
3517 return node;
3518}
3519
3523static pm_constant_read_node_t *
3524pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3525 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3526 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3527
3528 *node = (pm_constant_read_node_t) {
3529 .base = PM_NODE_INIT_TOKEN(parser, PM_CONSTANT_READ_NODE, 0, name),
3530 .name = pm_parser_constant_id_token(parser, name)
3531 };
3532
3533 return node;
3534}
3535
3539static pm_constant_write_node_t *
3540pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3541 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3542 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3543
3544 *node = (pm_constant_write_node_t) {
3545 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_WRITE_NODE, flags, target, value),
3546 .name = target->name,
3547 .name_loc = target->base.location,
3548 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3549 .value = value
3550 };
3551
3552 return node;
3553}
3554
3558static void
3559pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3560 switch (PM_NODE_TYPE(node)) {
3561 case PM_BEGIN_NODE: {
3562 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3563 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3564 break;
3565 }
3566 case PM_PARENTHESES_NODE: {
3567 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3568 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3569 break;
3570 }
3571 case PM_STATEMENTS_NODE: {
3572 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3573 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3574 break;
3575 }
3576 case PM_ARRAY_NODE:
3577 case PM_FLOAT_NODE:
3578 case PM_IMAGINARY_NODE:
3579 case PM_INTEGER_NODE:
3580 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3581 case PM_INTERPOLATED_STRING_NODE:
3582 case PM_INTERPOLATED_SYMBOL_NODE:
3583 case PM_INTERPOLATED_X_STRING_NODE:
3584 case PM_RATIONAL_NODE:
3585 case PM_REGULAR_EXPRESSION_NODE:
3586 case PM_SOURCE_ENCODING_NODE:
3587 case PM_SOURCE_FILE_NODE:
3588 case PM_SOURCE_LINE_NODE:
3589 case PM_STRING_NODE:
3590 case PM_SYMBOL_NODE:
3591 case PM_X_STRING_NODE:
3592 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3593 break;
3594 default:
3595 break;
3596 }
3597}
3598
3602static pm_def_node_t *
3603pm_def_node_create(
3604 pm_parser_t *parser,
3605 pm_constant_id_t name,
3606 const pm_token_t *name_loc,
3607 pm_node_t *receiver,
3608 pm_parameters_node_t *parameters,
3609 pm_node_t *body,
3610 pm_constant_id_list_t *locals,
3611 const pm_token_t *def_keyword,
3612 const pm_token_t *operator,
3613 const pm_token_t *lparen,
3614 const pm_token_t *rparen,
3615 const pm_token_t *equal,
3616 const pm_token_t *end_keyword
3617) {
3618 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3619
3620 if (receiver != NULL) {
3621 pm_def_node_receiver_check(parser, receiver);
3622 }
3623
3624 *node = (pm_def_node_t) {
3625 .base = (
3626 (end_keyword->type == PM_TOKEN_NOT_PROVIDED)
3627 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEF_NODE, 0, def_keyword, body)
3628 : PM_NODE_INIT_TOKENS(parser, PM_DEF_NODE, 0, def_keyword, end_keyword)
3629 ),
3630 .name = name,
3631 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3632 .receiver = receiver,
3633 .parameters = parameters,
3634 .body = body,
3635 .locals = *locals,
3636 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3637 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3638 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3639 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3640 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3641 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3642 };
3643
3644 return node;
3645}
3646
3650static pm_defined_node_t *
3651pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3652 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3653
3654 *node = (pm_defined_node_t) {
3655 .base = (
3656 (rparen->type == PM_TOKEN_NOT_PROVIDED)
3657 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEFINED_NODE, 0, keyword, value)
3658 : PM_NODE_INIT_TOKENS(parser, PM_DEFINED_NODE, 0, keyword, rparen)
3659 ),
3660 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3661 .value = value,
3662 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3663 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
3664 };
3665
3666 return node;
3667}
3668
3672static pm_else_node_t *
3673pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3674 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3675
3676 *node = (pm_else_node_t) {
3677 .base = (
3678 ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL))
3679 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_ELSE_NODE, 0, else_keyword, statements)
3680 : PM_NODE_INIT_TOKENS(parser, PM_ELSE_NODE, 0, else_keyword, end_keyword)
3681 ),
3682 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3683 .statements = statements,
3684 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3685 };
3686
3687 return node;
3688}
3689
3693static pm_embedded_statements_node_t *
3694pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3695 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3696
3697 *node = (pm_embedded_statements_node_t) {
3698 .base = PM_NODE_INIT_TOKENS(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, opening, closing),
3699 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3700 .statements = statements,
3701 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3702 };
3703
3704 return node;
3705}
3706
3710static pm_embedded_variable_node_t *
3711pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3712 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3713
3714 *node = (pm_embedded_variable_node_t) {
3715 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_EMBEDDED_VARIABLE_NODE, 0, operator, variable),
3716 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3717 .variable = variable
3718 };
3719
3720 return node;
3721}
3722
3726static pm_ensure_node_t *
3727pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3728 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
3729
3730 *node = (pm_ensure_node_t) {
3731 .base = PM_NODE_INIT_TOKENS(parser, PM_ENSURE_NODE, 0, ensure_keyword, end_keyword),
3732 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
3733 .statements = statements,
3734 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
3735 };
3736
3737 return node;
3738}
3739
3743static pm_false_node_t *
3744pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
3745 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
3746 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
3747
3748 *node = (pm_false_node_t) {
3749 .base = PM_NODE_INIT_TOKEN(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
3750 };
3751
3752 return node;
3753}
3754
3759static pm_find_pattern_node_t *
3760pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
3761 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
3762
3763 pm_node_t *left = nodes->nodes[0];
3764 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
3765 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
3766
3767 pm_node_t *right;
3768
3769 if (nodes->size == 1) {
3770 right = UP(pm_missing_node_create(parser, left->location.end, left->location.end));
3771 } else {
3772 right = nodes->nodes[nodes->size - 1];
3773 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
3774 }
3775
3776#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
3777 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
3778 // The resulting AST will anyway be ignored, but this file still needs to compile.
3779 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
3780#else
3781 pm_node_t *right_splat_node = right;
3782#endif
3783 *node = (pm_find_pattern_node_t) {
3784 .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, left, right),
3785 .constant = NULL,
3786 .left = left_splat_node,
3787 .right = right_splat_node,
3788 .requireds = { 0 },
3789 .opening_loc = { 0 },
3790 .closing_loc = { 0 }
3791 };
3792
3793 // For now we're going to just copy over each pointer manually. This could be
3794 // much more efficient, as we could instead resize the node list to only point
3795 // to 1...-1.
3796 for (size_t index = 1; index < nodes->size - 1; index++) {
3797 pm_node_list_append(&node->requireds, nodes->nodes[index]);
3798 }
3799
3800 return node;
3801}
3802
3807static double
3808pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
3809 ptrdiff_t diff = token->end - token->start;
3810 if (diff <= 0) return 0.0;
3811
3812 // First, get a buffer of the content.
3813 size_t length = (size_t) diff;
3814 char *buffer = xmalloc(sizeof(char) * (length + 1));
3815 memcpy((void *) buffer, token->start, length);
3816
3817 // Next, determine if we need to replace the decimal point because of
3818 // locale-specific options, and then normalize them if we have to.
3819 char decimal_point = *localeconv()->decimal_point;
3820 if (decimal_point != '.') {
3821 for (size_t index = 0; index < length; index++) {
3822 if (buffer[index] == '.') buffer[index] = decimal_point;
3823 }
3824 }
3825
3826 // Next, handle underscores by removing them from the buffer.
3827 for (size_t index = 0; index < length; index++) {
3828 if (buffer[index] == '_') {
3829 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
3830 length--;
3831 }
3832 }
3833
3834 // Null-terminate the buffer so that strtod cannot read off the end.
3835 buffer[length] = '\0';
3836
3837 // Now, call strtod to parse the value. Note that CRuby has their own
3838 // version of strtod which avoids locales. We're okay using the locale-aware
3839 // version because we've already validated through the parser that the token
3840 // is in a valid format.
3841 errno = 0;
3842 char *eptr;
3843 double value = strtod(buffer, &eptr);
3844
3845 // This should never happen, because we've already checked that the token
3846 // is in a valid format. However it's good to be safe.
3847 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
3848 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
3849 xfree((void *) buffer);
3850 return 0.0;
3851 }
3852
3853 // If errno is set, then it should only be ERANGE. At this point we need to
3854 // check if it's infinity (it should be).
3855 if (errno == ERANGE && PRISM_ISINF(value)) {
3856 int warn_width;
3857 const char *ellipsis;
3858
3859 if (length > 20) {
3860 warn_width = 20;
3861 ellipsis = "...";
3862 } else {
3863 warn_width = (int) length;
3864 ellipsis = "";
3865 }
3866
3867 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
3868 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
3869 }
3870
3871 // Finally we can free the buffer and return the value.
3872 xfree((void *) buffer);
3873 return value;
3874}
3875
3879static pm_float_node_t *
3880pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
3881 assert(token->type == PM_TOKEN_FLOAT);
3882 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
3883
3884 *node = (pm_float_node_t) {
3885 .base = PM_NODE_INIT_TOKEN(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3886 .value = pm_double_parse(parser, token)
3887 };
3888
3889 return node;
3890}
3891
3895static pm_imaginary_node_t *
3896pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3897 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
3898
3899 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3900 *node = (pm_imaginary_node_t) {
3901 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3902 .numeric = UP(pm_float_node_create(parser, &((pm_token_t) {
3903 .type = PM_TOKEN_FLOAT,
3904 .start = token->start,
3905 .end = token->end - 1
3906 })))
3907 };
3908
3909 return node;
3910}
3911
3915static pm_rational_node_t *
3916pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
3917 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
3918
3919 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
3920 *node = (pm_rational_node_t) {
3921 .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, token),
3922 .numerator = { 0 },
3923 .denominator = { 0 }
3924 };
3925
3926 const uint8_t *start = token->start;
3927 const uint8_t *end = token->end - 1; // r
3928
3929 while (start < end && *start == '0') start++; // 0.1 -> .1
3930 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
3931
3932 size_t length = (size_t) (end - start);
3933 if (length == 1) {
3934 node->denominator.value = 1;
3935 return node;
3936 }
3937
3938 const uint8_t *point = memchr(start, '.', length);
3939 assert(point && "should have a decimal point");
3940
3941 uint8_t *digits = xmalloc(length);
3942 if (digits == NULL) {
3943 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
3944 abort();
3945 }
3946
3947 memcpy(digits, start, (unsigned long) (point - start));
3948 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
3949 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
3950
3951 digits[0] = '1';
3952 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
3953 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
3954 xfree(digits);
3955
3956 pm_integers_reduce(&node->numerator, &node->denominator);
3957 return node;
3958}
3959
3964static pm_imaginary_node_t *
3965pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3966 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
3967
3968 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3969 *node = (pm_imaginary_node_t) {
3970 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3971 .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) {
3972 .type = PM_TOKEN_FLOAT_RATIONAL,
3973 .start = token->start,
3974 .end = token->end - 1
3975 })))
3976 };
3977
3978 return node;
3979}
3980
3984static pm_for_node_t *
3985pm_for_node_create(
3986 pm_parser_t *parser,
3987 pm_node_t *index,
3988 pm_node_t *collection,
3989 pm_statements_node_t *statements,
3990 const pm_token_t *for_keyword,
3991 const pm_token_t *in_keyword,
3992 const pm_token_t *do_keyword,
3993 const pm_token_t *end_keyword
3994) {
3995 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
3996
3997 *node = (pm_for_node_t) {
3998 .base = PM_NODE_INIT_TOKENS(parser, PM_FOR_NODE, 0, for_keyword, end_keyword),
3999 .index = index,
4000 .collection = collection,
4001 .statements = statements,
4002 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4003 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4004 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4005 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4006 };
4007
4008 return node;
4009}
4010
4014static pm_forwarding_arguments_node_t *
4015pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4016 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4017 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4018
4019 *node = (pm_forwarding_arguments_node_t) {
4020 .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, token)
4021 };
4022
4023 return node;
4024}
4025
4029static pm_forwarding_parameter_node_t *
4030pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4031 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4032 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4033
4034 *node = (pm_forwarding_parameter_node_t) {
4035 .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_PARAMETER_NODE, 0, token)
4036 };
4037
4038 return node;
4039}
4040
4044static pm_forwarding_super_node_t *
4045pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4046 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4047 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4048 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4049
4050 pm_block_node_t *block = NULL;
4051 if (arguments->block != NULL) {
4052 block = (pm_block_node_t *) arguments->block;
4053 }
4054
4055 *node = (pm_forwarding_super_node_t) {
4056 .base = (
4057 (block == NULL)
4058 ? PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_SUPER_NODE, 0, token)
4059 : PM_NODE_INIT_TOKEN_NODE(parser, PM_FORWARDING_SUPER_NODE, 0, token, block)
4060 ),
4061 .block = block
4062 };
4063
4064 return node;
4065}
4066
4071static pm_hash_pattern_node_t *
4072pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4073 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4074
4075 *node = (pm_hash_pattern_node_t) {
4076 .base = PM_NODE_INIT_TOKENS(parser, PM_HASH_PATTERN_NODE, 0, opening, closing),
4077 .constant = NULL,
4078 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4079 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4080 .elements = { 0 },
4081 .rest = NULL
4082 };
4083
4084 return node;
4085}
4086
4090static pm_hash_pattern_node_t *
4091pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4092 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4093
4094 const uint8_t *start;
4095 const uint8_t *end;
4096
4097 if (elements->size > 0) {
4098 if (rest) {
4099 start = elements->nodes[0]->location.start;
4100 end = rest->location.end;
4101 } else {
4102 start = elements->nodes[0]->location.start;
4103 end = elements->nodes[elements->size - 1]->location.end;
4104 }
4105 } else {
4106 assert(rest != NULL);
4107 start = rest->location.start;
4108 end = rest->location.end;
4109 }
4110
4111 *node = (pm_hash_pattern_node_t) {
4112 .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, start, end),
4113 .constant = NULL,
4114 .elements = { 0 },
4115 .rest = rest,
4116 .opening_loc = { 0 },
4117 .closing_loc = { 0 }
4118 };
4119
4120 pm_node_t *element;
4121 PM_NODE_LIST_FOREACH(elements, index, element) {
4122 pm_node_list_append(&node->elements, element);
4123 }
4124
4125 return node;
4126}
4127
4131static pm_constant_id_t
4132pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4133 switch (PM_NODE_TYPE(target)) {
4134 case PM_GLOBAL_VARIABLE_READ_NODE:
4135 return ((pm_global_variable_read_node_t *) target)->name;
4136 case PM_BACK_REFERENCE_READ_NODE:
4137 return ((pm_back_reference_read_node_t *) target)->name;
4138 case PM_NUMBERED_REFERENCE_READ_NODE:
4139 // This will only ever happen in the event of a syntax error, but we
4140 // still need to provide something for the node.
4141 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4142 default:
4143 assert(false && "unreachable");
4144 return (pm_constant_id_t) -1;
4145 }
4146}
4147
4151static pm_global_variable_and_write_node_t *
4152pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4153 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4154 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4155
4156 *node = (pm_global_variable_and_write_node_t) {
4157 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
4158 .name = pm_global_variable_write_name(parser, target),
4159 .name_loc = target->location,
4160 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4161 .value = value
4162 };
4163
4164 return node;
4165}
4166
4170static pm_global_variable_operator_write_node_t *
4171pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4172 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4173
4174 *node = (pm_global_variable_operator_write_node_t) {
4175 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
4176 .name = pm_global_variable_write_name(parser, target),
4177 .name_loc = target->location,
4178 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4179 .value = value,
4180 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4181 };
4182
4183 return node;
4184}
4185
4189static pm_global_variable_or_write_node_t *
4190pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4191 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4192 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4193
4194 *node = (pm_global_variable_or_write_node_t) {
4195 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
4196 .name = pm_global_variable_write_name(parser, target),
4197 .name_loc = target->location,
4198 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4199 .value = value
4200 };
4201
4202 return node;
4203}
4204
4208static pm_global_variable_read_node_t *
4209pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4210 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4211
4212 *node = (pm_global_variable_read_node_t) {
4213 .base = PM_NODE_INIT_TOKEN(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, name),
4214 .name = pm_parser_constant_id_token(parser, name)
4215 };
4216
4217 return node;
4218}
4219
4223static pm_global_variable_read_node_t *
4224pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4225 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4226
4227 *node = (pm_global_variable_read_node_t) {
4228 .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0),
4229 .name = name
4230 };
4231
4232 return node;
4233}
4234
4238static pm_global_variable_write_node_t *
4239pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4240 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4241 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4242
4243 *node = (pm_global_variable_write_node_t) {
4244 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, target, value),
4245 .name = pm_global_variable_write_name(parser, target),
4246 .name_loc = PM_LOCATION_NODE_VALUE(target),
4247 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4248 .value = value
4249 };
4250
4251 return node;
4252}
4253
4257static pm_global_variable_write_node_t *
4258pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4259 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4260
4261 *node = (pm_global_variable_write_node_t) {
4262 .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0),
4263 .name = name,
4264 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4265 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4266 .value = value
4267 };
4268
4269 return node;
4270}
4271
4275static pm_hash_node_t *
4276pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4277 assert(opening != NULL);
4278 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4279
4280 *node = (pm_hash_node_t) {
4281 .base = PM_NODE_INIT_TOKEN(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
4282 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4283 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4284 .elements = { 0 }
4285 };
4286
4287 return node;
4288}
4289
4293static inline void
4294pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4295 pm_node_list_append(&hash->elements, element);
4296
4297 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4298 if (static_literal) {
4299 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4300 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4301 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4302 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4303 }
4304
4305 if (!static_literal) {
4306 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4307 }
4308}
4309
4310static inline void
4311pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4312 hash->base.location.end = token->end;
4313 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4314}
4315
4319static pm_if_node_t *
4320pm_if_node_create(pm_parser_t *parser,
4321 const pm_token_t *if_keyword,
4322 pm_node_t *predicate,
4323 const pm_token_t *then_keyword,
4324 pm_statements_node_t *statements,
4325 pm_node_t *subsequent,
4326 const pm_token_t *end_keyword
4327) {
4328 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4329 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4330
4331 const uint8_t *end;
4332 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4333 end = end_keyword->end;
4334 } else if (subsequent != NULL) {
4335 end = subsequent->location.end;
4336 } else if (pm_statements_node_body_length(statements) != 0) {
4337 end = statements->base.location.end;
4338 } else {
4339 end = predicate->location.end;
4340 }
4341
4342 *node = (pm_if_node_t) {
4343 .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, if_keyword->start, end),
4344 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4345 .predicate = predicate,
4346 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4347 .statements = statements,
4348 .subsequent = subsequent,
4349 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4350 };
4351
4352 return node;
4353}
4354
4358static pm_if_node_t *
4359pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4360 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4361 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4362
4363 pm_statements_node_t *statements = pm_statements_node_create(parser);
4364 pm_statements_node_body_append(parser, statements, statement, true);
4365
4366 *node = (pm_if_node_t) {
4367 .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
4368 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4369 .predicate = predicate,
4370 .then_keyword_loc = { 0 },
4371 .statements = statements,
4372 .subsequent = NULL,
4373 .end_keyword_loc = { 0 }
4374 };
4375
4376 return node;
4377}
4378
4382static pm_if_node_t *
4383pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4384 pm_assert_value_expression(parser, predicate);
4385 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4386
4387 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4388 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4389
4390 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4391 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4392
4393 pm_token_t end_keyword = not_provided(parser);
4394 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4395
4396 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4397
4398 *node = (pm_if_node_t) {
4399 .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, predicate, false_expression),
4400 .if_keyword_loc = { 0 },
4401 .predicate = predicate,
4402 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4403 .statements = if_statements,
4404 .subsequent = UP(else_node),
4405 .end_keyword_loc = { 0 }
4406 };
4407
4408 return node;
4409
4410}
4411
4412static inline void
4413pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4414 node->base.location.end = keyword->end;
4415 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4416}
4417
4418static inline void
4419pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4420 node->base.location.end = keyword->end;
4421 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4422}
4423
4427static pm_implicit_node_t *
4428pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4429 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4430
4431 *node = (pm_implicit_node_t) {
4432 .base = PM_NODE_INIT_NODE(parser, PM_IMPLICIT_NODE, 0, value),
4433 .value = value
4434 };
4435
4436 return node;
4437}
4438
4442static pm_implicit_rest_node_t *
4443pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4444 assert(token->type == PM_TOKEN_COMMA);
4445
4446 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4447
4448 *node = (pm_implicit_rest_node_t) {
4449 .base = PM_NODE_INIT_TOKEN(parser, PM_IMPLICIT_REST_NODE, 0, token)
4450 };
4451
4452 return node;
4453}
4454
4458static pm_integer_node_t *
4459pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4460 assert(token->type == PM_TOKEN_INTEGER);
4461 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4462
4463 *node = (pm_integer_node_t) {
4464 .base = PM_NODE_INIT_TOKEN(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
4465 .value = { 0 }
4466 };
4467
4468 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4469 switch (base) {
4470 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4471 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4472 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4473 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4474 default: assert(false && "unreachable"); break;
4475 }
4476
4477 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4478 return node;
4479}
4480
4485static pm_imaginary_node_t *
4486pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4487 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4488
4489 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4490 *node = (pm_imaginary_node_t) {
4491 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
4492 .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4493 .type = PM_TOKEN_INTEGER,
4494 .start = token->start,
4495 .end = token->end - 1
4496 })))
4497 };
4498
4499 return node;
4500}
4501
4506static pm_rational_node_t *
4507pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4508 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4509
4510 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4511 *node = (pm_rational_node_t) {
4512 .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
4513 .numerator = { 0 },
4514 .denominator = { .value = 1, 0 }
4515 };
4516
4517 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4518 switch (base) {
4519 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4520 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4521 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4522 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4523 default: assert(false && "unreachable"); break;
4524 }
4525
4526 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4527
4528 return node;
4529}
4530
4535static pm_imaginary_node_t *
4536pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4537 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4538
4539 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4540 *node = (pm_imaginary_node_t) {
4541 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
4542 .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4543 .type = PM_TOKEN_INTEGER_RATIONAL,
4544 .start = token->start,
4545 .end = token->end - 1
4546 })))
4547 };
4548
4549 return node;
4550}
4551
4555static pm_in_node_t *
4556pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4557 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4558
4559 const uint8_t *end;
4560 if (statements != NULL) {
4561 end = statements->base.location.end;
4562 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4563 end = then_keyword->end;
4564 } else {
4565 end = pattern->location.end;
4566 }
4567
4568 *node = (pm_in_node_t) {
4569 .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, in_keyword->start, end),
4570 .pattern = pattern,
4571 .statements = statements,
4572 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4573 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
4574 };
4575
4576 return node;
4577}
4578
4582static pm_instance_variable_and_write_node_t *
4583pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4584 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4585 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
4586
4587 *node = (pm_instance_variable_and_write_node_t) {
4588 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, target, value),
4589 .name = target->name,
4590 .name_loc = target->base.location,
4591 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4592 .value = value
4593 };
4594
4595 return node;
4596}
4597
4601static pm_instance_variable_operator_write_node_t *
4602pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4603 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
4604
4605 *node = (pm_instance_variable_operator_write_node_t) {
4606 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
4607 .name = target->name,
4608 .name_loc = target->base.location,
4609 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4610 .value = value,
4611 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4612 };
4613
4614 return node;
4615}
4616
4620static pm_instance_variable_or_write_node_t *
4621pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4622 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4623 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
4624
4625 *node = (pm_instance_variable_or_write_node_t) {
4626 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, target, value),
4627 .name = target->name,
4628 .name_loc = target->base.location,
4629 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4630 .value = value
4631 };
4632
4633 return node;
4634}
4635
4639static pm_instance_variable_read_node_t *
4640pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4641 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4642 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
4643
4644 *node = (pm_instance_variable_read_node_t) {
4645 .base = PM_NODE_INIT_TOKEN(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, token),
4646 .name = pm_parser_constant_id_token(parser, token)
4647 };
4648
4649 return node;
4650}
4651
4656static pm_instance_variable_write_node_t *
4657pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4658 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
4659 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4660
4661 *node = (pm_instance_variable_write_node_t) {
4662 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, read_node, value),
4663 .name = read_node->name,
4664 .name_loc = PM_LOCATION_NODE_VALUE(read_node),
4665 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4666 .value = value
4667 };
4668
4669 return node;
4670}
4671
4677static void
4678pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4679 switch (PM_NODE_TYPE(part)) {
4680 case PM_STRING_NODE:
4681 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4682 break;
4683 case PM_EMBEDDED_STATEMENTS_NODE: {
4684 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4685 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4686
4687 if (embedded == NULL) {
4688 // If there are no statements or more than one statement, then
4689 // we lose the static literal flag.
4690 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4691 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4692 // If the embedded statement is a string, then we can keep the
4693 // static literal flag and mark the string as frozen.
4694 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4695 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4696 // If the embedded statement is an interpolated string and it's
4697 // a static literal, then we can keep the static literal flag.
4698 } else {
4699 // Otherwise we lose the static literal flag.
4700 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4701 }
4702
4703 break;
4704 }
4705 case PM_EMBEDDED_VARIABLE_NODE:
4706 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4707 break;
4708 default:
4709 assert(false && "unexpected node type");
4710 break;
4711 }
4712
4713 pm_node_list_append(parts, part);
4714}
4715
4719static pm_interpolated_regular_expression_node_t *
4720pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4721 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
4722
4723 *node = (pm_interpolated_regular_expression_node_t) {
4724 .base = PM_NODE_INIT_TOKEN(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
4725 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4726 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
4727 .parts = { 0 }
4728 };
4729
4730 return node;
4731}
4732
4733static inline void
4734pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
4735 if (node->base.location.start > part->location.start) {
4736 node->base.location.start = part->location.start;
4737 }
4738 if (node->base.location.end < part->location.end) {
4739 node->base.location.end = part->location.end;
4740 }
4741
4742 pm_interpolated_node_append(UP(node), &node->parts, part);
4743}
4744
4745static inline void
4746pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4747 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4748 node->base.location.end = closing->end;
4749 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
4750}
4751
4775static inline void
4776pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
4777#define CLEAR_FLAGS(node) \
4778 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
4779
4780#define MUTABLE_FLAGS(node) \
4781 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
4782
4783 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4784 node->base.location.start = part->location.start;
4785 }
4786
4787 node->base.location.end = MAX(node->base.location.end, part->location.end);
4788
4789 switch (PM_NODE_TYPE(part)) {
4790 case PM_STRING_NODE:
4791 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
4792 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
4793 // as long as this interpolation only consists of other string literals.
4794 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
4795 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4796 }
4797 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4798 break;
4799 case PM_INTERPOLATED_STRING_NODE:
4800 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4801 // If the string that we're concatenating is a static literal,
4802 // then we can keep the static literal flag for this string.
4803 } else {
4804 // Otherwise, we lose the static literal flag here and we should
4805 // also clear the mutability flags.
4806 CLEAR_FLAGS(node);
4807 }
4808 break;
4809 case PM_EMBEDDED_STATEMENTS_NODE: {
4810 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4811 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4812
4813 if (embedded == NULL) {
4814 // If we're embedding multiple statements or no statements, then
4815 // the string is not longer a static literal.
4816 CLEAR_FLAGS(node);
4817 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4818 // If the embedded statement is a string, then we can make that
4819 // string as frozen and static literal, and not touch the static
4820 // literal status of this string.
4821 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4822
4823 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4824 MUTABLE_FLAGS(node);
4825 }
4826 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4827 // If the embedded statement is an interpolated string, but that
4828 // string is marked as static literal, then we can keep our
4829 // static literal status for this string.
4830 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4831 MUTABLE_FLAGS(node);
4832 }
4833 } else {
4834 // In all other cases, we lose the static literal flag here and
4835 // become mutable.
4836 CLEAR_FLAGS(node);
4837 }
4838
4839 break;
4840 }
4841 case PM_EMBEDDED_VARIABLE_NODE:
4842 // Embedded variables clear static literal, which means we also
4843 // should clear the mutability flags.
4844 CLEAR_FLAGS(node);
4845 break;
4846 case PM_X_STRING_NODE:
4847 case PM_INTERPOLATED_X_STRING_NODE:
4848 case PM_SYMBOL_NODE:
4849 case PM_INTERPOLATED_SYMBOL_NODE:
4850 // These will only happen in error cases. But we want to handle it
4851 // here so that we don't fail the assertion.
4852 CLEAR_FLAGS(node);
4853 break;
4854 default:
4855 assert(false && "unexpected node type");
4856 break;
4857 }
4858
4859 pm_node_list_append(&node->parts, part);
4860
4861#undef CLEAR_FLAGS
4862#undef MUTABLE_FLAGS
4863}
4864
4868static pm_interpolated_string_node_t *
4869pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4870 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
4871 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
4872
4873 switch (parser->frozen_string_literal) {
4874 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4875 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
4876 break;
4877 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
4878 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
4879 break;
4880 }
4881
4882 *node = (pm_interpolated_string_node_t) {
4883 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_STRING_NODE, flags, opening, closing),
4884 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4885 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4886 .parts = { 0 }
4887 };
4888
4889 if (parts != NULL) {
4890 pm_node_t *part;
4891 PM_NODE_LIST_FOREACH(parts, index, part) {
4892 pm_interpolated_string_node_append(node, part);
4893 }
4894 }
4895
4896 return node;
4897}
4898
4902static void
4903pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
4904 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4905 node->base.location.end = closing->end;
4906}
4907
4908static void
4909pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
4910 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4911 node->base.location.start = part->location.start;
4912 }
4913
4914 pm_interpolated_node_append(UP(node), &node->parts, part);
4915 node->base.location.end = MAX(node->base.location.end, part->location.end);
4916}
4917
4918static void
4919pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
4920 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4921 node->base.location.end = closing->end;
4922}
4923
4927static pm_interpolated_symbol_node_t *
4928pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4929 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
4930
4931 *node = (pm_interpolated_symbol_node_t) {
4932 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
4933 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4934 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4935 .parts = { 0 }
4936 };
4937
4938 if (parts != NULL) {
4939 pm_node_t *part;
4940 PM_NODE_LIST_FOREACH(parts, index, part) {
4941 pm_interpolated_symbol_node_append(node, part);
4942 }
4943 }
4944
4945 return node;
4946}
4947
4951static pm_interpolated_x_string_node_t *
4952pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4953 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
4954
4955 *node = (pm_interpolated_x_string_node_t) {
4956 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_X_STRING_NODE, 0, opening, closing),
4957 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4958 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4959 .parts = { 0 }
4960 };
4961
4962 return node;
4963}
4964
4965static inline void
4966pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
4967 pm_interpolated_node_append(UP(node), &node->parts, part);
4968 node->base.location.end = part->location.end;
4969}
4970
4971static inline void
4972pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
4973 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4974 node->base.location.end = closing->end;
4975}
4976
4980static pm_it_local_variable_read_node_t *
4981pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4982 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
4983
4984 *node = (pm_it_local_variable_read_node_t) {
4985 .base = PM_NODE_INIT_TOKEN(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, name),
4986 };
4987
4988 return node;
4989}
4990
4994static pm_it_parameters_node_t *
4995pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4996 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
4997
4998 *node = (pm_it_parameters_node_t) {
4999 .base = PM_NODE_INIT_TOKENS(parser, PM_IT_PARAMETERS_NODE, 0, opening, closing),
5000 };
5001
5002 return node;
5003}
5004
5008static pm_keyword_hash_node_t *
5009pm_keyword_hash_node_create(pm_parser_t *parser) {
5010 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5011
5012 *node = (pm_keyword_hash_node_t) {
5013 .base = PM_NODE_INIT_UNSET(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS),
5014 .elements = { 0 }
5015 };
5016
5017 return node;
5018}
5019
5023static void
5024pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5025 // If the element being added is not an AssocNode or does not have a symbol
5026 // key, then we want to turn the SYMBOL_KEYS flag off.
5027 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5028 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5029 }
5030
5031 pm_node_list_append(&hash->elements, element);
5032 if (hash->base.location.start == NULL) {
5033 hash->base.location.start = element->location.start;
5034 }
5035 hash->base.location.end = element->location.end;
5036}
5037
5041static pm_required_keyword_parameter_node_t *
5042pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5043 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5044
5045 *node = (pm_required_keyword_parameter_node_t) {
5046 .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, name),
5047 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5048 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5049 };
5050
5051 return node;
5052}
5053
5057static pm_optional_keyword_parameter_node_t *
5058pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5059 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5060
5061 *node = (pm_optional_keyword_parameter_node_t) {
5062 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, name, value),
5063 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5064 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5065 .value = value
5066 };
5067
5068 return node;
5069}
5070
5074static pm_keyword_rest_parameter_node_t *
5075pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5076 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5077
5078 *node = (pm_keyword_rest_parameter_node_t) {
5079 .base = (
5080 (name->type == PM_TOKEN_NOT_PROVIDED)
5081 ? PM_NODE_INIT_TOKEN(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator)
5082 : PM_NODE_INIT_TOKENS(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator, name)
5083 ),
5084 .name = pm_parser_optional_constant_id_token(parser, name),
5085 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5086 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5087 };
5088
5089 return node;
5090}
5091
5095static pm_lambda_node_t *
5096pm_lambda_node_create(
5097 pm_parser_t *parser,
5098 pm_constant_id_list_t *locals,
5099 const pm_token_t *operator,
5100 const pm_token_t *opening,
5101 const pm_token_t *closing,
5102 pm_node_t *parameters,
5103 pm_node_t *body
5104) {
5105 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5106
5107 *node = (pm_lambda_node_t) {
5108 .base = PM_NODE_INIT_TOKENS(parser, PM_LAMBDA_NODE, 0, operator, closing),
5109 .locals = *locals,
5110 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5111 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5112 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5113 .parameters = parameters,
5114 .body = body
5115 };
5116
5117 return node;
5118}
5119
5123static pm_local_variable_and_write_node_t *
5124pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5125 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5126 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5127 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5128
5129 *node = (pm_local_variable_and_write_node_t) {
5130 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
5131 .name_loc = target->location,
5132 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5133 .value = value,
5134 .name = name,
5135 .depth = depth
5136 };
5137
5138 return node;
5139}
5140
5144static pm_local_variable_operator_write_node_t *
5145pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5146 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5147
5148 *node = (pm_local_variable_operator_write_node_t) {
5149 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
5150 .name_loc = target->location,
5151 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5152 .value = value,
5153 .name = name,
5154 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5155 .depth = depth
5156 };
5157
5158 return node;
5159}
5160
5164static pm_local_variable_or_write_node_t *
5165pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5166 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5167 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5168 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5169
5170 *node = (pm_local_variable_or_write_node_t) {
5171 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
5172 .name_loc = target->location,
5173 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5174 .value = value,
5175 .name = name,
5176 .depth = depth
5177 };
5178
5179 return node;
5180}
5181
5185static pm_local_variable_read_node_t *
5186pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5187 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5188
5189 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5190
5191 *node = (pm_local_variable_read_node_t) {
5192 .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, name),
5193 .name = name_id,
5194 .depth = depth
5195 };
5196
5197 return node;
5198}
5199
5203static pm_local_variable_read_node_t *
5204pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5205 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5206 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5207}
5208
5213static pm_local_variable_read_node_t *
5214pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5215 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5216 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5217}
5218
5222static pm_local_variable_write_node_t *
5223pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5224 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5225 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5226
5227 *node = (pm_local_variable_write_node_t) {
5228 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, name_loc, value),
5229 .name = name,
5230 .depth = depth,
5231 .value = value,
5232 .name_loc = *name_loc,
5233 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5234 };
5235
5236 return node;
5237}
5238
/**
 * Returns true if the bytes in [start, end) are exactly the two characters
 * `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    return start[0] == 'i' && start[1] == 't';
}
5246
/**
 * Returns true if the bytes in [start, end) are exactly two characters long,
 * begin with `_`, and end with a decimal digit other than `0`.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;

    return (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
}
5255
5260static inline void
5261pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5262 if (pm_token_is_numbered_parameter(start, end)) {
5263 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5264 }
5265}
5266
5271static pm_local_variable_target_node_t *
5272pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5273 pm_refute_numbered_parameter(parser, location->start, location->end);
5274 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5275
5276 *node = (pm_local_variable_target_node_t) {
5277 .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, location),
5278 .name = name,
5279 .depth = depth
5280 };
5281
5282 return node;
5283}
5284
5288static pm_match_predicate_node_t *
5289pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5290 pm_assert_value_expression(parser, value);
5291
5292 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5293
5294 *node = (pm_match_predicate_node_t) {
5295 .base = PM_NODE_INIT_NODES(parser, PM_MATCH_PREDICATE_NODE, 0, value, pattern),
5296 .value = value,
5297 .pattern = pattern,
5298 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5299 };
5300
5301 return node;
5302}
5303
5307static pm_match_required_node_t *
5308pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5309 pm_assert_value_expression(parser, value);
5310
5311 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5312
5313 *node = (pm_match_required_node_t) {
5314 .base = PM_NODE_INIT_NODES(parser, PM_MATCH_REQUIRED_NODE, 0, value, pattern),
5315 .value = value,
5316 .pattern = pattern,
5317 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5318 };
5319
5320 return node;
5321}
5322
5326static pm_match_write_node_t *
5327pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5328 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5329
5330 *node = (pm_match_write_node_t) {
5331 .base = PM_NODE_INIT_NODE(parser, PM_MATCH_WRITE_NODE, 0, call),
5332 .call = call,
5333 .targets = { 0 }
5334 };
5335
5336 return node;
5337}
5338
5342static pm_module_node_t *
5343pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5344 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5345
5346 *node = (pm_module_node_t) {
5347 .base = PM_NODE_INIT_TOKENS(parser, PM_MODULE_NODE, 0, module_keyword, end_keyword),
5348 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5349 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5350 .constant_path = constant_path,
5351 .body = body,
5352 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5353 .name = pm_parser_constant_id_token(parser, name)
5354 };
5355
5356 return node;
5357}
5358
5362static pm_multi_target_node_t *
5363pm_multi_target_node_create(pm_parser_t *parser) {
5364 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5365
5366 *node = (pm_multi_target_node_t) {
5367 .base = PM_NODE_INIT_UNSET(parser, PM_MULTI_TARGET_NODE, 0),
5368 .lefts = { 0 },
5369 .rest = NULL,
5370 .rights = { 0 },
5371 .lparen_loc = { 0 },
5372 .rparen_loc = { 0 }
5373 };
5374
5375 return node;
5376}
5377
5381static void
5382pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5383 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5384 if (node->rest == NULL) {
5385 node->rest = target;
5386 } else {
5387 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5388 pm_node_list_append(&node->rights, target);
5389 }
5390 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5391 if (node->rest == NULL) {
5392 node->rest = target;
5393 } else {
5394 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5395 pm_node_list_append(&node->rights, target);
5396 }
5397 } else if (node->rest == NULL) {
5398 pm_node_list_append(&node->lefts, target);
5399 } else {
5400 pm_node_list_append(&node->rights, target);
5401 }
5402
5403 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
5404 node->base.location.start = target->location.start;
5405 }
5406
5407 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
5408 node->base.location.end = target->location.end;
5409 }
5410}
5411
5415static void
5416pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
5417 node->base.location.start = lparen->start;
5418 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
5419}
5420
5424static void
5425pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
5426 node->base.location.end = rparen->end;
5427 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
5428}
5429
5433static pm_multi_write_node_t *
5434pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5435 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
5436 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5437
5438 *node = (pm_multi_write_node_t) {
5439 .base = PM_NODE_INIT_NODES(parser, PM_MULTI_WRITE_NODE, flags, target, value),
5440 .lefts = target->lefts,
5441 .rest = target->rest,
5442 .rights = target->rights,
5443 .lparen_loc = target->lparen_loc,
5444 .rparen_loc = target->rparen_loc,
5445 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5446 .value = value
5447 };
5448
5449 // Explicitly do not call pm_node_destroy here because we want to keep
5450 // around all of the information within the MultiWriteNode node.
5451 xfree(target);
5452
5453 return node;
5454}
5455
5459static pm_next_node_t *
5460pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5461 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5462 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
5463
5464 *node = (pm_next_node_t) {
5465 .base = (
5466 (arguments == NULL)
5467 ? PM_NODE_INIT_TOKEN(parser, PM_NEXT_NODE, 0, keyword)
5468 : PM_NODE_INIT_TOKEN_NODE(parser, PM_NEXT_NODE, 0, keyword, arguments)
5469 ),
5470 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5471 .arguments = arguments
5472 };
5473
5474 return node;
5475}
5476
5480static pm_nil_node_t *
5481pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5482 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5483 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
5484
5485 *node = (pm_nil_node_t) {
5486 .base = PM_NODE_INIT_TOKEN(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
5487 };
5488
5489 return node;
5490}
5491
5495static pm_no_keywords_parameter_node_t *
5496pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5497 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5498 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5499 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
5500
5501 *node = (pm_no_keywords_parameter_node_t) {
5502 .base = PM_NODE_INIT_TOKENS(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, operator, keyword),
5503 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5504 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
5505 };
5506
5507 return node;
5508}
5509
5513static pm_numbered_parameters_node_t *
5514pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
5515 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
5516
5517 *node = (pm_numbered_parameters_node_t) {
5518 .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_PARAMETERS_NODE, 0, location),
5519 .maximum = maximum
5520 };
5521
5522 return node;
5523}
5524
5529#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5530
5537static uint32_t
5538pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5539 const uint8_t *start = token->start + 1;
5540 const uint8_t *end = token->end;
5541
5542 ptrdiff_t diff = end - start;
5543 assert(diff > 0);
5544#if PTRDIFF_MAX > SIZE_MAX
5545 assert(diff < (ptrdiff_t) SIZE_MAX);
5546#endif
5547 size_t length = (size_t) diff;
5548
5549 char *digits = xcalloc(length + 1, sizeof(char));
5550 memcpy(digits, start, length);
5551 digits[length] = '\0';
5552
5553 char *endptr;
5554 errno = 0;
5555 unsigned long value = strtoul(digits, &endptr, 10);
5556
5557 if ((digits == endptr) || (*endptr != '\0')) {
5558 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
5559 value = 0;
5560 }
5561
5562 xfree(digits);
5563
5564 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5565 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5566 value = 0;
5567 }
5568
5569 return (uint32_t) value;
5570}
5571
5572#undef NTH_REF_MAX
5573
5577static pm_numbered_reference_read_node_t *
5578pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5579 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5580 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
5581
5582 *node = (pm_numbered_reference_read_node_t) {
5583 .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, name),
5584 .number = pm_numbered_reference_read_node_number(parser, name)
5585 };
5586
5587 return node;
5588}
5589
5593static pm_optional_parameter_node_t *
5594pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5595 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
5596
5597 *node = (pm_optional_parameter_node_t) {
5598 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_PARAMETER_NODE, 0, name, value),
5599 .name = pm_parser_constant_id_token(parser, name),
5600 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5601 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5602 .value = value
5603 };
5604
5605 return node;
5606}
5607
5611static pm_or_node_t *
5612pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5613 pm_assert_value_expression(parser, left);
5614
5615 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
5616
5617 *node = (pm_or_node_t) {
5618 .base = PM_NODE_INIT_NODES(parser, PM_OR_NODE, 0, left, right),
5619 .left = left,
5620 .right = right,
5621 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5622 };
5623
5624 return node;
5625}
5626
5630static pm_parameters_node_t *
5631pm_parameters_node_create(pm_parser_t *parser) {
5632 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
5633
5634 *node = (pm_parameters_node_t) {
5635 .base = PM_NODE_INIT_UNSET(parser, PM_PARAMETERS_NODE, 0),
5636 .rest = NULL,
5637 .keyword_rest = NULL,
5638 .block = NULL,
5639 .requireds = { 0 },
5640 .optionals = { 0 },
5641 .posts = { 0 },
5642 .keywords = { 0 }
5643 };
5644
5645 return node;
5646}
5647
5651static void
5652pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5653 if (params->base.location.start == NULL) {
5654 params->base.location.start = param->location.start;
5655 } else {
5656 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
5657 }
5658
5659 if (params->base.location.end == NULL) {
5660 params->base.location.end = param->location.end;
5661 } else {
5662 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
5663 }
5664}
5665
5669static void
5670pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
5671 pm_parameters_node_location_set(params, param);
5672 pm_node_list_append(&params->requireds, param);
5673}
5674
5678static void
5679pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
5680 pm_parameters_node_location_set(params, UP(param));
5681 pm_node_list_append(&params->optionals, UP(param));
5682}
5683
5687static void
5688pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
5689 pm_parameters_node_location_set(params, param);
5690 pm_node_list_append(&params->posts, param);
5691}
5692
5696static void
5697pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5698 pm_parameters_node_location_set(params, param);
5699 params->rest = param;
5700}
5701
5705static void
5706pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
5707 pm_parameters_node_location_set(params, param);
5708 pm_node_list_append(&params->keywords, param);
5709}
5710
5714static void
5715pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5716 assert(params->keyword_rest == NULL);
5717 pm_parameters_node_location_set(params, param);
5718 params->keyword_rest = param;
5719}
5720
5724static void
5725pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
5726 assert(params->block == NULL);
5727 pm_parameters_node_location_set(params, UP(param));
5728 params->block = param;
5729}
5730
5734static pm_program_node_t *
5735pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
5736 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
5737
5738 *node = (pm_program_node_t) {
5739 .base = PM_NODE_INIT_NODE(parser, PM_PROGRAM_NODE, 0, statements),
5740 .locals = *locals,
5741 .statements = statements
5742 };
5743
5744 return node;
5745}
5746
5750static pm_parentheses_node_t *
5751pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
5752 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
5753
5754 *node = (pm_parentheses_node_t) {
5755 .base = PM_NODE_INIT_TOKENS(parser, PM_PARENTHESES_NODE, flags, opening, closing),
5756 .body = body,
5757 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5758 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5759 };
5760
5761 return node;
5762}
5763
5767static pm_pinned_expression_node_t *
5768pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
5769 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
5770
5771 *node = (pm_pinned_expression_node_t) {
5772 .base = PM_NODE_INIT_TOKENS(parser, PM_PINNED_EXPRESSION_NODE, 0, operator, rparen),
5773 .expression = expression,
5774 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5775 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
5776 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
5777 };
5778
5779 return node;
5780}
5781
5785static pm_pinned_variable_node_t *
5786pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
5787 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
5788
5789 *node = (pm_pinned_variable_node_t) {
5790 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_PINNED_VARIABLE_NODE, 0, operator, variable),
5791 .variable = variable,
5792 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5793 };
5794
5795 return node;
5796}
5797
5801static pm_post_execution_node_t *
5802pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5803 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
5804
5805 *node = (pm_post_execution_node_t) {
5806 .base = PM_NODE_INIT_TOKENS(parser, PM_POST_EXECUTION_NODE, 0, keyword, closing),
5807 .statements = statements,
5808 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5809 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5810 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5811 };
5812
5813 return node;
5814}
5815
5819static pm_pre_execution_node_t *
5820pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5821 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
5822
5823 *node = (pm_pre_execution_node_t) {
5824 .base = PM_NODE_INIT_TOKENS(parser, PM_PRE_EXECUTION_NODE, 0, keyword, closing),
5825 .statements = statements,
5826 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5827 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5828 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5829 };
5830
5831 return node;
5832}
5833
5837static pm_range_node_t *
5838pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5839 pm_assert_value_expression(parser, left);
5840 pm_assert_value_expression(parser, right);
5841
5842 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
5843 pm_node_flags_t flags = 0;
5844
5845 // Indicate that this node is an exclusive range if the operator is `...`.
5846 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
5847 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
5848 }
5849
5850 // Indicate that this node is a static literal (i.e., can be compiled with
5851 // a putobject in CRuby) if the left and right are implicit nil, explicit
5852 // nil, or integers.
5853 if (
5854 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
5855 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
5856 ) {
5857 flags |= PM_NODE_FLAG_STATIC_LITERAL;
5858 }
5859
5860 *node = (pm_range_node_t) {
5861 .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, (left == NULL ? operator->start : left->location.start), (right == NULL ? operator->end : right->location.end)),
5862 .left = left,
5863 .right = right,
5864 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5865 };
5866
5867 return node;
5868}
5869
5873static pm_redo_node_t *
5874pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
5875 assert(token->type == PM_TOKEN_KEYWORD_REDO);
5876 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
5877
5878 *node = (pm_redo_node_t) {
5879 .base = PM_NODE_INIT_TOKEN(parser, PM_REDO_NODE, 0, token)
5880 };
5881
5882 return node;
5883}
5884
5889static pm_regular_expression_node_t *
5890pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5891 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
5892 pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL;
5893
5894 *node = (pm_regular_expression_node_t) {
5895 .base = PM_NODE_INIT_TOKENS(parser, PM_REGULAR_EXPRESSION_NODE, flags, opening, closing),
5896 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5897 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
5898 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5899 .unescaped = *unescaped
5900 };
5901
5902 return node;
5903}
5904
5908static inline pm_regular_expression_node_t *
5909pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5910 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5911}
5912
5916static pm_required_parameter_node_t *
5917pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
5918 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
5919
5920 *node = (pm_required_parameter_node_t) {
5921 .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_PARAMETER_NODE, 0, token),
5922 .name = pm_parser_constant_id_token(parser, token)
5923 };
5924
5925 return node;
5926}
5927
5931static pm_rescue_modifier_node_t *
5932pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
5933 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
5934
5935 *node = (pm_rescue_modifier_node_t) {
5936 .base = PM_NODE_INIT_NODES(parser, PM_RESCUE_MODIFIER_NODE, 0, expression, rescue_expression),
5937 .expression = expression,
5938 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5939 .rescue_expression = rescue_expression
5940 };
5941
5942 return node;
5943}
5944
5948static pm_rescue_node_t *
5949pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5950 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
5951
5952 *node = (pm_rescue_node_t) {
5953 .base = PM_NODE_INIT_TOKEN(parser, PM_RESCUE_NODE, 0, keyword),
5954 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5955 .operator_loc = { 0 },
5956 .then_keyword_loc = { 0 },
5957 .reference = NULL,
5958 .statements = NULL,
5959 .subsequent = NULL,
5960 .exceptions = { 0 }
5961 };
5962
5963 return node;
5964}
5965
5966static inline void
5967pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
5968 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
5969}
5970
5974static void
5975pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
5976 node->reference = reference;
5977 node->base.location.end = reference->location.end;
5978}
5979
5983static void
5984pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
5985 node->statements = statements;
5986 if (pm_statements_node_body_length(statements) > 0) {
5987 node->base.location.end = statements->base.location.end;
5988 }
5989}
5990
5994static void
5995pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
5996 node->subsequent = subsequent;
5997 node->base.location.end = subsequent->base.location.end;
5998}
5999
6003static void
6004pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6005 pm_node_list_append(&node->exceptions, exception);
6006 node->base.location.end = exception->location.end;
6007}
6008
6012static pm_rest_parameter_node_t *
6013pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6014 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6015
6016 *node = (pm_rest_parameter_node_t) {
6017 .base = (
6018 (name->type == PM_TOKEN_NOT_PROVIDED)
6019 ? PM_NODE_INIT_TOKEN(parser, PM_REST_PARAMETER_NODE, 0, operator)
6020 : PM_NODE_INIT_TOKENS(parser, PM_REST_PARAMETER_NODE, 0, operator, name)
6021 ),
6022 .name = pm_parser_optional_constant_id_token(parser, name),
6023 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6024 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6025 };
6026
6027 return node;
6028}
6029
6033static pm_retry_node_t *
6034pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6035 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6036 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6037
6038 *node = (pm_retry_node_t) {
6039 .base = PM_NODE_INIT_TOKEN(parser, PM_RETRY_NODE, 0, token)
6040 };
6041
6042 return node;
6043}
6044
6048static pm_return_node_t *
6049pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6050 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6051
6052 *node = (pm_return_node_t) {
6053 .base = (
6054 (arguments == NULL)
6055 ? PM_NODE_INIT_TOKEN(parser, PM_RETURN_NODE, 0, keyword)
6056 : PM_NODE_INIT_TOKEN_NODE(parser, PM_RETURN_NODE, 0, keyword, arguments)
6057 ),
6058 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6059 .arguments = arguments
6060 };
6061
6062 return node;
6063}
6064
6068static pm_self_node_t *
6069pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6070 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6071 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6072
6073 *node = (pm_self_node_t) {
6074 .base = PM_NODE_INIT_TOKEN(parser, PM_SELF_NODE, 0, token)
6075 };
6076
6077 return node;
6078}
6079
6083static pm_shareable_constant_node_t *
6084pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6085 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6086
6087 *node = (pm_shareable_constant_node_t) {
6088 .base = PM_NODE_INIT_NODE(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, write),
6089 .write = write
6090 };
6091
6092 return node;
6093}
6094
6098static pm_singleton_class_node_t *
6099pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6100 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6101
6102 *node = (pm_singleton_class_node_t) {
6103 .base = PM_NODE_INIT_TOKENS(parser, PM_SINGLETON_CLASS_NODE, 0, class_keyword, end_keyword),
6104 .locals = *locals,
6105 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6106 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6107 .expression = expression,
6108 .body = body,
6109 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6110 };
6111
6112 return node;
6113}
6114
6118static pm_source_encoding_node_t *
6119pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6120 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6121 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6122
6123 *node = (pm_source_encoding_node_t) {
6124 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6125 };
6126
6127 return node;
6128}
6129
6133static pm_source_file_node_t*
6134pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6135 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6136 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6137
6138 pm_node_flags_t flags = 0;
6139
6140 switch (parser->frozen_string_literal) {
6141 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6142 flags |= PM_STRING_FLAGS_MUTABLE;
6143 break;
6144 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6145 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6146 break;
6147 }
6148
6149 *node = (pm_source_file_node_t) {
6150 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_FILE_NODE, flags, file_keyword),
6151 .filepath = parser->filepath
6152 };
6153
6154 return node;
6155}
6156
6160static pm_source_line_node_t *
6161pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6162 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6163 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6164
6165 *node = (pm_source_line_node_t) {
6166 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6167 };
6168
6169 return node;
6170}
6171
6175static pm_splat_node_t *
6176pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6177 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6178
6179 *node = (pm_splat_node_t) {
6180 .base = (
6181 (expression == NULL)
6182 ? PM_NODE_INIT_TOKEN(parser, PM_SPLAT_NODE, 0, operator)
6183 : PM_NODE_INIT_TOKEN_NODE(parser, PM_SPLAT_NODE, 0, operator, expression)
6184 ),
6185 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6186 .expression = expression
6187 };
6188
6189 return node;
6190}
6191
6195static pm_statements_node_t *
6196pm_statements_node_create(pm_parser_t *parser) {
6197 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6198
6199 *node = (pm_statements_node_t) {
6200 .base = PM_NODE_INIT_BASE(parser, PM_STATEMENTS_NODE, 0),
6201 .body = { 0 }
6202 };
6203
6204 return node;
6205}
6206
6210static size_t
6211pm_statements_node_body_length(pm_statements_node_t *node) {
6212 return node && node->body.size;
6213}
6214
6218static void
6219pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6220 node->base.location = (pm_location_t) { .start = start, .end = end };
6221}
6222
6227static inline void
6228pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6229 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6230 node->base.location.start = statement->location.start;
6231 }
6232
6233 if (statement->location.end > node->base.location.end) {
6234 node->base.location.end = statement->location.end;
6235 }
6236}
6237
6241static void
6242pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6243 pm_statements_node_body_update(node, statement);
6244
6245 if (node->body.size > 0) {
6246 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6247
6248 switch (PM_NODE_TYPE(previous)) {
6249 case PM_BREAK_NODE:
6250 case PM_NEXT_NODE:
6251 case PM_REDO_NODE:
6252 case PM_RETRY_NODE:
6253 case PM_RETURN_NODE:
6254 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6255 break;
6256 default:
6257 break;
6258 }
6259 }
6260
6261 pm_node_list_append(&node->body, statement);
6262 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6263}
6264
6268static void
6269pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
6270 pm_statements_node_body_update(node, statement);
6271 pm_node_list_prepend(&node->body, statement);
6272 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6273}
6274
6278static inline pm_string_node_t *
6279pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6280 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
6281 pm_node_flags_t flags = 0;
6282
6283 switch (parser->frozen_string_literal) {
6284 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6285 flags = PM_STRING_FLAGS_MUTABLE;
6286 break;
6287 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6288 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6289 break;
6290 }
6291
6292 const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start);
6293 const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end);
6294
6295 *node = (pm_string_node_t) {
6296 .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, start, end),
6297 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6298 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6299 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6300 .unescaped = *string
6301 };
6302
6303 return node;
6304}
6305
6309static pm_string_node_t *
6310pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6311 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6312}
6313
6318static pm_string_node_t *
6319pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6320 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6321 parser->current_string = PM_STRING_EMPTY;
6322 return node;
6323}
6324
6328static pm_super_node_t *
6329pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6330 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6331 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
6332
6333 const uint8_t *end = pm_arguments_end(arguments);
6334 if (end == NULL) {
6335 assert(false && "unreachable");
6336 }
6337
6338 *node = (pm_super_node_t) {
6339 .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, keyword->start, end),
6340 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6341 .lparen_loc = arguments->opening_loc,
6342 .arguments = arguments->arguments,
6343 .rparen_loc = arguments->closing_loc,
6344 .block = arguments->block
6345 };
6346
6347 return node;
6348}
6349
6354static bool
6355pm_ascii_only_p(const pm_string_t *contents) {
6356 const size_t length = pm_string_length(contents);
6357 const uint8_t *source = pm_string_source(contents);
6358
6359 for (size_t index = 0; index < length; index++) {
6360 if (source[index] & 0x80) return false;
6361 }
6362
6363 return true;
6364}
6365
6369static void
6370parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6371 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6372 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6373
6374 if (width == 0) {
6375 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
6376 break;
6377 }
6378
6379 cursor += width;
6380 }
6381}
6382
6387static void
6388parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6389 const pm_encoding_t *encoding = parser->encoding;
6390
6391 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6392 size_t width = encoding->char_width(cursor, end - cursor);
6393
6394 if (width == 0) {
6395 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
6396 break;
6397 }
6398
6399 cursor += width;
6400 }
6401}
6402
6412static inline pm_node_flags_t
6413parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6414 if (parser->explicit_encoding != NULL) {
6415 // A Symbol may optionally have its encoding explicitly set. This will
6416 // happen if an escape sequence results in a non-ASCII code point.
6417 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6418 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6419 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6420 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6421 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6422 } else if (validate) {
6423 parse_symbol_encoding_validate_other(parser, location, contents);
6424 }
6425 } else if (pm_ascii_only_p(contents)) {
6426 // Ruby stipulates that all source files must use an ASCII-compatible
6427 // encoding. Thus, all symbols appearing in source are eligible for
6428 // "downgrading" to US-ASCII.
6429 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6430 } else if (validate) {
6431 parse_symbol_encoding_validate_other(parser, location, contents);
6432 }
6433
6434 return 0;
6435}
6436
6437static pm_node_flags_t
6438parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
6439 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
6440 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
6441 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
6442 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
6443
6444 // There's special validation logic used if a string does not contain any character escape sequences.
6445 if (parser->explicit_encoding == NULL) {
6446 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
6447 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
6448 // the US-ASCII encoding.
6449 if (ascii_only) {
6450 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
6451 }
6452
6453 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6454 if (!ascii_only) {
6455 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6456 }
6457 } else if (parser->encoding != modifier_encoding) {
6458 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
6459
6460 if (modifier == 'n' && !ascii_only) {
6461 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
6462 }
6463 }
6464
6465 return flags;
6466 }
6467
6468 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
6469 bool mixed_encoding = false;
6470
6471 if (mixed_encoding) {
6472 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6473 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
6474 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
6475 bool valid_string_in_modifier_encoding = true;
6476
6477 if (!valid_string_in_modifier_encoding) {
6478 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6479 }
6480 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6481 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
6482 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
6483 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
6484 }
6485 }
6486
6487 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
6488 return flags;
6489}
6490
6497static pm_node_flags_t
6498parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
6499 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
6500 bool valid_unicode_range = true;
6501 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
6502 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6503 return flags;
6504 }
6505
6506 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
6507 // to multi-byte characters are allowed.
6508 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
6509 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
6510 // following error message appearing twice. We do the same for compatibility.
6511 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6512 }
6513
6522 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
6523 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
6524 }
6525
6526 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
6527 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
6528 }
6529
6530 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
6531 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
6532 }
6533
6534 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
6535 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
6536 }
6537
6538 // At this point no encoding modifiers will be present on the regular expression as they would have already
6539 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
6540 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
6541 if (ascii_only) {
6542 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
6543 }
6544
6545 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
6546 // or by specifying a modifier.
6547 //
6548 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
6549 if (parser->explicit_encoding != NULL) {
6550 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6551 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
6552 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6553 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
6554 }
6555 }
6556
6557 return 0;
6558}
6559
6564static pm_symbol_node_t *
6565pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6566 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6567
6568 const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start);
6569 const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end);
6570
6571 *node = (pm_symbol_node_t) {
6572 .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, start, end),
6573 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6574 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
6575 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6576 .unescaped = *unescaped
6577 };
6578
6579 return node;
6580}
6581
6585static inline pm_symbol_node_t *
6586pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6587 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6588}
6589
6593static pm_symbol_node_t *
6594pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6595 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6596 parser->current_string = PM_STRING_EMPTY;
6597 return node;
6598}
6599
6603static pm_symbol_node_t *
6604pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6605 pm_symbol_node_t *node;
6606
6607 switch (token->type) {
6608 case PM_TOKEN_LABEL: {
6609 pm_token_t opening = not_provided(parser);
6610 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6611
6612 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6613 node = pm_symbol_node_create(parser, &opening, &label, &closing);
6614
6615 assert((label.end - label.start) >= 0);
6616 pm_string_shared_init(&node->unescaped, label.start, label.end);
6617 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6618
6619 break;
6620 }
6621 case PM_TOKEN_MISSING: {
6622 pm_token_t opening = not_provided(parser);
6623 pm_token_t closing = not_provided(parser);
6624
6625 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
6626 node = pm_symbol_node_create(parser, &opening, &label, &closing);
6627 break;
6628 }
6629 default:
6630 assert(false && "unreachable");
6631 node = NULL;
6632 break;
6633 }
6634
6635 return node;
6636}
6637
6641static pm_symbol_node_t *
6642pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6643 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6644
6645 *node = (pm_symbol_node_t) {
6646 .base = PM_NODE_INIT_BASE(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING),
6647 .value_loc = PM_LOCATION_NULL_VALUE(parser),
6648 .unescaped = { 0 }
6649 };
6650
6651 pm_string_constant_init(&node->unescaped, content, strlen(content));
6652 return node;
6653}
6654
6658static bool
6659pm_symbol_node_label_p(pm_node_t *node) {
6660 const uint8_t *end = NULL;
6661
6662 switch (PM_NODE_TYPE(node)) {
6663 case PM_SYMBOL_NODE:
6664 end = ((pm_symbol_node_t *) node)->closing_loc.end;
6665 break;
6666 case PM_INTERPOLATED_SYMBOL_NODE:
6667 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
6668 break;
6669 default:
6670 return false;
6671 }
6672
6673 return (end != NULL) && (end[-1] == ':');
6674}
6675
6679static pm_symbol_node_t *
6680pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6681 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6682
6683 *new_node = (pm_symbol_node_t) {
6684 .base = PM_NODE_INIT_TOKENS(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
6685 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6686 .value_loc = node->content_loc,
6687 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6688 .unescaped = node->unescaped
6689 };
6690
6691 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
6692 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6693
6694 // We are explicitly _not_ using pm_node_destroy here because we don't want
6695 // to trash the unescaped string. We could instead copy the string if we
6696 // know that it is owned, but we're taking the fast path for now.
6697 xfree(node);
6698
6699 return new_node;
6700}
6701
6705static pm_string_node_t *
6706pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6707 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
6708 pm_node_flags_t flags = 0;
6709
6710 switch (parser->frozen_string_literal) {
6711 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6712 flags = PM_STRING_FLAGS_MUTABLE;
6713 break;
6714 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6715 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6716 break;
6717 }
6718
6719 *new_node = (pm_string_node_t) {
6720 .base = PM_NODE_INIT_NODE(parser, PM_STRING_NODE, flags, node),
6721 .opening_loc = node->opening_loc,
6722 .content_loc = node->value_loc,
6723 .closing_loc = node->closing_loc,
6724 .unescaped = node->unescaped
6725 };
6726
6727 // We are explicitly _not_ using pm_node_destroy here because we don't want
6728 // to trash the unescaped string. We could instead copy the string if we
6729 // know that it is owned, but we're taking the fast path for now.
6730 xfree(node);
6731
6732 return new_node;
6733}
6734
6738static pm_true_node_t *
6739pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6740 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6741 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6742
6743 *node = (pm_true_node_t) {
6744 .base = PM_NODE_INIT_TOKEN(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6745 };
6746
6747 return node;
6748}
6749
6753static pm_true_node_t *
6754pm_true_node_synthesized_create(pm_parser_t *parser) {
6755 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6756
6757 *node = (pm_true_node_t) {
6758 .base = PM_NODE_INIT_BASE(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL)
6759 };
6760
6761 return node;
6762}
6763
6767static pm_undef_node_t *
6768pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6769 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6770 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
6771
6772 *node = (pm_undef_node_t) {
6773 .base = PM_NODE_INIT_TOKEN(parser, PM_UNDEF_NODE, 0, token),
6774 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
6775 .names = { 0 }
6776 };
6777
6778 return node;
6779}
6780
6784static void
6785pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
6786 node->base.location.end = name->location.end;
6787 pm_node_list_append(&node->names, name);
6788}
6789
6793static pm_unless_node_t *
6794pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6795 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6796
6797 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6798 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6799
6800 *node = (pm_unless_node_t) {
6801 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, keyword, end),
6802 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6803 .predicate = predicate,
6804 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
6805 .statements = statements,
6806 .else_clause = NULL,
6807 .end_keyword_loc = { 0 }
6808 };
6809
6810 return node;
6811}
6812
6816static pm_unless_node_t *
6817pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6818 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6819 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6820
6821 pm_statements_node_t *statements = pm_statements_node_create(parser);
6822 pm_statements_node_body_append(parser, statements, statement, true);
6823
6824 *node = (pm_unless_node_t) {
6825 .base = PM_NODE_INIT_NODES(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
6826 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
6827 .predicate = predicate,
6828 .then_keyword_loc = { 0 },
6829 .statements = statements,
6830 .else_clause = NULL,
6831 .end_keyword_loc = { 0 }
6832 };
6833
6834 return node;
6835}
6836
6837static inline void
6838pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
6839 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
6840 node->base.location.end = end_keyword->end;
6841}
6842
6848static void
6849pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
6850 assert(parser->current_block_exits != NULL);
6851
6852 // All of the block exits that we want to remove should be within the
6853 // statements, and since we are modifying the statements, we shouldn't have
6854 // to check the end location.
6855 const uint8_t *start = statements->base.location.start;
6856
6857 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
6858 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
6859 if (block_exit->location.start < start) break;
6860
6861 // Implicitly remove from the list by lowering the size.
6862 parser->current_block_exits->size--;
6863 }
6864}
6865
6869static pm_until_node_t *
6870pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6871 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6872 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6873
6874 *node = (pm_until_node_t) {
6875 .base = PM_NODE_INIT_TOKENS(parser, PM_UNTIL_NODE, flags, keyword, closing),
6876 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6877 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
6878 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6879 .predicate = predicate,
6880 .statements = statements
6881 };
6882
6883 return node;
6884}
6885
6889static pm_until_node_t *
6890pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6891 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6892 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6893 pm_loop_modifier_block_exits(parser, statements);
6894
6895 *node = (pm_until_node_t) {
6896 .base = PM_NODE_INIT_NODES(parser, PM_UNTIL_NODE, flags, statements, predicate),
6897 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6898 .do_keyword_loc = { 0 },
6899 .closing_loc = { 0 },
6900 .predicate = predicate,
6901 .statements = statements
6902 };
6903
6904 return node;
6905}
6906
6910static pm_when_node_t *
6911pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6912 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
6913
6914 *node = (pm_when_node_t) {
6915 .base = PM_NODE_INIT_TOKEN(parser, PM_WHEN_NODE, 0, keyword),
6916 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6917 .statements = NULL,
6918 .then_keyword_loc = { 0 },
6919 .conditions = { 0 }
6920 };
6921
6922 return node;
6923}
6924
6928static void
6929pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
6930 node->base.location.end = condition->location.end;
6931 pm_node_list_append(&node->conditions, condition);
6932}
6933
6937static inline void
6938pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
6939 node->base.location.end = then_keyword->end;
6940 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
6941}
6942
6946static void
6947pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
6948 if (statements->base.location.end > node->base.location.end) {
6949 node->base.location.end = statements->base.location.end;
6950 }
6951
6952 node->statements = statements;
6953}
6954
6958static pm_while_node_t *
6959pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6960 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
6961 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6962
6963 *node = (pm_while_node_t) {
6964 .base = PM_NODE_INIT_TOKENS(parser, PM_WHILE_NODE, flags, keyword, closing),
6965 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6966 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
6967 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6968 .predicate = predicate,
6969 .statements = statements
6970 };
6971
6972 return node;
6973}
6974
6978static pm_while_node_t *
6979pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6980 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
6981 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6982 pm_loop_modifier_block_exits(parser, statements);
6983
6984 *node = (pm_while_node_t) {
6985 .base = PM_NODE_INIT_NODES(parser, PM_WHILE_NODE, flags, statements, predicate),
6986 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6987 .do_keyword_loc = { 0 },
6988 .closing_loc = { 0 },
6989 .predicate = predicate,
6990 .statements = statements
6991 };
6992
6993 return node;
6994}
6995
6999static pm_while_node_t *
7000pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7001 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7002
7003 *node = (pm_while_node_t) {
7004 .base = PM_NODE_INIT_BASE(parser, PM_WHILE_NODE, 0),
7005 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7006 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7007 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7008 .predicate = predicate,
7009 .statements = statements
7010 };
7011
7012 return node;
7013}
7014
7019static pm_x_string_node_t *
7020pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7021 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7022
7023 *node = (pm_x_string_node_t) {
7024 .base = PM_NODE_INIT_TOKENS(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, opening, closing),
7025 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7026 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7027 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7028 .unescaped = *unescaped
7029 };
7030
7031 return node;
7032}
7033
7037static inline pm_x_string_node_t *
7038pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7039 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7040}
7041
7045static pm_yield_node_t *
7046pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7047 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7048
7049 const uint8_t *end;
7050 if (rparen_loc->start != NULL) {
7051 end = rparen_loc->end;
7052 } else if (arguments != NULL) {
7053 end = arguments->base.location.end;
7054 } else if (lparen_loc->start != NULL) {
7055 end = lparen_loc->end;
7056 } else {
7057 end = keyword->end;
7058 }
7059
7060 *node = (pm_yield_node_t) {
7061 .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, keyword->start, end),
7062 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7063 .lparen_loc = *lparen_loc,
7064 .arguments = arguments,
7065 .rparen_loc = *rparen_loc
7066 };
7067
7068 return node;
7069}
7070
7075static int
7076pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7077 pm_scope_t *scope = parser->current_scope;
7078 int depth = 0;
7079
7080 while (scope != NULL) {
7081 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7082 if (scope->closed) break;
7083
7084 scope = scope->previous;
7085 depth++;
7086 }
7087
7088 return -1;
7089}
7090
7096static inline int
7097pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7098 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7099}
7100
7104static inline void
7105pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7106 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7107}
7108
7112static pm_constant_id_t
7113pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7114 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7115 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7116 return constant_id;
7117}
7118
7122static inline pm_constant_id_t
7123pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7124 return pm_parser_local_add_location(parser, token->start, token->end, reads);
7125}
7126
7130static pm_constant_id_t
7131pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7132 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7133 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7134 return constant_id;
7135}
7136
7140static pm_constant_id_t
7141pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7142 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7143 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7144 return constant_id;
7145}
7146
7154static bool
7155pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7156 // We want to check whether the parameter name is a numbered parameter or
7157 // not.
7158 pm_refute_numbered_parameter(parser, name->start, name->end);
7159
7160 // Otherwise we'll fetch the constant id for the parameter name and check
7161 // whether it's already in the current scope.
7162 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7163
7164 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7165 // Add an error if the parameter doesn't start with _ and has been seen before
7166 if ((name->start < name->end) && (*name->start != '_')) {
7167 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7168 }
7169 return true;
7170 }
7171 return false;
7172}
7173
7177static void
7178pm_parser_scope_pop(pm_parser_t *parser) {
7179 pm_scope_t *scope = parser->current_scope;
7180 parser->current_scope = scope->previous;
7181 pm_locals_free(&scope->locals);
7182 pm_node_list_free(&scope->implicit_parameters);
7183 xfree(scope);
7184}
7185
7186/******************************************************************************/
7187/* Stack helpers */
7188/******************************************************************************/
7189
7193static inline void
7194pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7195 *stack = (*stack << 1) | (value & 1);
7196}
7197
7201static inline void
7202pm_state_stack_pop(pm_state_stack_t *stack) {
7203 *stack >>= 1;
7204}
7205
7209static inline bool
7210pm_state_stack_p(const pm_state_stack_t *stack) {
7211 return *stack & 1;
7212}
7213
7214static inline void
7215pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7216 // Use the negation of the value to prevent stack overflow.
7217 pm_state_stack_push(&parser->accepts_block_stack, !value);
7218}
7219
7220static inline void
7221pm_accepts_block_stack_pop(pm_parser_t *parser) {
7222 pm_state_stack_pop(&parser->accepts_block_stack);
7223}
7224
7225static inline bool
7226pm_accepts_block_stack_p(pm_parser_t *parser) {
7227 return !pm_state_stack_p(&parser->accepts_block_stack);
7228}
7229
7230static inline void
7231pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7232 pm_state_stack_push(&parser->do_loop_stack, value);
7233}
7234
7235static inline void
7236pm_do_loop_stack_pop(pm_parser_t *parser) {
7237 pm_state_stack_pop(&parser->do_loop_stack);
7238}
7239
7240static inline bool
7241pm_do_loop_stack_p(pm_parser_t *parser) {
7242 return pm_state_stack_p(&parser->do_loop_stack);
7243}
7244
7245/******************************************************************************/
7246/* Lexer check helpers */
7247/******************************************************************************/
7248
7253static inline uint8_t
7254peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7255 if (cursor < parser->end) {
7256 return *cursor;
7257 } else {
7258 return '\0';
7259 }
7260}
7261
7267static inline uint8_t
7268peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7269 return peek_at(parser, parser->current.end + offset);
7270}
7271
7276static inline uint8_t
7277peek(const pm_parser_t *parser) {
7278 return peek_at(parser, parser->current.end);
7279}
7280
7285static inline bool
7286match(pm_parser_t *parser, uint8_t value) {
7287 if (peek(parser) == value) {
7288 parser->current.end++;
7289 return true;
7290 }
7291 return false;
7292}
7293
7298static inline size_t
7299match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7300 if (peek_at(parser, cursor) == '\n') {
7301 return 1;
7302 }
7303 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7304 return 2;
7305 }
7306 return 0;
7307}
7308
7314static inline size_t
7315match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7316 return match_eol_at(parser, parser->current.end + offset);
7317}
7318
7324static inline size_t
7325match_eol(pm_parser_t *parser) {
7326 return match_eol_at(parser, parser->current.end);
7327}
7328
/**
 * Return a pointer to the first '\n' within the `length` bytes starting at
 * `cursor`, or NULL if there is none. `length` must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // Note that it's okay for us to use memchr here to look for \n because none
    // of the encodings that we support have \n as a component of a multi-byte
    // character.
    const size_t span = (size_t) length;
    const void *found = memchr(cursor, '\n', span);
    return (const uint8_t *) found;
}
7341
7345static inline bool
7346ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7347 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7348}
7349
7354static bool
7355parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7356 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7357
7358 if (encoding != NULL) {
7359 if (parser->encoding != encoding) {
7360 parser->encoding = encoding;
7361 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7362 }
7363
7364 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7365 return true;
7366 }
7367
7368 return false;
7369}
7370
/**
 * Scan the current token, starting one byte past its start, for the word
 * "coding" followed by a `=` or `:` separator and a value. If a value is
 * found it is handed to parser_lex_magic_comment_encoding_value; when that
 * call fails, a PM_ERR_INVALID_ENCODING_MAGIC_COMMENT error covering the value
 * is added to the parser. The function returns without doing anything if no
 * such pattern is found before the end of the token.
 */
7375static void
7376parser_lex_magic_comment_encoding(pm_parser_t *parser) {
7377 const uint8_t *cursor = parser->current.start + 1;
7378 const uint8_t *end = parser->current.end;
7379
7380 bool separator = false;
 // First phase: locate "coding". Each iteration inspects the byte six
 // positions ahead. If it is one of the letters of "coding", the cursor is
 // advanced by the amount that would put the byte following that word at
 // cursor[6] on the next iteration, should the letter belong to an occurrence
 // of the word.
7381 while (true) {
 // Fewer than seven bytes remain, so cursor[6] would be out of range.
7382 if (end - cursor <= 6) return;
7383 switch (cursor[6]) {
7384 case 'C': case 'c': cursor += 6; continue;
7385 case 'O': case 'o': cursor += 5; continue;
7386 case 'D': case 'd': cursor += 4; continue;
7387 case 'I': case 'i': cursor += 3; continue;
7388 case 'N': case 'n': cursor += 2; continue;
7389 case 'G': case 'g': cursor += 1; continue;
 // A separator: remember it and fall out of the switch to test the six
 // bytes that precede it.
7390 case '=': case ':':
7391 separator = true;
7392 cursor += 6;
7393 break;
 // Any other byte: only whitespace leads to the six-byte comparison below;
 // otherwise keep scanning from the new position.
7394 default:
7395 cursor += 6;
7396 if (pm_char_is_whitespace(*cursor)) break;
7397 continue;
7398 }
 // The cursor now sits on a separator or whitespace byte; check whether the
 // six bytes before it spell "coding" (presumably ignoring case, going by
 // the name of pm_strncasecmp).
7399 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7400 separator = false;
7401 }
7402
 // Second phase: step past the byte under the cursor and any whitespace that
 // follows. If no separator has been seen yet, the next non-whitespace byte
 // must be `=` or `:`; anything else means this is not an encoding comment.
7403 while (true) {
7404 do {
7405 if (++cursor >= end) return;
7406 } while (pm_char_is_whitespace(*cursor));
7407
7408 if (separator) break;
7409 if (*cursor != '=' && *cursor != ':') return;
7410
7411 separator = true;
 // NOTE(review): after this increment the do/while above pre-increments the
 // cursor again, so the byte directly after the separator is stepped over
 // without being examined — confirm this is intended.
7412 cursor++;
7413 }
7414
 // Third phase: the value is the longest run of `-`, `_` and bytes accepted
 // by the current encoding's alnum_char callback, bounded by the token end.
7415 const uint8_t *value_start = cursor;
7416 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7417
7418 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7419 // If we were unable to parse the encoding value, then we've got an
7420 // issue because we didn't understand the encoding that the user was
7421 // trying to use. In this case we'll keep using the default encoding but
7422 // add an error to the parser to indicate an unsuccessful parse.
7423 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7424 }
7425}
7426
/**
 * The possible results of interpreting the value of a boolean magic comment,
 * as returned by parser_lex_magic_comment_boolean_value.
 */
7427typedef enum {
 /** The value was "true". */
7428 PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,
 /** The value was "false". */
7429 PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,
 /** The value was neither "true" nor "false". */
7430 PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
7431} pm_magic_comment_boolean_value_t;
7432
7437static pm_magic_comment_boolean_value_t
7438parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7439 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7440 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7441 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7442 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7443 } else {
7444 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7445 }
7446}
7447
/**
 * Return true if the byte delimits a magic comment key: a single quote, a
 * double quote, a colon or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
7452
7458static inline const uint8_t *
7459parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7460 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
7461 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
7462 return cursor;
7463 }
7464 cursor++;
7465 }
7466 return NULL;
7467}
7468
/**
 * Scan the current token, from one byte past its start to its end, for magic
 * comment `key: value` pairs, optionally enclosed in a pair of `-*-` markers.
 * For each pair found, the keys recognised below are acted on and a
 * pm_magic_comment_t describing the pair is appended to the parser's
 * magic_comment_list.
 *
 * Returns false when the text is too short, when an opening `-*-` marker has
 * no closing marker, or when (without markers) the text is not a single
 * `key: value` pair. Otherwise returns true, unless an encoding/coding key
 * was processed, in which case the result of the last call to
 * parser_lex_magic_comment_encoding_value is returned.
 *
 * NOTE(review): this listing skips embedded lines 7627, 7630 and 7669, so a
 * few statements of the function are not visible here — see the notes at
 * those positions and confirm against the upstream file.
 */
7479static inline bool
7480parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7481 bool result = true;
7482
7483 const uint8_t *start = parser->current.start + 1;
7484 const uint8_t *end = parser->current.end;
7485 if (end - start <= 7) return false;
7486
7487 const uint8_t *cursor;
 // Set when the pairs are enclosed in `-*-` markers. In that form several
 // pairs separated by `;` are accepted.
7488 bool indicator = false;
7489
7490 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7491 start = cursor + 3;
7492
7493 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7494 end = cursor;
7495 indicator = true;
7496 } else {
7497 // If we have a start marker but not an end marker, then we cannot
7498 // have a magic comment.
7499 return false;
7500 }
7501 }
7502
7503 cursor = start;
7504 while (cursor < end) {
 // Skip delimiters and whitespace before the key, then take the key as the
 // run of bytes up to the next delimiter or whitespace.
7505 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7506
7507 const uint8_t *key_start = cursor;
7508 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7509
7510 const uint8_t *key_end = cursor;
7511 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7512 if (cursor == end) break;
7513
 // The key must be followed by a colon. Without markers anything else means
 // this is not a magic comment; with markers, scanning resumes from here.
7514 if (*cursor == ':') {
7515 cursor++;
7516 } else {
7517 if (!indicator) return false;
7518 continue;
7519 }
7520
7521 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7522 if (cursor == end) break;
7523
7524 const uint8_t *value_start;
7525 const uint8_t *value_end;
7526
 // A value is either double-quoted (a backslash skips the byte after it) or
 // a bare run of bytes up to a double quote, semicolon or whitespace.
7527 if (*cursor == '"') {
7528 value_start = ++cursor;
7529 for (; cursor < end && *cursor != '"'; cursor++) {
7530 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7531 }
7532 value_end = cursor;
7533 if (cursor < end && *cursor == '"') cursor++;
7534 } else {
7535 value_start = cursor;
7536 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7537 value_end = cursor;
7538 }
7539
 // With markers, skip `;` and whitespace before the next pair. Without
 // markers, only whitespace may follow the value.
7540 if (indicator) {
7541 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7542 } else {
7543 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7544 if (cursor != end) return false;
7545 }
7546
7547 // Here, we need to do some processing on the key to swap out dashes for
7548 // underscores. We only need to do this if there _is_ a dash in the key.
7549 pm_string_t key;
7550 const size_t key_length = (size_t) (key_end - key_start);
7551 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7552
7553 if (dash == NULL) {
7554 pm_string_shared_init(&key, key_start, key_end);
7555 } else {
7556 uint8_t *buffer = xmalloc(key_length);
 // On allocation failure, stop scanning and return the result so far.
7557 if (buffer == NULL) break;
7558
7559 memcpy(buffer, key_start, key_length);
7560 buffer[dash - key_start] = '_';
7561
7562 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7563 buffer[dash - key_start] = '_';
7564 }
7565
7566 pm_string_owned_init(&key, buffer, key_length);
7567 }
7568
7569 // Finally, we can start checking the key against the list of known
7570 // magic comment keys, and potentially change state based on that.
7571 const uint8_t *key_source = pm_string_source(&key);
7572 uint32_t value_length = (uint32_t) (value_end - value_start);
7573
7574 // We only want to attempt to compare against encoding comments if it's
7575 // the first line in the file (or the second in the case of a shebang).
7576 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7577 if (
7578 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7579 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7580 ) {
7581 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7582 }
7583 }
7584
 // The remaining keys are dispatched on length first so that at most one
 // string comparison is made per key.
7585 if (key_length == 11) {
7586 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7587 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7588 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7589 PM_PARSER_WARN_TOKEN_FORMAT(
7590 parser,
7591 parser->current,
7592 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7593 (int) key_length,
7594 (const char *) key_source,
7595 (int) value_length,
7596 (const char *) value_start
7597 );
7598 break;
7599 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7600 parser->warn_mismatched_indentation = false;
7601 break;
7602 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7603 parser->warn_mismatched_indentation = true;
7604 break;
7605 }
7606 }
7607 } else if (key_length == 21) {
7608 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7609 // We only want to handle frozen string literal comments if it's
7610 // before any semantic tokens have been seen.
7611 if (semantic_token_seen) {
7612 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7613 } else {
7614 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7615 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7616 PM_PARSER_WARN_TOKEN_FORMAT(
7617 parser,
7618 parser->current,
7619 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7620 (int) key_length,
7621 (const char *) key_source,
7622 (int) value_length,
7623 (const char *) value_start
7624 );
7625 break;
 // NOTE(review): embedded line 7627 is absent from this listing, so no
 // statement is visible for the FALSE case — confirm against upstream.
7626 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7628 break;
 // NOTE(review): embedded line 7630 is absent from this listing, so no
 // statement is visible for the TRUE case — confirm against upstream.
7629 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7631 break;
7632 }
7633 }
7634 }
7635 } else if (key_length == 24) {
7636 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
 // This `cursor` shadows the outer one. It walks backwards from the start of
 // the token over spaces and tabs to check that nothing else precedes the
 // comment on its line.
7637 const uint8_t *cursor = parser->current.start;
7638 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7639
7640 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7641 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7642 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7643 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7644 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7645 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7646 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7647 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7648 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7649 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7650 } else {
7651 PM_PARSER_WARN_TOKEN_FORMAT(
7652 parser,
7653 parser->current,
7654 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7655 (int) key_length,
7656 (const char *) key_source,
7657 (int) value_length,
7658 (const char *) value_start
7659 );
7660 }
7661 }
7662 }
7663
7664 // When we're done, we want to free the string in case we had to
7665 // allocate memory for it.
7666 pm_string_free(&key);
7667
7668 // Allocate a new magic comment node to append to the parser's list.
 // NOTE(review): embedded line 7669 is absent from this listing; it
 // presumably declares `magic_comment`, which is otherwise undeclared here —
 // confirm against upstream. The stored key and value pointers refer to the
 // scanned text itself, not to the dash-normalised copy of the key.
7670 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
7671 magic_comment->key_start = key_start;
7672 magic_comment->value_start = value_start;
7673 magic_comment->key_length = (uint32_t) key_length;
7674 magic_comment->value_length = value_length;
7675 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7676 }
7677 }
7678
7679 return result;
7680}
7681
7682/******************************************************************************/
7683/* Context manipulations */
7684/******************************************************************************/
7685
/**
 * For each context, a bitmask of the token types that terminate it. Bit N of
 * an entry is set when the token type with numeric value N is a terminator
 * for that context. context_terminator only consults this table for token
 * types below 32, which is what allows each entry to fit in a uint32_t.
 */
7686static const uint32_t context_terminators[] = {
7687 [PM_CONTEXT_NONE] = 0,
7688 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7689 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7690 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7691 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7692 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7693 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7694 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7695 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7696 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7697 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7698 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7699 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7700 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7701 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7702 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7703 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7704 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7705 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7706 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7707 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7708 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7709 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7710 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7711 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7712 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7713 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7714 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7715 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7716 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7717 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7718 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7719 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7720 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7721 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7722 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7723 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7724 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7725 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7726 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7727 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7728 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7729 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7730 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7731 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7732 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7733 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7734 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7735 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7736 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7737 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7738 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7739 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7740 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7741 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7742};
7743
7744static inline bool
7745context_terminator(pm_context_t context, pm_token_t *token) {
7746 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7747}
7748
7753static pm_context_t
7754context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7755 pm_context_node_t *context_node = parser->current_context;
7756
7757 while (context_node != NULL) {
7758 if (context_terminator(context_node->context, token)) return context_node->context;
7759 context_node = context_node->prev;
7760 }
7761
7762 return PM_CONTEXT_NONE;
7763}
7764
7765static bool
7766context_push(pm_parser_t *parser, pm_context_t context) {
7767 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7768 if (context_node == NULL) return false;
7769
7770 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7771
7772 if (parser->current_context == NULL) {
7773 parser->current_context = context_node;
7774 } else {
7775 context_node->prev = parser->current_context;
7776 parser->current_context = context_node;
7777 }
7778
7779 return true;
7780}
7781
7782static void
7783context_pop(pm_parser_t *parser) {
7784 pm_context_node_t *prev = parser->current_context->prev;
7785 xfree(parser->current_context);
7786 parser->current_context = prev;
7787}
7788
7789static bool
7790context_p(const pm_parser_t *parser, pm_context_t context) {
7791 pm_context_node_t *context_node = parser->current_context;
7792
7793 while (context_node != NULL) {
7794 if (context_node->context == context) return true;
7795 context_node = context_node->prev;
7796 }
7797
7798 return false;
7799}
7800
/**
 * Walk the parser's context stack from the innermost context outwards.
 * Returns true as soon as a method definition context is found, and false as
 * soon as a class, module or singleton class context is found, or when the
 * stack is exhausted.
 *
 * NOTE(review): the embedded line numbers jump after several of the case
 * labels below (7807 to 7812, 7813 to 7817, 7817 to 7821, 7821 to 7825), so
 * additional case labels are presumably missing from this listing — confirm
 * against the upstream file.
 */
7801static bool
7802context_def_p(const pm_parser_t *parser) {
7803 pm_context_node_t *context_node = parser->current_context;
7804
7805 while (context_node != NULL) {
7806 switch (context_node->context) {
7807 case PM_CONTEXT_DEF:
7812 return true;
7813 case PM_CONTEXT_CLASS:
7817 case PM_CONTEXT_MODULE:
7821 case PM_CONTEXT_SCLASS:
7825 return false;
 // Any other context is transparent: keep looking further out.
7826 default:
7827 context_node = context_node->prev;
7828 }
7829 }
7830
7831 return false;
7832}
7833
/**
 * Return a human-readable description of the given context as a string
 * literal. PM_CONTEXT_NONE is not expected here: it trips an assertion and
 * yields an empty string when assertions are disabled. The same applies to
 * any value not handled by the switch.
 *
 * NOTE(review): the embedded line numbers jump in three places below (7855 to
 * 7862, 7864 to 7871, 7884 to 7892), so the other case labels that share the
 * "'else' clause", "'ensure' clause" and "'rescue' clause" strings are
 * presumably missing from this listing — confirm against the upstream file.
 */
7838static const char *
7839context_human(pm_context_t context) {
7840 switch (context) {
7841 case PM_CONTEXT_NONE:
7842 assert(false && "unreachable");
7843 return "";
7844 case PM_CONTEXT_BEGIN: return "begin statement";
7845 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
7846 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
7847 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
7848 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
7849 case PM_CONTEXT_CASE_IN: return "'in' clause";
7850 case PM_CONTEXT_CLASS: return "class definition";
7851 case PM_CONTEXT_DEF: return "method definition";
7852 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
7853 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
7854 case PM_CONTEXT_DEFINED: return "'defined?' expression";
 // This label falls through to the "'else' clause" string below.
7855 case PM_CONTEXT_ELSE:
7862 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
7863 case PM_CONTEXT_ELSIF: return "'elsif' clause";
7864 case PM_CONTEXT_EMBEXPR: return "embedded expression";
7871 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
7872 case PM_CONTEXT_FOR: return "for loop";
7873 case PM_CONTEXT_FOR_INDEX: return "for loop index";
7874 case PM_CONTEXT_IF: return "if statement";
7875 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
7876 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
7877 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
7878 case PM_CONTEXT_MAIN: return "top level context";
7879 case PM_CONTEXT_MODULE: return "module definition";
7880 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
7881 case PM_CONTEXT_PARENS: return "parentheses";
7882 case PM_CONTEXT_POSTEXE: return "'END' block";
7883 case PM_CONTEXT_PREDICATE: return "predicate";
7884 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
7892 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
7893 case PM_CONTEXT_SCLASS: return "singleton class definition";
7894 case PM_CONTEXT_TERNARY: return "ternary expression";
7895 case PM_CONTEXT_UNLESS: return "unless statement";
7896 case PM_CONTEXT_UNTIL: return "until statement";
7897 case PM_CONTEXT_WHILE: return "while statement";
7898 }
7899
 // Reached only if the value matched none of the cases above.
7900 assert(false && "unreachable");
7901 return "";
7902}
7903
7904/******************************************************************************/
7905/* Specific token lexers */
7906/******************************************************************************/
7907
7908static inline void
7909pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
7910 if (invalid != NULL) {
7911 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
7912 pm_parser_err(parser, invalid, invalid + 1, diag_id);
7913 }
7914}
7915
7916static size_t
7917pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
7918 const uint8_t *invalid = NULL;
7919 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
7920 pm_strspn_number_validate(parser, string, length, invalid);
7921 return length;
7922}
7923
7924static size_t
7925pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7926 const uint8_t *invalid = NULL;
7927 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
7928 pm_strspn_number_validate(parser, string, length, invalid);
7929 return length;
7930}
7931
7932static size_t
7933pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7934 const uint8_t *invalid = NULL;
7935 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
7936 pm_strspn_number_validate(parser, string, length, invalid);
7937 return length;
7938}
7939
7940static size_t
7941pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7942 const uint8_t *invalid = NULL;
7943 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
7944 pm_strspn_number_validate(parser, string, length, invalid);
7945 return length;
7946}
7947
7948static pm_token_type_t
7949lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
7950 pm_token_type_t type = PM_TOKEN_INTEGER;
7951
7952 // Here we're going to attempt to parse the optional decimal portion of a
7953 // float. If it's not there, then it's okay and we'll just continue on.
7954 if (peek(parser) == '.') {
7955 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7956 parser->current.end += 2;
7957 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7958 type = PM_TOKEN_FLOAT;
7959 } else {
7960 // If we had a . and then something else, then it's not a float
7961 // suffix on a number it's a method call or something else.
7962 return type;
7963 }
7964 }
7965
7966 // Here we're going to attempt to parse the optional exponent portion of a
7967 // float. If it's not there, it's okay and we'll just continue on.
7968 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
7969 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
7970 parser->current.end += 2;
7971
7972 if (pm_char_is_decimal_digit(peek(parser))) {
7973 parser->current.end++;
7974 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7975 } else {
7976 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
7977 }
7978 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7979 parser->current.end++;
7980 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7981 } else {
7982 return type;
7983 }
7984
7985 *seen_e = true;
7986 type = PM_TOKEN_FLOAT;
7987 }
7988
7989 return type;
7990}
7991
7992static pm_token_type_t
7993lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
7994 pm_token_type_t type = PM_TOKEN_INTEGER;
7995 *seen_e = false;
7996
7997 if (peek_offset(parser, -1) == '0') {
7998 switch (*parser->current.end) {
7999 // 0d1111 is a decimal number
8000 case 'd':
8001 case 'D':
8002 parser->current.end++;
8003 if (pm_char_is_decimal_digit(peek(parser))) {
8004 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8005 } else {
8006 match(parser, '_');
8007 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8008 }
8009
8010 break;
8011
8012 // 0b1111 is a binary number
8013 case 'b':
8014 case 'B':
8015 parser->current.end++;
8016 if (pm_char_is_binary_digit(peek(parser))) {
8017 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8018 } else {
8019 match(parser, '_');
8020 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8021 }
8022
8023 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8024 break;
8025
8026 // 0o1111 is an octal number
8027 case 'o':
8028 case 'O':
8029 parser->current.end++;
8030 if (pm_char_is_octal_digit(peek(parser))) {
8031 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8032 } else {
8033 match(parser, '_');
8034 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8035 }
8036
8037 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8038 break;
8039
8040 // 01111 is an octal number
8041 case '_':
8042 case '0':
8043 case '1':
8044 case '2':
8045 case '3':
8046 case '4':
8047 case '5':
8048 case '6':
8049 case '7':
8050 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8051 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8052 break;
8053
8054 // 0x1111 is a hexadecimal number
8055 case 'x':
8056 case 'X':
8057 parser->current.end++;
8058 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8059 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8060 } else {
8061 match(parser, '_');
8062 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8063 }
8064
8065 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8066 break;
8067
8068 // 0.xxx is a float
8069 case '.': {
8070 type = lex_optional_float_suffix(parser, seen_e);
8071 break;
8072 }
8073
8074 // 0exxx is a float
8075 case 'e':
8076 case 'E': {
8077 type = lex_optional_float_suffix(parser, seen_e);
8078 break;
8079 }
8080 }
8081 } else {
8082 // If it didn't start with a 0, then we'll lex as far as we can into a
8083 // decimal number.
8084 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8085
8086 // Afterward, we'll lex as far as we can into an optional float suffix.
8087 type = lex_optional_float_suffix(parser, seen_e);
8088 }
8089
8090 // At this point we have a completed number, but we want to provide the user
8091 // with a good experience if they put an additional .xxx fractional
8092 // component on the end, so we'll check for that here.
8093 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8094 const uint8_t *fraction_start = parser->current.end;
8095 const uint8_t *fraction_end = parser->current.end + 2;
8096 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8097 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8098 }
8099
8100 return type;
8101}
8102
8103static pm_token_type_t
8104lex_numeric(pm_parser_t *parser) {
8105 pm_token_type_t type = PM_TOKEN_INTEGER;
8106 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8107
8108 if (parser->current.end < parser->end) {
8109 bool seen_e = false;
8110 type = lex_numeric_prefix(parser, &seen_e);
8111
8112 const uint8_t *end = parser->current.end;
8113 pm_token_type_t suffix_type = type;
8114
8115 if (type == PM_TOKEN_INTEGER) {
8116 if (match(parser, 'r')) {
8117 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8118
8119 if (match(parser, 'i')) {
8120 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8121 }
8122 } else if (match(parser, 'i')) {
8123 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8124 }
8125 } else {
8126 if (!seen_e && match(parser, 'r')) {
8127 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8128
8129 if (match(parser, 'i')) {
8130 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8131 }
8132 } else if (match(parser, 'i')) {
8133 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8134 }
8135 }
8136
8137 const uint8_t b = peek(parser);
8138 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8139 parser->current.end = end;
8140 } else {
8141 type = suffix_type;
8142 }
8143 }
8144
8145 return type;
8146}
8147
8148static pm_token_type_t
8149lex_global_variable(pm_parser_t *parser) {
8150 if (parser->current.end >= parser->end) {
8151 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8152 return PM_TOKEN_GLOBAL_VARIABLE;
8153 }
8154
8155 // True if multiple characters are allowed after the declaration of the
8156 // global variable. Not true when it starts with "$-".
8157 bool allow_multiple = true;
8158
8159 switch (*parser->current.end) {
8160 case '~': // $~: match-data
8161 case '*': // $*: argv
8162 case '$': // $$: pid
8163 case '?': // $?: last status
8164 case '!': // $!: error string
8165 case '@': // $@: error position
8166 case '/': // $/: input record separator
8167 case '\\': // $\: output record separator
8168 case ';': // $;: field separator
8169 case ',': // $,: output field separator
8170 case '.': // $.: last read line number
8171 case '=': // $=: ignorecase
8172 case ':': // $:: load path
8173 case '<': // $<: reading filename
8174 case '>': // $>: default output handle
8175 case '\"': // $": already loaded files
8176 parser->current.end++;
8177 return PM_TOKEN_GLOBAL_VARIABLE;
8178
8179 case '&': // $&: last match
8180 case '`': // $`: string before last match
8181 case '\'': // $': string after last match
8182 case '+': // $+: string matches last paren.
8183 parser->current.end++;
8184 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8185
8186 case '0': {
8187 parser->current.end++;
8188 size_t width;
8189
8190 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8191 do {
8192 parser->current.end += width;
8193 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8194
8195 // $0 isn't allowed to be followed by anything.
8196 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8197 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
8198 }
8199
8200 return PM_TOKEN_GLOBAL_VARIABLE;
8201 }
8202
8203 case '1':
8204 case '2':
8205 case '3':
8206 case '4':
8207 case '5':
8208 case '6':
8209 case '7':
8210 case '8':
8211 case '9':
8212 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8213 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8214
8215 case '-':
8216 parser->current.end++;
8217 allow_multiple = false;
8219 default: {
8220 size_t width;
8221
8222 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8223 do {
8224 parser->current.end += width;
8225 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8226 } else if (pm_char_is_whitespace(peek(parser))) {
8227 // If we get here, then we have a $ followed by whitespace,
8228 // which is not allowed.
8229 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8230 } else {
8231 // If we get here, then we have a $ followed by something that
8232 // isn't recognized as a global variable.
8233 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8234 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8235 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
8236 }
8237
8238 return PM_TOKEN_GLOBAL_VARIABLE;
8239 }
8240 }
8241}
8242
8255static inline pm_token_type_t
8256lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8257 if (memcmp(current_start, value, vlen) == 0) {
8258 pm_lex_state_t last_state = parser->lex_state;
8259
8260 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8261 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8262 } else {
8263 lex_state_set(parser, state);
8264 if (state == PM_LEX_STATE_BEG) {
8265 parser->command_start = true;
8266 }
8267
8268 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8269 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8270 return modifier_type;
8271 }
8272 }
8273
8274 return type;
8275 }
8276
8277 return PM_TOKEN_EOF;
8278}
8279
8280static pm_token_type_t
8281lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8282 // Lex as far as we can into the current identifier.
8283 size_t width;
8284 const uint8_t *end = parser->end;
8285 const uint8_t *current_start = parser->current.start;
8286 const uint8_t *current_end = parser->current.end;
8287 bool encoding_changed = parser->encoding_changed;
8288
8289 if (encoding_changed) {
8290 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8291 current_end += width;
8292 }
8293 } else {
8294 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8295 current_end += width;
8296 }
8297 }
8298 parser->current.end = current_end;
8299
8300 // Now cache the length of the identifier so that we can quickly compare it
8301 // against known keywords.
8302 width = (size_t) (current_end - current_start);
8303
8304 if (current_end < end) {
8305 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8306 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8307 // check if we're returning the defined? keyword or just an identifier.
8308 width++;
8309
8310 if (
8311 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8312 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8313 ) {
8314 // If we're in a position where we can accept a : at the end of an
8315 // identifier, then we'll optionally accept it.
8316 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8317 (void) match(parser, ':');
8318 return PM_TOKEN_LABEL;
8319 }
8320
8321 if (parser->lex_state != PM_LEX_STATE_DOT) {
8322 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8323 return PM_TOKEN_KEYWORD_DEFINED;
8324 }
8325 }
8326
8327 return PM_TOKEN_METHOD_NAME;
8328 }
8329
8330 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8331 // If we're in a position where we can accept a = at the end of an
8332 // identifier, then we'll optionally accept it.
8333 return PM_TOKEN_IDENTIFIER;
8334 }
8335
8336 if (
8337 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8338 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8339 ) {
8340 // If we're in a position where we can accept a : at the end of an
8341 // identifier, then we'll optionally accept it.
8342 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8343 (void) match(parser, ':');
8344 return PM_TOKEN_LABEL;
8345 }
8346 }
8347
8348 if (parser->lex_state != PM_LEX_STATE_DOT) {
8349 pm_token_type_t type;
8350 switch (width) {
8351 case 2:
8352 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8353 if (pm_do_loop_stack_p(parser)) {
8354 return PM_TOKEN_KEYWORD_DO_LOOP;
8355 }
8356 return PM_TOKEN_KEYWORD_DO;
8357 }
8358
8359 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8360 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8361 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8362 break;
8363 case 3:
8364 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8365 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8366 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8367 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8368 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8369 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8370 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8371 break;
8372 case 4:
8373 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8374 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8375 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8376 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8377 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8378 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8379 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8380 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8381 break;
8382 case 5:
8383 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8384 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8385 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8386 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8387 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8388 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8389 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8390 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8391 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8392 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8393 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8394 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8395 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8396 break;
8397 case 6:
8398 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8399 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8400 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8401 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8402 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8403 break;
8404 case 8:
8405 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8406 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8407 break;
8408 case 12:
8409 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8410 break;
8411 }
8412 }
8413
8414 if (encoding_changed) {
8415 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8416 }
8417 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8418}
8419
8424static bool
8425current_token_starts_line(pm_parser_t *parser) {
8426 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8427}
8428
8443static pm_token_type_t
8444lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8445 // If there is no content following this #, then we're at the end of
8446 // the string and we can safely return string content.
8447 if (pound + 1 >= parser->end) {
8448 parser->current.end = pound + 1;
8449 return PM_TOKEN_STRING_CONTENT;
8450 }
8451
8452 // Now we'll check against the character that follows the #. If it constitutes
8453 // valid interplation, we'll handle that, otherwise we'll return
8454 // PM_TOKEN_NOT_PROVIDED.
8455 switch (pound[1]) {
8456 case '@': {
8457 // In this case we may have hit an embedded instance or class variable.
8458 if (pound + 2 >= parser->end) {
8459 parser->current.end = pound + 1;
8460 return PM_TOKEN_STRING_CONTENT;
8461 }
8462
8463 // If we're looking at a @ and there's another @, then we'll skip past the
8464 // second @.
8465 const uint8_t *variable = pound + 2;
8466 if (*variable == '@' && pound + 3 < parser->end) variable++;
8467
8468 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8469 // At this point we're sure that we've either hit an embedded instance
8470 // or class variable. In this case we'll first need to check if we've
8471 // already consumed content.
8472 if (pound > parser->current.start) {
8473 parser->current.end = pound;
8474 return PM_TOKEN_STRING_CONTENT;
8475 }
8476
8477 // Otherwise we need to return the embedded variable token
8478 // and then switch to the embedded variable lex mode.
8479 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8480 parser->current.end = pound + 1;
8481 return PM_TOKEN_EMBVAR;
8482 }
8483
8484 // If we didn't get a valid interpolation, then this is just regular
8485 // string content. This is like if we get "#@-". In this case the caller
8486 // should keep lexing.
8487 parser->current.end = pound + 1;
8488 return PM_TOKEN_NOT_PROVIDED;
8489 }
8490 case '$':
8491 // In this case we may have hit an embedded global variable. If there's
8492 // not enough room, then we'll just return string content.
8493 if (pound + 2 >= parser->end) {
8494 parser->current.end = pound + 1;
8495 return PM_TOKEN_STRING_CONTENT;
8496 }
8497
8498 // This is the character that we're going to check to see if it is the
8499 // start of an identifier that would indicate that this is a global
8500 // variable.
8501 const uint8_t *check = pound + 2;
8502
8503 if (pound[2] == '-') {
8504 if (pound + 3 >= parser->end) {
8505 parser->current.end = pound + 2;
8506 return PM_TOKEN_STRING_CONTENT;
8507 }
8508
8509 check++;
8510 }
8511
8512 // If the character that we're going to check is the start of an
8513 // identifier, or we don't have a - and the character is a decimal number
8514 // or a global name punctuation character, then we've hit an embedded
8515 // global variable.
8516 if (
8517 char_is_identifier_start(parser, check, parser->end - check) ||
8518 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8519 ) {
8520 // In this case we've hit an embedded global variable. First check to
8521 // see if we've already consumed content. If we have, then we need to
8522 // return that content as string content first.
8523 if (pound > parser->current.start) {
8524 parser->current.end = pound;
8525 return PM_TOKEN_STRING_CONTENT;
8526 }
8527
8528 // Otherwise, we need to return the embedded variable token and switch
8529 // to the embedded variable lex mode.
8530 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8531 parser->current.end = pound + 1;
8532 return PM_TOKEN_EMBVAR;
8533 }
8534
8535 // In this case we've hit a #$ that does not indicate a global variable.
8536 // In this case we'll continue lexing past it.
8537 parser->current.end = pound + 1;
8538 return PM_TOKEN_NOT_PROVIDED;
8539 case '{':
8540 // In this case it's the start of an embedded expression. If we have
8541 // already consumed content, then we need to return that content as string
8542 // content first.
8543 if (pound > parser->current.start) {
8544 parser->current.end = pound;
8545 return PM_TOKEN_STRING_CONTENT;
8546 }
8547
8548 parser->enclosure_nesting++;
8549
8550 // Otherwise we'll skip past the #{ and begin lexing the embedded
8551 // expression.
8552 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8553 parser->current.end = pound + 2;
8554 parser->command_start = true;
8555 pm_do_loop_stack_push(parser, false);
8556 return PM_TOKEN_EMBEXPR_BEGIN;
8557 default:
8558 // In this case we've hit a # that doesn't constitute interpolation. We'll
8559 // mark that by returning the not provided token type. This tells the
8560 // consumer to keep lexing forward.
8561 parser->current.end = pound + 1;
8562 return PM_TOKEN_NOT_PROVIDED;
8563 }
8564}
8565
// Bit flags describing the context in which an escape sequence is read.

// No special context.
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

// A control escape (\C-) is in effect.
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1 << 0;

// A meta escape (\M-) is in effect.
static const uint8_t PM_ESCAPE_FLAG_META = 1 << 1;

// The escape is being read for a character literal.
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1 << 2;

// The escape is being read for a regular expression.
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1 << 3;
8571
/**
 * Lookup table over the 128 ASCII code points. An entry is 1 for 0x09
 * through 0x0D and for 0x20 through 0x7E except the backslash (0x5C); every
 * other entry is 0.
 */
static const bool ascii_printable_chars[128] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the byte is ASCII (below 0x80) and marked in
 * ascii_printable_chars.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
8590
/**
 * Convert a hexadecimal digit character to its numeric value. Bytes up to
 * '9' are taken as decimal digits; anything above is taken as a letter and
 * mapped through its low three bits plus 9, which covers both a-f and A-F.
 * The input is not validated.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    return (uint8_t) ((value & 0x7) + 9);
}
8599
8605static inline uint32_t
8606escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
8607 uint32_t value = 0;
8608 for (size_t index = 0; index < length; index++) {
8609 if (index != 0) value <<= 4;
8610 value |= escape_hexadecimal_digit(string[index]);
8611 }
8612
8613 // Here we're going to verify that the value is actually a valid Unicode
8614 // codepoint and not a surrogate pair.
8615 if (value >= 0xD800 && value <= 0xDFFF) {
8616 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
8617 return 0xFFFD;
8618 }
8619
8620 return value;
8621}
8622
8626static inline uint8_t
8627escape_byte(uint8_t value, const uint8_t flags) {
8628 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8629 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8630 return value;
8631}
8632
8636static inline void
8637escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8638 // \u escape sequences in string-like structures implicitly change the
8639 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8640 // literal.
8641 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8642 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8643 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8644 }
8645
8647 }
8648
8649 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8650 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
8651 pm_buffer_append_byte(buffer, 0xEF);
8652 pm_buffer_append_byte(buffer, 0xBF);
8653 pm_buffer_append_byte(buffer, 0xBD);
8654 }
8655}
8656
8661static inline void
8662escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
8663 if (byte >= 0x80) {
8664 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8665 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8666 }
8667
8668 parser->explicit_encoding = parser->encoding;
8669 }
8670
8671 pm_buffer_append_byte(buffer, byte);
8672}
8673
8689static inline void
8690escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8691 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8692 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8693 }
8694
8695 escape_write_byte_encoded(parser, buffer, byte);
8696}
8697
8701static inline void
8702escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8703 size_t width;
8704 if (parser->encoding_changed) {
8705 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8706 } else {
8707 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8708 }
8709
8710 if (width == 1) {
8711 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8712 } else if (width > 1) {
8713 // Valid multibyte character. Just ignore escape.
8714 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8715 pm_buffer_append_bytes(b, parser->current.end, width);
8716 parser->current.end += width;
8717 } else {
8718 // Assume the next character wasn't meant to be part of this escape
8719 // sequence since it is invalid. Add an error and move on.
8720 parser->current.end++;
8721 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8722 }
8723}
8724
8730static void
8731escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8732#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8733
8734 PM_PARSER_WARN_TOKEN_FORMAT(
8735 parser,
8736 parser->current,
8737 PM_WARN_INVALID_CHARACTER,
8738 FLAG(flags),
8739 FLAG(flag),
8740 type
8741 );
8742
8743#undef FLAG
8744}
8745
8749static void
8750escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8751 uint8_t peeked = peek(parser);
8752 switch (peeked) {
8753 case '\\': {
8754 parser->current.end++;
8755 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
8756 return;
8757 }
8758 case '\'': {
8759 parser->current.end++;
8760 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
8761 return;
8762 }
8763 case 'a': {
8764 parser->current.end++;
8765 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
8766 return;
8767 }
8768 case 'b': {
8769 parser->current.end++;
8770 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
8771 return;
8772 }
8773 case 'e': {
8774 parser->current.end++;
8775 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
8776 return;
8777 }
8778 case 'f': {
8779 parser->current.end++;
8780 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
8781 return;
8782 }
8783 case 'n': {
8784 parser->current.end++;
8785 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
8786 return;
8787 }
8788 case 'r': {
8789 parser->current.end++;
8790 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
8791 return;
8792 }
8793 case 's': {
8794 parser->current.end++;
8795 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
8796 return;
8797 }
8798 case 't': {
8799 parser->current.end++;
8800 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
8801 return;
8802 }
8803 case 'v': {
8804 parser->current.end++;
8805 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
8806 return;
8807 }
8808 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
8809 uint8_t value = (uint8_t) (*parser->current.end - '0');
8810 parser->current.end++;
8811
8812 if (pm_char_is_octal_digit(peek(parser))) {
8813 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8814 parser->current.end++;
8815
8816 if (pm_char_is_octal_digit(peek(parser))) {
8817 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8818 parser->current.end++;
8819 }
8820 }
8821
8822 value = escape_byte(value, flags);
8823 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
8824 return;
8825 }
8826 case 'x': {
8827 const uint8_t *start = parser->current.end - 1;
8828
8829 parser->current.end++;
8830 uint8_t byte = peek(parser);
8831
8832 if (pm_char_is_hexadecimal_digit(byte)) {
8833 uint8_t value = escape_hexadecimal_digit(byte);
8834 parser->current.end++;
8835
8836 byte = peek(parser);
8837 if (pm_char_is_hexadecimal_digit(byte)) {
8838 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
8839 parser->current.end++;
8840 }
8841
8842 value = escape_byte(value, flags);
8843 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8844 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
8845 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
8846 } else {
8847 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8848 }
8849 }
8850
8851 escape_write_byte_encoded(parser, buffer, value);
8852 } else {
8853 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
8854 }
8855
8856 return;
8857 }
8858 case 'u': {
8859 const uint8_t *start = parser->current.end - 1;
8860 parser->current.end++;
8861
8862 if (parser->current.end == parser->end) {
8863 const uint8_t *start = parser->current.end - 2;
8864 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8865 } else if (peek(parser) == '{') {
8866 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
8867 parser->current.end++;
8868
8869 size_t whitespace;
8870 while (true) {
8871 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
8872 parser->current.end += whitespace;
8873 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
8874 // This is super hacky, but it gets us nicer error
8875 // messages because we can still pass it off to the
8876 // regular expression engine even if we hit an
8877 // unterminated regular expression.
8878 parser->current.end += 2;
8879 } else {
8880 break;
8881 }
8882 }
8883
8884 const uint8_t *extra_codepoints_start = NULL;
8885 int codepoints_count = 0;
8886
8887 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
8888 const uint8_t *unicode_start = parser->current.end;
8889 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
8890
8891 if (hexadecimal_length > 6) {
8892 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
8893 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
8894 } else if (hexadecimal_length == 0) {
8895 // there are not hexadecimal characters
8896
8897 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8898 // If this is a regular expression, we are going to
8899 // let the regular expression engine handle this
8900 // error instead of us.
8901 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8902 } else {
8903 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
8904 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8905 }
8906
8907 return;
8908 }
8909
8910 parser->current.end += hexadecimal_length;
8911 codepoints_count++;
8912 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
8913 extra_codepoints_start = unicode_start;
8914 }
8915
8916 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
8917 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
8918
8919 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
8920 }
8921
8922 // ?\u{nnnn} character literal should contain only one codepoint
8923 // and cannot be like ?\u{nnnn mmmm}.
8924 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
8925 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
8926 }
8927
8928 if (parser->current.end == parser->end) {
8929 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
8930 } else if (peek(parser) == '}') {
8931 parser->current.end++;
8932 } else {
8933 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8934 // If this is a regular expression, we are going to let
8935 // the regular expression engine handle this error
8936 // instead of us.
8937 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8938 } else {
8939 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8940 }
8941 }
8942
8943 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8944 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
8945 }
8946 } else {
8947 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
8948
8949 if (length == 0) {
8950 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8951 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8952 } else {
8953 const uint8_t *start = parser->current.end - 2;
8954 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8955 }
8956 } else if (length == 4) {
8957 uint32_t value = escape_unicode(parser, parser->current.end, 4);
8958
8959 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8960 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
8961 }
8962
8963 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
8964 parser->current.end += 4;
8965 } else {
8966 parser->current.end += length;
8967
8968 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8969 // If this is a regular expression, we are going to let
8970 // the regular expression engine handle this error
8971 // instead of us.
8972 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8973 } else {
8974 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
8975 }
8976 }
8977 }
8978
8979 return;
8980 }
8981 case 'c': {
8982 parser->current.end++;
8983 if (flags & PM_ESCAPE_FLAG_CONTROL) {
8984 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
8985 }
8986
8987 if (parser->current.end == parser->end) {
8988 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8989 return;
8990 }
8991
8992 uint8_t peeked = peek(parser);
8993 switch (peeked) {
8994 case '?': {
8995 parser->current.end++;
8996 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
8997 return;
8998 }
8999 case '\\':
9000 parser->current.end++;
9001
9002 if (match(parser, 'u') || match(parser, 'U')) {
9003 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9004 return;
9005 }
9006
9007 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9008 return;
9009 case ' ':
9010 parser->current.end++;
9011 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9012 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9013 return;
9014 case '\t':
9015 parser->current.end++;
9016 escape_read_warn(parser, flags, 0, "\\t");
9017 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9018 return;
9019 default: {
9020 if (!char_is_ascii_printable(peeked)) {
9021 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9022 return;
9023 }
9024
9025 parser->current.end++;
9026 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9027 return;
9028 }
9029 }
9030 }
9031 case 'C': {
9032 parser->current.end++;
9033 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9034 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9035 }
9036
9037 if (peek(parser) != '-') {
9038 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9039 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9040 return;
9041 }
9042
9043 parser->current.end++;
9044 if (parser->current.end == parser->end) {
9045 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9046 return;
9047 }
9048
9049 uint8_t peeked = peek(parser);
9050 switch (peeked) {
9051 case '?': {
9052 parser->current.end++;
9053 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9054 return;
9055 }
9056 case '\\':
9057 parser->current.end++;
9058
9059 if (match(parser, 'u') || match(parser, 'U')) {
9060 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9061 return;
9062 }
9063
9064 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9065 return;
9066 case ' ':
9067 parser->current.end++;
9068 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9069 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9070 return;
9071 case '\t':
9072 parser->current.end++;
9073 escape_read_warn(parser, flags, 0, "\\t");
9074 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9075 return;
9076 default: {
9077 if (!char_is_ascii_printable(peeked)) {
9078 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9079 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9080 return;
9081 }
9082
9083 parser->current.end++;
9084 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9085 return;
9086 }
9087 }
9088 }
9089 case 'M': {
9090 parser->current.end++;
9091 if (flags & PM_ESCAPE_FLAG_META) {
9092 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9093 }
9094
9095 if (peek(parser) != '-') {
9096 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9097 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9098 return;
9099 }
9100
9101 parser->current.end++;
9102 if (parser->current.end == parser->end) {
9103 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9104 return;
9105 }
9106
9107 uint8_t peeked = peek(parser);
9108 switch (peeked) {
9109 case '\\':
9110 parser->current.end++;
9111
9112 if (match(parser, 'u') || match(parser, 'U')) {
9113 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9114 return;
9115 }
9116
9117 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9118 return;
9119 case ' ':
9120 parser->current.end++;
9121 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9122 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9123 return;
9124 case '\t':
9125 parser->current.end++;
9126 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9127 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9128 return;
9129 default:
9130 if (!char_is_ascii_printable(peeked)) {
9131 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9132 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9133 return;
9134 }
9135
9136 parser->current.end++;
9137 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9138 return;
9139 }
9140 }
9141 case '\r': {
9142 if (peek_offset(parser, 1) == '\n') {
9143 parser->current.end += 2;
9144 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9145 return;
9146 }
9148 }
9149 default: {
9150 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9151 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9152 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9153 return;
9154 }
9155 if (parser->current.end < parser->end) {
9156 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9157 } else {
9158 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9159 }
9160 return;
9161 }
9162 }
9163}
9164
9190static pm_token_type_t
9191lex_question_mark(pm_parser_t *parser) {
9192 if (lex_state_end_p(parser)) {
9193 lex_state_set(parser, PM_LEX_STATE_BEG);
9194 return PM_TOKEN_QUESTION_MARK;
9195 }
9196
9197 if (parser->current.end >= parser->end) {
9198 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9199 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9200 return PM_TOKEN_CHARACTER_LITERAL;
9201 }
9202
9203 if (pm_char_is_whitespace(*parser->current.end)) {
9204 lex_state_set(parser, PM_LEX_STATE_BEG);
9205 return PM_TOKEN_QUESTION_MARK;
9206 }
9207
9208 lex_state_set(parser, PM_LEX_STATE_BEG);
9209
9210 if (match(parser, '\\')) {
9211 lex_state_set(parser, PM_LEX_STATE_END);
9212
9213 pm_buffer_t buffer;
9214 pm_buffer_init_capacity(&buffer, 3);
9215
9216 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9217 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
9218
9219 return PM_TOKEN_CHARACTER_LITERAL;
9220 } else {
9221 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9222
9223 // Ternary operators can have a ? immediately followed by an identifier
9224 // which starts with an underscore. We check for this case here.
9225 if (
9226 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9227 (
9228 (parser->current.end + encoding_width >= parser->end) ||
9229 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9230 )
9231 ) {
9232 lex_state_set(parser, PM_LEX_STATE_END);
9233 parser->current.end += encoding_width;
9234 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9235 return PM_TOKEN_CHARACTER_LITERAL;
9236 }
9237 }
9238
9239 return PM_TOKEN_QUESTION_MARK;
9240}
9241
9246static pm_token_type_t
9247lex_at_variable(pm_parser_t *parser) {
9248 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9249 const uint8_t *end = parser->end;
9250
9251 size_t width;
9252 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9253 parser->current.end += width;
9254
9255 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9256 parser->current.end += width;
9257 }
9258 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9259 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9260 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9261 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9262 }
9263
9264 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9265 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9266 } else {
9267 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9268 pm_parser_err_token(parser, &parser->current, diag_id);
9269 }
9270
9271 // If we're lexing an embedded variable, then we need to pop back into the
9272 // parent lex context.
9273 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9274 lex_mode_pop(parser);
9275 }
9276
9277 return type;
9278}
9279
9283static inline void
9284parser_lex_callback(pm_parser_t *parser) {
9285 if (parser->lex_callback) {
9286 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
9287 }
9288}
9289
9293static inline pm_comment_t *
9294parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9295 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
9296 if (comment == NULL) return NULL;
9297
9298 *comment = (pm_comment_t) {
9299 .type = type,
9300 .location = { parser->current.start, parser->current.end }
9301 };
9302
9303 return comment;
9304}
9305
9311static pm_token_type_t
9312lex_embdoc(pm_parser_t *parser) {
9313 // First, lex out the EMBDOC_BEGIN token.
9314 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9315
9316 if (newline == NULL) {
9317 parser->current.end = parser->end;
9318 } else {
9319 pm_newline_list_append(&parser->newline_list, newline);
9320 parser->current.end = newline + 1;
9321 }
9322
9323 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9324 parser_lex_callback(parser);
9325
9326 // Now, create a comment that is going to be attached to the parser.
9327 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9328 if (comment == NULL) return PM_TOKEN_EOF;
9329
9330 // Now, loop until we find the end of the embedded documentation or the end
9331 // of the file.
9332 while (parser->current.end + 4 <= parser->end) {
9333 parser->current.start = parser->current.end;
9334
9335 // If we've hit the end of the embedded documentation then we'll return
9336 // that token here.
9337 if (
9338 (memcmp(parser->current.end, "=end", 4) == 0) &&
9339 (
9340 (parser->current.end + 4 == parser->end) || // end of file
9341 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9342 (parser->current.end[4] == '\0') || // NUL or end of script
9343 (parser->current.end[4] == '\004') || // ^D
9344 (parser->current.end[4] == '\032') // ^Z
9345 )
9346 ) {
9347 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9348
9349 if (newline == NULL) {
9350 parser->current.end = parser->end;
9351 } else {
9352 pm_newline_list_append(&parser->newline_list, newline);
9353 parser->current.end = newline + 1;
9354 }
9355
9356 parser->current.type = PM_TOKEN_EMBDOC_END;
9357 parser_lex_callback(parser);
9358
9359 comment->location.end = parser->current.end;
9360 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9361
9362 return PM_TOKEN_EMBDOC_END;
9363 }
9364
9365 // Otherwise, we'll parse until the end of the line and return a line of
9366 // embedded documentation.
9367 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9368
9369 if (newline == NULL) {
9370 parser->current.end = parser->end;
9371 } else {
9372 pm_newline_list_append(&parser->newline_list, newline);
9373 parser->current.end = newline + 1;
9374 }
9375
9376 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9377 parser_lex_callback(parser);
9378 }
9379
9380 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9381
9382 comment->location.end = parser->current.end;
9383 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9384
9385 return PM_TOKEN_EOF;
9386}
9387
9393static inline void
9394parser_lex_ignored_newline(pm_parser_t *parser) {
9395 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9396 parser_lex_callback(parser);
9397}
9398
9408static inline void
9409parser_flush_heredoc_end(pm_parser_t *parser) {
9410 assert(parser->heredoc_end <= parser->end);
9411 parser->next_start = parser->heredoc_end;
9412 parser->heredoc_end = NULL;
9413}
9414
9418static bool
9419parser_end_of_line_p(const pm_parser_t *parser) {
9420 const uint8_t *cursor = parser->current.end;
9421
9422 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9423 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9424 }
9425
9426 return true;
9427}
9428
9447typedef struct {
9453
9458 const uint8_t *cursor;
9460
9480
9484static inline void
9485pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9486 pm_buffer_append_byte(&token_buffer->buffer, byte);
9487}
9488
9489static inline void
9490pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9491 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9492}
9493
9497static inline size_t
9498parser_char_width(const pm_parser_t *parser) {
9499 size_t width;
9500 if (parser->encoding_changed) {
9501 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9502 } else {
9503 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9504 }
9505
9506 // TODO: If the character is invalid in the given encoding, then we'll just
9507 // push one byte into the buffer. This should actually be an error.
9508 return (width == 0 ? 1 : width);
9509}
9510
9514static void
9515pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9516 size_t width = parser_char_width(parser);
9517 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9518 parser->current.end += width;
9519}
9520
9521static void
9522pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9523 size_t width = parser_char_width(parser);
9524 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
9525 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
9526 parser->current.end += width;
9527}
9528
/**
 * Returns true if none of the `length` bytes starting at `value` has its high
 * bit set. An empty slice is considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    const uint8_t *limit = value + length;

    for (const uint8_t *cursor = value; cursor != limit; cursor++) {
        if ((*cursor & 0x80) != 0) return false;
    }

    return true;
}
9537
9544static inline void
9545pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9546 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
9547}
9548
9549static inline void
9550pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9551 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
9552 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
9553 pm_buffer_free(&token_buffer->regexp_buffer);
9554}
9555
9565static void
9566pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9567 if (token_buffer->cursor == NULL) {
9568 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9569 } else {
9570 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9571 pm_token_buffer_copy(parser, token_buffer);
9572 }
9573}
9574
9575static void
9576pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9577 if (token_buffer->base.cursor == NULL) {
9578 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9579 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
9580 } else {
9581 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9582 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9583 pm_regexp_token_buffer_copy(parser, token_buffer);
9584 }
9585}
9586
9587#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9588
9597static void
9598pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9599 const uint8_t *start;
9600 if (token_buffer->cursor == NULL) {
9601 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9602 start = parser->current.start;
9603 } else {
9604 start = token_buffer->cursor;
9605 }
9606
9607 const uint8_t *end = parser->current.end - 1;
9608 assert(end >= start);
9609 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9610
9611 token_buffer->cursor = end;
9612}
9613
9614static void
9615pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9616 const uint8_t *start;
9617 if (token_buffer->base.cursor == NULL) {
9618 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9619 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9620 start = parser->current.start;
9621 } else {
9622 start = token_buffer->base.cursor;
9623 }
9624
9625 const uint8_t *end = parser->current.end - 1;
9626 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9627 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9628
9629 token_buffer->base.cursor = end;
9630}
9631
9632#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9633
9638static inline size_t
9639pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9640 size_t whitespace = 0;
9641
9642 switch (indent) {
9643 case PM_HEREDOC_INDENT_NONE:
9644 // Do nothing, we can't match a terminator with
9645 // indentation and there's no need to calculate common
9646 // whitespace.
9647 break;
9648 case PM_HEREDOC_INDENT_DASH:
9649 // Skip past inline whitespace.
9650 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9651 break;
9652 case PM_HEREDOC_INDENT_TILDE:
9653 // Skip past inline whitespace and calculate common
9654 // whitespace.
9655 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9656 if (**cursor == '\t') {
9657 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9658 } else {
9659 whitespace++;
9660 }
9661 (*cursor)++;
9662 }
9663
9664 break;
9665 }
9666
9667 return whitespace;
9668}
9669
9674static uint8_t
9675pm_lex_percent_delimiter(pm_parser_t *parser) {
9676 size_t eol_length = match_eol(parser);
9677
9678 if (eol_length) {
9679 if (parser->heredoc_end) {
9680 // If we have already lexed a heredoc, then the newline has already
9681 // been added to the list. In this case we want to just flush the
9682 // heredoc end.
9683 parser_flush_heredoc_end(parser);
9684 } else {
9685 // Otherwise, we'll add the newline to the list of newlines.
9686 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
9687 }
9688
9689 uint8_t delimiter = *parser->current.end;
9690
9691 // If our delimiter is \r\n, we want to treat it as if it's \n.
9692 // For example, %\r\nfoo\r\n should be "foo"
9693 if (eol_length == 2) {
9694 delimiter = *(parser->current.end + 1);
9695 }
9696
9697 parser->current.end += eol_length;
9698 return delimiter;
9699 }
9700
9701 return *parser->current.end++;
9702}
9703
/**
 * Set the type of the current token, report it through the lex callback, and
 * return from the enclosing function. It expects a variable named `parser` to
 * be in scope and must be used as a statement, followed by a semicolon, in a
 * function that returns void.
 *
 * The body is wrapped in do/while so that the macro expands to exactly one
 * statement, and the argument is parenthesized so that any expression can be
 * passed safely.
 */
#define LEX(token_type) do { parser->current.type = (token_type); parser_lex_callback(parser); return; } while (0)
9709
9716static void
9717parser_lex(pm_parser_t *parser) {
9718 assert(parser->current.end <= parser->end);
9719 parser->previous = parser->current;
9720
9721 // This value mirrors cmd_state from CRuby.
9722 bool previous_command_start = parser->command_start;
9723 parser->command_start = false;
9724
9725 // This is used to communicate to the newline lexing function that we've
9726 // already seen a comment.
9727 bool lexed_comment = false;
9728
9729 // Here we cache the current value of the semantic token seen flag. This is
9730 // used to reset it in case we find a token that shouldn't flip this flag.
9731 unsigned int semantic_token_seen = parser->semantic_token_seen;
9732 parser->semantic_token_seen = true;
9733
9734 switch (parser->lex_modes.current->mode) {
9735 case PM_LEX_DEFAULT:
9736 case PM_LEX_EMBEXPR:
9737 case PM_LEX_EMBVAR:
9738
9739 // We have a specific named label here because we are going to jump back to
9740 // this location in the event that we have lexed a token that should not be
9741 // returned to the parser. This includes comments, ignored newlines, and
9742 // invalid tokens of some form.
9743 lex_next_token: {
9744 // If we have the special next_start pointer set, then we're going to jump
9745 // to that location and start lexing from there.
9746 if (parser->next_start != NULL) {
9747 parser->current.end = parser->next_start;
9748 parser->next_start = NULL;
9749 }
9750
9751 // This value mirrors space_seen from CRuby. It tracks whether or not
9752 // space has been eaten before the start of the next token.
9753 bool space_seen = false;
9754
9755 // First, we're going to skip past any whitespace at the front of the next
9756 // token.
9757 bool chomping = true;
9758 while (parser->current.end < parser->end && chomping) {
9759 switch (*parser->current.end) {
9760 case ' ':
9761 case '\t':
9762 case '\f':
9763 case '\v':
9764 parser->current.end++;
9765 space_seen = true;
9766 break;
9767 case '\r':
9768 if (match_eol_offset(parser, 1)) {
9769 chomping = false;
9770 } else {
9771 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
9772 parser->current.end++;
9773 space_seen = true;
9774 }
9775 break;
9776 case '\\': {
9777 size_t eol_length = match_eol_offset(parser, 1);
9778 if (eol_length) {
9779 if (parser->heredoc_end) {
9780 parser->current.end = parser->heredoc_end;
9781 parser->heredoc_end = NULL;
9782 } else {
9783 parser->current.end += eol_length + 1;
9784 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9785 space_seen = true;
9786 }
9787 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
9788 parser->current.end += 2;
9789 } else {
9790 chomping = false;
9791 }
9792
9793 break;
9794 }
9795 default:
9796 chomping = false;
9797 break;
9798 }
9799 }
9800
9801 // Next, we'll set to start of this token to be the current end.
9802 parser->current.start = parser->current.end;
9803
9804 // We'll check if we're at the end of the file. If we are, then we
9805 // need to return the EOF token.
9806 if (parser->current.end >= parser->end) {
9807 // If we hit EOF, but the EOF came immediately after a newline,
9808 // set the start of the token to the newline. This way any EOF
9809 // errors will be reported as happening on that line rather than
9810 // a line after. For example "foo(\n" should report an error
9811 // on line 1 even though EOF technically occurs on line 2.
9812 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
9813 parser->current.start -= 1;
9814 }
9815 LEX(PM_TOKEN_EOF);
9816 }
9817
9818 // Finally, we'll check the current character to determine the next
9819 // token.
9820 switch (*parser->current.end++) {
9821 case '\0': // NUL or end of script
9822 case '\004': // ^D
9823 case '\032': // ^Z
9824 parser->current.end--;
9825 LEX(PM_TOKEN_EOF);
9826
9827 case '#': { // comments
9828 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
9829 parser->current.end = ending == NULL ? parser->end : ending;
9830
9831 // If we found a comment while lexing, then we're going to
9832 // add it to the list of comments in the file and keep
9833 // lexing.
9834 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
9835 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9836
9837 if (ending) parser->current.end++;
9838 parser->current.type = PM_TOKEN_COMMENT;
9839 parser_lex_callback(parser);
9840
9841 // Here, parse the comment to see if it's a magic comment
9842 // and potentially change state on the parser.
9843 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
9844 ptrdiff_t length = parser->current.end - parser->current.start;
9845
9846 // If we didn't find a magic comment within the first
9847 // pass and we're at the start of the file, then we need
9848 // to do another pass to potentially find other patterns
9849 // for encoding comments.
9850 if (length >= 10 && !parser->encoding_locked) {
9851 parser_lex_magic_comment_encoding(parser);
9852 }
9853 }
9854
9855 lexed_comment = true;
9856 }
9858 case '\r':
9859 case '\n': {
9860 parser->semantic_token_seen = semantic_token_seen & 0x1;
9861 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
9862
9863 if (eol_length) {
9864 // The only way you can have carriage returns in this
9865 // particular loop is if you have a carriage return
9866 // followed by a newline. In that case we'll just skip
9867 // over the carriage return and continue lexing, in
9868 // order to make it so that the newline token
9869 // encapsulates both the carriage return and the
9870 // newline. Note that we need to check that we haven't
9871 // already lexed a comment here because that falls
9872 // through into here as well.
9873 if (!lexed_comment) {
9874 parser->current.end += eol_length - 1; // skip CR
9875 }
9876
9877 if (parser->heredoc_end == NULL) {
9878 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9879 }
9880 }
9881
9882 if (parser->heredoc_end) {
9883 parser_flush_heredoc_end(parser);
9884 }
9885
9886 // If this is an ignored newline, then we can continue lexing after
9887 // calling the callback with the ignored newline token.
9888 switch (lex_state_ignored_p(parser)) {
9889 case PM_IGNORED_NEWLINE_NONE:
9890 break;
9891 case PM_IGNORED_NEWLINE_PATTERN:
9892 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
9893 if (!lexed_comment) parser_lex_ignored_newline(parser);
9894 lex_state_set(parser, PM_LEX_STATE_BEG);
9895 parser->command_start = true;
9896 parser->current.type = PM_TOKEN_NEWLINE;
9897 return;
9898 }
9900 case PM_IGNORED_NEWLINE_ALL:
9901 if (!lexed_comment) parser_lex_ignored_newline(parser);
9902 lexed_comment = false;
9903 goto lex_next_token;
9904 }
9905
9906 // Here we need to look ahead and see if there is a call operator
9907 // (either . or &.) that starts the next line. If there is, then this
9908 // is going to become an ignored newline and we're going to instead
9909 // return the call operator.
9910 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
9911 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
9912
9913 if (next_content < parser->end) {
9914 // If we hit a comment after a newline, then we're going to check
9915 // if it's ignored or if it's followed by a method call ('.').
9916 // If it is, then we're going to call the
9917 // callback with an ignored newline and then continue lexing.
9918 // Otherwise we'll return a regular newline.
9919 if (next_content[0] == '#') {
9920 // Here we look for a "." or "&." following a "\n".
9921 const uint8_t *following = next_newline(next_content, parser->end - next_content);
9922
9923 while (following && (following + 1 < parser->end)) {
9924 following++;
9925 following += pm_strspn_inline_whitespace(following, parser->end - following);
9926
9927 // If this is not followed by a comment, then we can break out
9928 // of this loop.
9929 if (peek_at(parser, following) != '#') break;
9930
9931 // If there is a comment, then we need to find the end of the
9932 // comment and continue searching from there.
9933 following = next_newline(following, parser->end - following);
9934 }
9935
9936 // If the lex state was ignored, we will lex the
9937 // ignored newline.
9938 if (lex_state_ignored_p(parser)) {
9939 if (!lexed_comment) parser_lex_ignored_newline(parser);
9940 lexed_comment = false;
9941 goto lex_next_token;
9942 }
9943
9944 // If we hit a '.' or a '&.' we will lex the ignored
9945 // newline.
9946 if (following && (
9947 (peek_at(parser, following) == '.') ||
9948 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
9949 )) {
9950 if (!lexed_comment) parser_lex_ignored_newline(parser);
9951 lexed_comment = false;
9952 goto lex_next_token;
9953 }
9954
9955
9956 // If we are parsing as CRuby 4.0 or later and we
9957 // hit a '&&' or a '||' then we will lex the ignored
9958 // newline.
9959 if (
9961 following && (
9962 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
9963 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
9964 (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
9965 (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
9966 )
9967 ) {
9968 if (!lexed_comment) parser_lex_ignored_newline(parser);
9969 lexed_comment = false;
9970 goto lex_next_token;
9971 }
9972 }
9973
9974 // If we hit a . after a newline, then we're in a call chain and
9975 // we need to return the call operator.
9976 if (next_content[0] == '.') {
9977 // To match ripper, we need to emit an ignored newline even though
9978 // it's a real newline in the case that we have a beginless range
9979 // on a subsequent line.
9980 if (peek_at(parser, next_content + 1) == '.') {
9981 if (!lexed_comment) parser_lex_ignored_newline(parser);
9982 lex_state_set(parser, PM_LEX_STATE_BEG);
9983 parser->command_start = true;
9984 parser->current.type = PM_TOKEN_NEWLINE;
9985 return;
9986 }
9987
9988 if (!lexed_comment) parser_lex_ignored_newline(parser);
9989 lex_state_set(parser, PM_LEX_STATE_DOT);
9990 parser->current.start = next_content;
9991 parser->current.end = next_content + 1;
9992 parser->next_start = NULL;
9993 LEX(PM_TOKEN_DOT);
9994 }
9995
9996 // If we hit a &. after a newline, then we're in a call chain and
9997 // we need to return the call operator.
9998 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
9999 if (!lexed_comment) parser_lex_ignored_newline(parser);
10000 lex_state_set(parser, PM_LEX_STATE_DOT);
10001 parser->current.start = next_content;
10002 parser->current.end = next_content + 2;
10003 parser->next_start = NULL;
10004 LEX(PM_TOKEN_AMPERSAND_DOT);
10005 }
10006
10007 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10008 // If we hit an && then we are in a logical chain
10009 // and we need to return the logical operator.
10010 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10011 if (!lexed_comment) parser_lex_ignored_newline(parser);
10012 lex_state_set(parser, PM_LEX_STATE_BEG);
10013 parser->current.start = next_content;
10014 parser->current.end = next_content + 2;
10015 parser->next_start = NULL;
10016 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10017 }
10018
10019 // If we hit a || then we are in a logical chain and
10020 // we need to return the logical operator.
10021 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10022 if (!lexed_comment) parser_lex_ignored_newline(parser);
10023 lex_state_set(parser, PM_LEX_STATE_BEG);
10024 parser->current.start = next_content;
10025 parser->current.end = next_content + 2;
10026 parser->next_start = NULL;
10027 LEX(PM_TOKEN_PIPE_PIPE);
10028 }
10029
10030 // If we hit an 'and' then we are in a logical chain
10031 // and we need to return the logical operator.
10032 if (
10033 peek_at(parser, next_content) == 'a' &&
10034 peek_at(parser, next_content + 1) == 'n' &&
10035 peek_at(parser, next_content + 2) == 'd' &&
10036 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10037 ) {
10038 if (!lexed_comment) parser_lex_ignored_newline(parser);
10039 lex_state_set(parser, PM_LEX_STATE_BEG);
10040 parser->current.start = next_content;
10041 parser->current.end = next_content + 3;
10042 parser->next_start = NULL;
10043 parser->command_start = true;
10044 LEX(PM_TOKEN_KEYWORD_AND);
10045 }
10046
10047 // If we hit an 'or' then we are in a logical chain
10048 // and we need to return the logical operator.
10049 if (
10050 peek_at(parser, next_content) == 'o' &&
10051 peek_at(parser, next_content + 1) == 'r' &&
10052 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10053 ) {
10054 if (!lexed_comment) parser_lex_ignored_newline(parser);
10055 lex_state_set(parser, PM_LEX_STATE_BEG);
10056 parser->current.start = next_content;
10057 parser->current.end = next_content + 2;
10058 parser->next_start = NULL;
10059 parser->command_start = true;
10060 LEX(PM_TOKEN_KEYWORD_OR);
10061 }
10062 }
10063 }
10064
10065 // At this point we know this is a regular newline, and we can set the
10066 // necessary state and return the token.
10067 lex_state_set(parser, PM_LEX_STATE_BEG);
10068 parser->command_start = true;
10069 parser->current.type = PM_TOKEN_NEWLINE;
10070 if (!lexed_comment) parser_lex_callback(parser);
10071 return;
10072 }
10073
10074 // ,
10075 case ',':
10076 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10077 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10078 }
10079
10080 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10081 LEX(PM_TOKEN_COMMA);
10082
10083 // (
10084 case '(': {
10085 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10086
10087 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10088 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10089 }
10090
10091 parser->enclosure_nesting++;
10092 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10093 pm_do_loop_stack_push(parser, false);
10094 LEX(type);
10095 }
10096
10097 // )
10098 case ')':
10099 parser->enclosure_nesting--;
10100 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10101 pm_do_loop_stack_pop(parser);
10102 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10103
10104 // ;
10105 case ';':
10106 lex_state_set(parser, PM_LEX_STATE_BEG);
10107 parser->command_start = true;
10108 LEX(PM_TOKEN_SEMICOLON);
10109
10110 // [ [] []=
10111 case '[':
10112 parser->enclosure_nesting++;
10113 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10114
10115 if (lex_state_operator_p(parser)) {
10116 if (match(parser, ']')) {
10117 parser->enclosure_nesting--;
10118 lex_state_set(parser, PM_LEX_STATE_ARG);
10119 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10120 }
10121
10122 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10123 LEX(type);
10124 }
10125
10126 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10127 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10128 }
10129
10130 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10131 pm_do_loop_stack_push(parser, false);
10132 LEX(type);
10133
10134 // ]
10135 case ']':
10136 parser->enclosure_nesting--;
10137 lex_state_set(parser, PM_LEX_STATE_END);
10138 pm_do_loop_stack_pop(parser);
10139 LEX(PM_TOKEN_BRACKET_RIGHT);
10140
10141 // {
10142 case '{': {
10143 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10144
10145 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10146 // This { begins a lambda
10147 parser->command_start = true;
10148 lex_state_set(parser, PM_LEX_STATE_BEG);
10149 type = PM_TOKEN_LAMBDA_BEGIN;
10150 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10151 // This { begins a hash literal
10152 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10153 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10154 // This { begins a block
10155 parser->command_start = true;
10156 lex_state_set(parser, PM_LEX_STATE_BEG);
10157 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10158 // This { begins a block on a command
10159 parser->command_start = true;
10160 lex_state_set(parser, PM_LEX_STATE_BEG);
10161 } else {
10162 // This { begins a hash literal
10163 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10164 }
10165
10166 parser->enclosure_nesting++;
10167 parser->brace_nesting++;
10168 pm_do_loop_stack_push(parser, false);
10169
10170 LEX(type);
10171 }
10172
10173 // }
10174 case '}':
10175 parser->enclosure_nesting--;
10176 pm_do_loop_stack_pop(parser);
10177
10178 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10179 lex_mode_pop(parser);
10180 LEX(PM_TOKEN_EMBEXPR_END);
10181 }
10182
10183 parser->brace_nesting--;
10184 lex_state_set(parser, PM_LEX_STATE_END);
10185 LEX(PM_TOKEN_BRACE_RIGHT);
10186
10187 // * ** **= *=
10188 case '*': {
10189 if (match(parser, '*')) {
10190 if (match(parser, '=')) {
10191 lex_state_set(parser, PM_LEX_STATE_BEG);
10192 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10193 }
10194
10195 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10196
10197 if (lex_state_spcarg_p(parser, space_seen)) {
10198 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10199 type = PM_TOKEN_USTAR_STAR;
10200 } else if (lex_state_beg_p(parser)) {
10201 type = PM_TOKEN_USTAR_STAR;
10202 } else if (ambiguous_operator_p(parser, space_seen)) {
10203 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10204 }
10205
10206 if (lex_state_operator_p(parser)) {
10207 lex_state_set(parser, PM_LEX_STATE_ARG);
10208 } else {
10209 lex_state_set(parser, PM_LEX_STATE_BEG);
10210 }
10211
10212 LEX(type);
10213 }
10214
10215 if (match(parser, '=')) {
10216 lex_state_set(parser, PM_LEX_STATE_BEG);
10217 LEX(PM_TOKEN_STAR_EQUAL);
10218 }
10219
10220 pm_token_type_t type = PM_TOKEN_STAR;
10221
10222 if (lex_state_spcarg_p(parser, space_seen)) {
10223 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10224 type = PM_TOKEN_USTAR;
10225 } else if (lex_state_beg_p(parser)) {
10226 type = PM_TOKEN_USTAR;
10227 } else if (ambiguous_operator_p(parser, space_seen)) {
10228 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10229 }
10230
10231 if (lex_state_operator_p(parser)) {
10232 lex_state_set(parser, PM_LEX_STATE_ARG);
10233 } else {
10234 lex_state_set(parser, PM_LEX_STATE_BEG);
10235 }
10236
10237 LEX(type);
10238 }
10239
10240 // ! != !~ !@
10241 case '!':
10242 if (lex_state_operator_p(parser)) {
10243 lex_state_set(parser, PM_LEX_STATE_ARG);
10244 if (match(parser, '@')) {
10245 LEX(PM_TOKEN_BANG);
10246 }
10247 } else {
10248 lex_state_set(parser, PM_LEX_STATE_BEG);
10249 }
10250
10251 if (match(parser, '=')) {
10252 LEX(PM_TOKEN_BANG_EQUAL);
10253 }
10254
10255 if (match(parser, '~')) {
10256 LEX(PM_TOKEN_BANG_TILDE);
10257 }
10258
10259 LEX(PM_TOKEN_BANG);
10260
10261 // = => =~ == === =begin
10262 case '=':
10263 if (
10264 current_token_starts_line(parser) &&
10265 (parser->current.end + 5 <= parser->end) &&
10266 memcmp(parser->current.end, "begin", 5) == 0 &&
10267 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10268 ) {
10269 pm_token_type_t type = lex_embdoc(parser);
10270 if (type == PM_TOKEN_EOF) {
10271 LEX(type);
10272 }
10273
10274 goto lex_next_token;
10275 }
10276
10277 if (lex_state_operator_p(parser)) {
10278 lex_state_set(parser, PM_LEX_STATE_ARG);
10279 } else {
10280 lex_state_set(parser, PM_LEX_STATE_BEG);
10281 }
10282
10283 if (match(parser, '>')) {
10284 LEX(PM_TOKEN_EQUAL_GREATER);
10285 }
10286
10287 if (match(parser, '~')) {
10288 LEX(PM_TOKEN_EQUAL_TILDE);
10289 }
10290
10291 if (match(parser, '=')) {
10292 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10293 }
10294
10295 LEX(PM_TOKEN_EQUAL);
10296
10297 // < << <<= <= <=>
10298 case '<':
10299 if (match(parser, '<')) {
10300 if (
10301 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10302 !lex_state_end_p(parser) &&
10303 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10304 ) {
10305 const uint8_t *end = parser->current.end;
10306
10307 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10308 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10309
10310 if (match(parser, '-')) {
10311 indent = PM_HEREDOC_INDENT_DASH;
10312 }
10313 else if (match(parser, '~')) {
10314 indent = PM_HEREDOC_INDENT_TILDE;
10315 }
10316
10317 if (match(parser, '`')) {
10318 quote = PM_HEREDOC_QUOTE_BACKTICK;
10319 }
10320 else if (match(parser, '"')) {
10321 quote = PM_HEREDOC_QUOTE_DOUBLE;
10322 }
10323 else if (match(parser, '\'')) {
10324 quote = PM_HEREDOC_QUOTE_SINGLE;
10325 }
10326
10327 const uint8_t *ident_start = parser->current.end;
10328 size_t width = 0;
10329
10330 if (parser->current.end >= parser->end) {
10331 parser->current.end = end;
10332 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10333 parser->current.end = end;
10334 } else {
10335 if (quote == PM_HEREDOC_QUOTE_NONE) {
10336 parser->current.end += width;
10337
10338 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10339 parser->current.end += width;
10340 }
10341 } else {
10342 // If we have quotes, then we're going to go until we find the
10343 // end quote.
10344 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10345 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10346 parser->current.end++;
10347 }
10348 }
10349
10350 size_t ident_length = (size_t) (parser->current.end - ident_start);
10351 bool ident_error = false;
10352
10353 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10354 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
10355 ident_error = true;
10356 }
10357
10358 parser->explicit_encoding = NULL;
10359 lex_mode_push(parser, (pm_lex_mode_t) {
10360 .mode = PM_LEX_HEREDOC,
10361 .as.heredoc = {
10362 .base = {
10363 .ident_start = ident_start,
10364 .ident_length = ident_length,
10365 .quote = quote,
10366 .indent = indent
10367 },
10368 .next_start = parser->current.end,
10369 .common_whitespace = NULL,
10370 .line_continuation = false
10371 }
10372 });
10373
10374 if (parser->heredoc_end == NULL) {
10375 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10376
10377 if (body_start == NULL) {
10378 // If there is no newline after the heredoc identifier, then
10379 // this is not a valid heredoc declaration. In this case we
10380 // will add an error, but we will still return a heredoc
10381 // start.
10382 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10383 body_start = parser->end;
10384 } else {
10385 // Otherwise, we want to indicate that the body of the
10386 // heredoc starts on the character after the next newline.
10387 pm_newline_list_append(&parser->newline_list, body_start);
10388 body_start++;
10389 }
10390
10391 parser->next_start = body_start;
10392 } else {
10393 parser->next_start = parser->heredoc_end;
10394 }
10395
10396 LEX(PM_TOKEN_HEREDOC_START);
10397 }
10398 }
10399
10400 if (match(parser, '=')) {
10401 lex_state_set(parser, PM_LEX_STATE_BEG);
10402 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10403 }
10404
10405 if (ambiguous_operator_p(parser, space_seen)) {
10406 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10407 }
10408
10409 if (lex_state_operator_p(parser)) {
10410 lex_state_set(parser, PM_LEX_STATE_ARG);
10411 } else {
10412 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10413 lex_state_set(parser, PM_LEX_STATE_BEG);
10414 }
10415
10416 LEX(PM_TOKEN_LESS_LESS);
10417 }
10418
10419 if (lex_state_operator_p(parser)) {
10420 lex_state_set(parser, PM_LEX_STATE_ARG);
10421 } else {
10422 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10423 lex_state_set(parser, PM_LEX_STATE_BEG);
10424 }
10425
10426 if (match(parser, '=')) {
10427 if (match(parser, '>')) {
10428 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10429 }
10430
10431 LEX(PM_TOKEN_LESS_EQUAL);
10432 }
10433
10434 LEX(PM_TOKEN_LESS);
10435
10436 // > >> >>= >=
10437 case '>':
10438 if (match(parser, '>')) {
10439 if (lex_state_operator_p(parser)) {
10440 lex_state_set(parser, PM_LEX_STATE_ARG);
10441 } else {
10442 lex_state_set(parser, PM_LEX_STATE_BEG);
10443 }
10444 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10445 }
10446
10447 if (lex_state_operator_p(parser)) {
10448 lex_state_set(parser, PM_LEX_STATE_ARG);
10449 } else {
10450 lex_state_set(parser, PM_LEX_STATE_BEG);
10451 }
10452
10453 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10454
10455 // double-quoted string literal
10456 case '"': {
10457 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10458 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10459 LEX(PM_TOKEN_STRING_BEGIN);
10460 }
10461
10462 // xstring literal
10463 case '`': {
10464 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10465 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10466 LEX(PM_TOKEN_BACKTICK);
10467 }
10468
10469 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10470 if (previous_command_start) {
10471 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10472 } else {
10473 lex_state_set(parser, PM_LEX_STATE_ARG);
10474 }
10475
10476 LEX(PM_TOKEN_BACKTICK);
10477 }
10478
10479 lex_mode_push_string(parser, true, false, '\0', '`');
10480 LEX(PM_TOKEN_BACKTICK);
10481 }
10482
10483 // single-quoted string literal
10484 case '\'': {
10485 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10486 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10487 LEX(PM_TOKEN_STRING_BEGIN);
10488 }
10489
10490 // ? character literal
10491 case '?':
10492 LEX(lex_question_mark(parser));
10493
10494 // & && &&= &= &.
10495 case '&': {
10496 if (match(parser, '&')) {
10497 lex_state_set(parser, PM_LEX_STATE_BEG);
10498
10499 if (match(parser, '=')) {
10500 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10501 }
10502
10503 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10504 }
10505
10506 if (match(parser, '=')) {
10507 lex_state_set(parser, PM_LEX_STATE_BEG);
10508 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10509 }
10510
10511 if (match(parser, '.')) {
10512 lex_state_set(parser, PM_LEX_STATE_DOT);
10513 LEX(PM_TOKEN_AMPERSAND_DOT);
10514 }
10515
10516 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10517 if (lex_state_spcarg_p(parser, space_seen)) {
10518 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10519 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10520 } else {
10521 const uint8_t delim = peek_offset(parser, 1);
10522
10523 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10524 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10525 }
10526 }
10527
10528 type = PM_TOKEN_UAMPERSAND;
10529 } else if (lex_state_beg_p(parser)) {
10530 type = PM_TOKEN_UAMPERSAND;
10531 } else if (ambiguous_operator_p(parser, space_seen)) {
10532 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10533 }
10534
10535 if (lex_state_operator_p(parser)) {
10536 lex_state_set(parser, PM_LEX_STATE_ARG);
10537 } else {
10538 lex_state_set(parser, PM_LEX_STATE_BEG);
10539 }
10540
10541 LEX(type);
10542 }
10543
10544 // | || ||= |=
10545 case '|':
10546 if (match(parser, '|')) {
10547 if (match(parser, '=')) {
10548 lex_state_set(parser, PM_LEX_STATE_BEG);
10549 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10550 }
10551
10552 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10553 parser->current.end--;
10554 LEX(PM_TOKEN_PIPE);
10555 }
10556
10557 lex_state_set(parser, PM_LEX_STATE_BEG);
10558 LEX(PM_TOKEN_PIPE_PIPE);
10559 }
10560
10561 if (match(parser, '=')) {
10562 lex_state_set(parser, PM_LEX_STATE_BEG);
10563 LEX(PM_TOKEN_PIPE_EQUAL);
10564 }
10565
10566 if (lex_state_operator_p(parser)) {
10567 lex_state_set(parser, PM_LEX_STATE_ARG);
10568 } else {
10569 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10570 }
10571
10572 LEX(PM_TOKEN_PIPE);
10573
10574 // + += +@
10575 case '+': {
10576 if (lex_state_operator_p(parser)) {
10577 lex_state_set(parser, PM_LEX_STATE_ARG);
10578
10579 if (match(parser, '@')) {
10580 LEX(PM_TOKEN_UPLUS);
10581 }
10582
10583 LEX(PM_TOKEN_PLUS);
10584 }
10585
10586 if (match(parser, '=')) {
10587 lex_state_set(parser, PM_LEX_STATE_BEG);
10588 LEX(PM_TOKEN_PLUS_EQUAL);
10589 }
10590
10591 if (
10592 lex_state_beg_p(parser) ||
10593 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10594 ) {
10595 lex_state_set(parser, PM_LEX_STATE_BEG);
10596
10597 if (pm_char_is_decimal_digit(peek(parser))) {
10598 parser->current.end++;
10599 pm_token_type_t type = lex_numeric(parser);
10600 lex_state_set(parser, PM_LEX_STATE_END);
10601 LEX(type);
10602 }
10603
10604 LEX(PM_TOKEN_UPLUS);
10605 }
10606
10607 if (ambiguous_operator_p(parser, space_seen)) {
10608 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10609 }
10610
10611 lex_state_set(parser, PM_LEX_STATE_BEG);
10612 LEX(PM_TOKEN_PLUS);
10613 }
10614
10615 // - -= -@ ->
10616 case '-': {
10617 if (lex_state_operator_p(parser)) {
10618 lex_state_set(parser, PM_LEX_STATE_ARG);
10619
10620 if (match(parser, '@')) {
10621 LEX(PM_TOKEN_UMINUS);
10622 }
10623
10624 LEX(PM_TOKEN_MINUS);
10625 }
10626
10627 if (match(parser, '=')) {
10628 lex_state_set(parser, PM_LEX_STATE_BEG);
10629 LEX(PM_TOKEN_MINUS_EQUAL);
10630 }
10631
10632 if (match(parser, '>')) {
10633 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10634 LEX(PM_TOKEN_MINUS_GREATER);
10635 }
10636
10637 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10638 bool is_beg = lex_state_beg_p(parser);
10639 if (!is_beg && spcarg) {
10640 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10641 }
10642
10643 if (is_beg || spcarg) {
10644 lex_state_set(parser, PM_LEX_STATE_BEG);
10645 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10646 }
10647
10648 if (ambiguous_operator_p(parser, space_seen)) {
10649 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10650 }
10651
10652 lex_state_set(parser, PM_LEX_STATE_BEG);
10653 LEX(PM_TOKEN_MINUS);
10654 }
10655
10656 // . .. ...
10657 case '.': {
10658 bool beg_p = lex_state_beg_p(parser);
10659
10660 if (match(parser, '.')) {
10661 if (match(parser, '.')) {
10662 // If we're _not_ inside a range within default parameters
10663 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10664 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10665 lex_state_set(parser, PM_LEX_STATE_BEG);
10666 } else {
10667 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10668 }
10669 LEX(PM_TOKEN_UDOT_DOT_DOT);
10670 }
10671
10672 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10673 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10674 }
10675
10676 lex_state_set(parser, PM_LEX_STATE_BEG);
10677 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10678 }
10679
10680 lex_state_set(parser, PM_LEX_STATE_BEG);
10681 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10682 }
10683
10684 lex_state_set(parser, PM_LEX_STATE_DOT);
10685 LEX(PM_TOKEN_DOT);
10686 }
10687
10688 // integer
10689 case '0':
10690 case '1':
10691 case '2':
10692 case '3':
10693 case '4':
10694 case '5':
10695 case '6':
10696 case '7':
10697 case '8':
10698 case '9': {
10699 pm_token_type_t type = lex_numeric(parser);
10700 lex_state_set(parser, PM_LEX_STATE_END);
10701 LEX(type);
10702 }
10703
10704 // :: symbol
10705 case ':':
10706 if (match(parser, ':')) {
10707 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
10708 lex_state_set(parser, PM_LEX_STATE_BEG);
10709 LEX(PM_TOKEN_UCOLON_COLON);
10710 }
10711
10712 lex_state_set(parser, PM_LEX_STATE_DOT);
10713 LEX(PM_TOKEN_COLON_COLON);
10714 }
10715
10716 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
10717 lex_state_set(parser, PM_LEX_STATE_BEG);
10718 LEX(PM_TOKEN_COLON);
10719 }
10720
10721 if (peek(parser) == '"' || peek(parser) == '\'') {
10722 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
10723 parser->current.end++;
10724 }
10725
10726 lex_state_set(parser, PM_LEX_STATE_FNAME);
10727 LEX(PM_TOKEN_SYMBOL_BEGIN);
10728
10729 // / /=
10730 case '/':
10731 if (lex_state_beg_p(parser)) {
10732 lex_mode_push_regexp(parser, '\0', '/');
10733 LEX(PM_TOKEN_REGEXP_BEGIN);
10734 }
10735
10736 if (match(parser, '=')) {
10737 lex_state_set(parser, PM_LEX_STATE_BEG);
10738 LEX(PM_TOKEN_SLASH_EQUAL);
10739 }
10740
10741 if (lex_state_spcarg_p(parser, space_seen)) {
10742 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
10743 lex_mode_push_regexp(parser, '\0', '/');
10744 LEX(PM_TOKEN_REGEXP_BEGIN);
10745 }
10746
10747 if (ambiguous_operator_p(parser, space_seen)) {
10748 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
10749 }
10750
10751 if (lex_state_operator_p(parser)) {
10752 lex_state_set(parser, PM_LEX_STATE_ARG);
10753 } else {
10754 lex_state_set(parser, PM_LEX_STATE_BEG);
10755 }
10756
10757 LEX(PM_TOKEN_SLASH);
10758
10759 // ^ ^=
10760 case '^':
10761 if (lex_state_operator_p(parser)) {
10762 lex_state_set(parser, PM_LEX_STATE_ARG);
10763 } else {
10764 lex_state_set(parser, PM_LEX_STATE_BEG);
10765 }
10766 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
10767
10768 // ~ ~@
10769 case '~':
10770 if (lex_state_operator_p(parser)) {
10771 (void) match(parser, '@');
10772 lex_state_set(parser, PM_LEX_STATE_ARG);
10773 } else {
10774 lex_state_set(parser, PM_LEX_STATE_BEG);
10775 }
10776
10777 LEX(PM_TOKEN_TILDE);
10778
10779 // % %= %i %I %q %Q %r %s %w %W %x
10780 case '%': {
10781 // If there is no subsequent character then we have an
10782 // invalid token. We're going to say it's the percent
10783 // operator because we don't want to move into the string
10784 // lex mode unnecessarily.
10785 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
10786 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
10787 LEX(PM_TOKEN_PERCENT);
10788 }
10789
10790 if (!lex_state_beg_p(parser) && match(parser, '=')) {
10791 lex_state_set(parser, PM_LEX_STATE_BEG);
10792 LEX(PM_TOKEN_PERCENT_EQUAL);
10793 } else if (
10794 lex_state_beg_p(parser) ||
10795 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
10796 lex_state_spcarg_p(parser, space_seen)
10797 ) {
10798 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
10799 if (*parser->current.end >= 0x80) {
10800 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10801 }
10802
10803 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10804 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10805 LEX(PM_TOKEN_STRING_BEGIN);
10806 }
10807
10808 // Delimiters for %-literals cannot be alphanumeric. We
10809 // validate that here.
10810 uint8_t delimiter = peek_offset(parser, 1);
10811 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
10812 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10813 goto lex_next_token;
10814 }
10815
10816 switch (peek(parser)) {
10817 case 'i': {
10818 parser->current.end++;
10819
10820 if (parser->current.end < parser->end) {
10821 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10822 } else {
10823 lex_mode_push_list_eof(parser);
10824 }
10825
10826 LEX(PM_TOKEN_PERCENT_LOWER_I);
10827 }
10828 case 'I': {
10829 parser->current.end++;
10830
10831 if (parser->current.end < parser->end) {
10832 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10833 } else {
10834 lex_mode_push_list_eof(parser);
10835 }
10836
10837 LEX(PM_TOKEN_PERCENT_UPPER_I);
10838 }
10839 case 'r': {
10840 parser->current.end++;
10841
10842 if (parser->current.end < parser->end) {
10843 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10844 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10845 } else {
10846 lex_mode_push_regexp(parser, '\0', '\0');
10847 }
10848
10849 LEX(PM_TOKEN_REGEXP_BEGIN);
10850 }
10851 case 'q': {
10852 parser->current.end++;
10853
10854 if (parser->current.end < parser->end) {
10855 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10856 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10857 } else {
10858 lex_mode_push_string_eof(parser);
10859 }
10860
10861 LEX(PM_TOKEN_STRING_BEGIN);
10862 }
10863 case 'Q': {
10864 parser->current.end++;
10865
10866 if (parser->current.end < parser->end) {
10867 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10868 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10869 } else {
10870 lex_mode_push_string_eof(parser);
10871 }
10872
10873 LEX(PM_TOKEN_STRING_BEGIN);
10874 }
10875 case 's': {
10876 parser->current.end++;
10877
10878 if (parser->current.end < parser->end) {
10879 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10880 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10881 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
10882 } else {
10883 lex_mode_push_string_eof(parser);
10884 }
10885
10886 LEX(PM_TOKEN_SYMBOL_BEGIN);
10887 }
10888 case 'w': {
10889 parser->current.end++;
10890
10891 if (parser->current.end < parser->end) {
10892 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10893 } else {
10894 lex_mode_push_list_eof(parser);
10895 }
10896
10897 LEX(PM_TOKEN_PERCENT_LOWER_W);
10898 }
10899 case 'W': {
10900 parser->current.end++;
10901
10902 if (parser->current.end < parser->end) {
10903 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10904 } else {
10905 lex_mode_push_list_eof(parser);
10906 }
10907
10908 LEX(PM_TOKEN_PERCENT_UPPER_W);
10909 }
10910 case 'x': {
10911 parser->current.end++;
10912
10913 if (parser->current.end < parser->end) {
10914 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10915 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10916 } else {
10917 lex_mode_push_string_eof(parser);
10918 }
10919
10920 LEX(PM_TOKEN_PERCENT_LOWER_X);
10921 }
10922 default:
10923 // If we get to this point, then we have a % that is completely
10924 // unparsable. In this case we'll just drop it from the parser
10925 // and skip past it and hope that the next token is something
10926 // that we can parse.
10927 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10928 goto lex_next_token;
10929 }
10930 }
10931
10932 if (ambiguous_operator_p(parser, space_seen)) {
10933 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
10934 }
10935
10936 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
10937 LEX(PM_TOKEN_PERCENT);
10938 }
10939
10940 // global variable
10941 case '$': {
10942 pm_token_type_t type = lex_global_variable(parser);
10943
10944 // If we're lexing an embedded variable, then we need to pop back into
10945 // the parent lex context.
10946 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10947 lex_mode_pop(parser);
10948 }
10949
10950 lex_state_set(parser, PM_LEX_STATE_END);
10951 LEX(type);
10952 }
10953
10954 // instance variable, class variable
10955 case '@':
10956 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
10957 LEX(lex_at_variable(parser));
10958
10959 default: {
10960 if (*parser->current.start != '_') {
10961 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
10962
10963 // If this isn't the beginning of an identifier, then
10964 // it's an invalid token as we've exhausted all of the
10965 // other options. We'll skip past it and return the next
10966 // token after adding an appropriate error message.
10967 if (!width) {
10968 if (*parser->current.start >= 0x80) {
10969 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
10970 } else if (*parser->current.start == '\\') {
10971 switch (peek_at(parser, parser->current.start + 1)) {
10972 case ' ':
10973 parser->current.end++;
10974 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
10975 break;
10976 case '\f':
10977 parser->current.end++;
10978 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
10979 break;
10980 case '\t':
10981 parser->current.end++;
10982 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
10983 break;
10984 case '\v':
10985 parser->current.end++;
10986 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
10987 break;
10988 case '\r':
10989 if (peek_at(parser, parser->current.start + 2) != '\n') {
10990 parser->current.end++;
10991 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
10992 break;
10993 }
10995 default:
10996 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
10997 break;
10998 }
10999 } else if (char_is_ascii_printable(*parser->current.start)) {
11000 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11001 } else {
11002 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11003 }
11004
11005 goto lex_next_token;
11006 }
11007
11008 parser->current.end = parser->current.start + width;
11009 }
11010
11011 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11012
11013 // If we've hit a __END__ and it was at the start of the
11014 // line or the start of the file and it is followed by
11015 // either a \n or a \r\n, then this is the last token of the
11016 // file.
11017 if (
11018 ((parser->current.end - parser->current.start) == 7) &&
11019 current_token_starts_line(parser) &&
11020 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11021 (parser->current.end == parser->end || match_eol(parser))
11022 ) {
11023 // Since we know we're about to add an __END__ comment,
11024 // we know we need to add all of the newlines to get the
11025 // correct column information for it.
11026 const uint8_t *cursor = parser->current.end;
11027 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11028 pm_newline_list_append(&parser->newline_list, cursor++);
11029 }
11030
11031 parser->current.end = parser->end;
11032 parser->current.type = PM_TOKEN___END__;
11033 parser_lex_callback(parser);
11034
11035 parser->data_loc.start = parser->current.start;
11036 parser->data_loc.end = parser->current.end;
11037
11038 LEX(PM_TOKEN_EOF);
11039 }
11040
11041 pm_lex_state_t last_state = parser->lex_state;
11042
11043 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11044 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11045 if (previous_command_start) {
11046 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11047 } else {
11048 lex_state_set(parser, PM_LEX_STATE_ARG);
11049 }
11050 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11051 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11052 } else {
11053 lex_state_set(parser, PM_LEX_STATE_END);
11054 }
11055 }
11056
11057 if (
11058 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11059 (type == PM_TOKEN_IDENTIFIER) &&
11060 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11061 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11062 ) {
11063 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11064 }
11065
11066 LEX(type);
11067 }
11068 }
11069 }
11070 case PM_LEX_LIST: {
11071 if (parser->next_start != NULL) {
11072 parser->current.end = parser->next_start;
11073 parser->next_start = NULL;
11074 }
11075
11076 // First we'll set the beginning of the token.
11077 parser->current.start = parser->current.end;
11078
11079 // If there's any whitespace at the start of the list, then we're
11080 // going to trim it off the beginning and create a new token.
11081 size_t whitespace;
11082
11083 if (parser->heredoc_end) {
11084 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11085 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11086 whitespace += 1;
11087 }
11088 } else {
11089 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11090 }
11091
11092 if (whitespace > 0) {
11093 parser->current.end += whitespace;
11094 if (peek_offset(parser, -1) == '\n') {
11095 // mutates next_start
11096 parser_flush_heredoc_end(parser);
11097 }
11098 LEX(PM_TOKEN_WORDS_SEP);
11099 }
11100
11101 // We'll check if we're at the end of the file. If we are, then we
11102 // need to return the EOF token.
11103 if (parser->current.end >= parser->end) {
11104 LEX(PM_TOKEN_EOF);
11105 }
11106
11107 // Here we'll get a list of the places where strpbrk should break,
11108 // and then find the first one.
11109 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11110 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11111 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11112
11113 // If we haven't found an escape yet, then this buffer will be
11114 // unallocated since we can refer directly to the source string.
11115 pm_token_buffer_t token_buffer = { 0 };
11116
11117 while (breakpoint != NULL) {
11118 // If we hit whitespace, then we must have received content by
11119 // now, so we can return an element of the list.
11120 if (pm_char_is_whitespace(*breakpoint)) {
11121 parser->current.end = breakpoint;
11122 pm_token_buffer_flush(parser, &token_buffer);
11123 LEX(PM_TOKEN_STRING_CONTENT);
11124 }
11125
11126 // If we hit the terminator, we need to check which token to
11127 // return.
11128 if (*breakpoint == lex_mode->as.list.terminator) {
11129 // If this terminator doesn't actually close the list, then
11130 // we need to continue on past it.
11131 if (lex_mode->as.list.nesting > 0) {
11132 parser->current.end = breakpoint + 1;
11133 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11134 lex_mode->as.list.nesting--;
11135 continue;
11136 }
11137
11138 // If we've hit the terminator and we've already skipped
11139 // past content, then we can return a list node.
11140 if (breakpoint > parser->current.start) {
11141 parser->current.end = breakpoint;
11142 pm_token_buffer_flush(parser, &token_buffer);
11143 LEX(PM_TOKEN_STRING_CONTENT);
11144 }
11145
11146 // Otherwise, switch back to the default state and return
11147 // the end of the list.
11148 parser->current.end = breakpoint + 1;
11149 lex_mode_pop(parser);
11150 lex_state_set(parser, PM_LEX_STATE_END);
11151 LEX(PM_TOKEN_STRING_END);
11152 }
11153
11154 // If we hit a null byte, skip directly past it.
11155 if (*breakpoint == '\0') {
11156 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11157 continue;
11158 }
11159
11160 // If we hit escapes, then we need to treat the next token
11161 // literally. In this case we'll skip past the next character
11162 // and find the next breakpoint.
11163 if (*breakpoint == '\\') {
11164 parser->current.end = breakpoint + 1;
11165
11166 // If we've hit the end of the file, then break out of the
11167 // loop by setting the breakpoint to NULL.
11168 if (parser->current.end == parser->end) {
11169 breakpoint = NULL;
11170 continue;
11171 }
11172
11173 pm_token_buffer_escape(parser, &token_buffer);
11174 uint8_t peeked = peek(parser);
11175
11176 switch (peeked) {
11177 case ' ':
11178 case '\f':
11179 case '\t':
11180 case '\v':
11181 case '\\':
11182 pm_token_buffer_push_byte(&token_buffer, peeked);
11183 parser->current.end++;
11184 break;
11185 case '\r':
11186 parser->current.end++;
11187 if (peek(parser) != '\n') {
11188 pm_token_buffer_push_byte(&token_buffer, '\r');
11189 break;
11190 }
11192 case '\n':
11193 pm_token_buffer_push_byte(&token_buffer, '\n');
11194
11195 if (parser->heredoc_end) {
11196 // ... if we are on the same line as a heredoc,
11197 // flush the heredoc and continue parsing after
11198 // heredoc_end.
11199 parser_flush_heredoc_end(parser);
11200 pm_token_buffer_copy(parser, &token_buffer);
11201 LEX(PM_TOKEN_STRING_CONTENT);
11202 } else {
11203 // ... else track the newline.
11204 pm_newline_list_append(&parser->newline_list, parser->current.end);
11205 }
11206
11207 parser->current.end++;
11208 break;
11209 default:
11210 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11211 pm_token_buffer_push_byte(&token_buffer, peeked);
11212 parser->current.end++;
11213 } else if (lex_mode->as.list.interpolation) {
11214 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11215 } else {
11216 pm_token_buffer_push_byte(&token_buffer, '\\');
11217 pm_token_buffer_push_escaped(&token_buffer, parser);
11218 }
11219
11220 break;
11221 }
11222
11223 token_buffer.cursor = parser->current.end;
11224 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11225 continue;
11226 }
11227
11228 // If we hit a #, then we will attempt to lex interpolation.
11229 if (*breakpoint == '#') {
11230 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11231
11232 if (type == PM_TOKEN_NOT_PROVIDED) {
11233 // If we haven't returned at this point then we had something
11234 // that looked like an interpolated class or instance variable
11235 // like "#@" but wasn't actually. In this case we'll just skip
11236 // to the next breakpoint.
11237 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11238 continue;
11239 }
11240
11241 if (type == PM_TOKEN_STRING_CONTENT) {
11242 pm_token_buffer_flush(parser, &token_buffer);
11243 }
11244
11245 LEX(type);
11246 }
11247
11248 // If we've hit the incrementor, then we need to skip past it
11249 // and find the next breakpoint.
11250 assert(*breakpoint == lex_mode->as.list.incrementor);
11251 parser->current.end = breakpoint + 1;
11252 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11253 lex_mode->as.list.nesting++;
11254 continue;
11255 }
11256
11257 if (parser->current.end > parser->current.start) {
11258 pm_token_buffer_flush(parser, &token_buffer);
11259 LEX(PM_TOKEN_STRING_CONTENT);
11260 }
11261
11262 // If we were unable to find a breakpoint, then this token hits the
11263 // end of the file.
11264 parser->current.end = parser->end;
11265 pm_token_buffer_flush(parser, &token_buffer);
11266 LEX(PM_TOKEN_STRING_CONTENT);
11267 }
11268 case PM_LEX_REGEXP: {
11269 // First, we'll set the start of this token to be the current end.
11270 if (parser->next_start == NULL) {
11271 parser->current.start = parser->current.end;
11272 } else {
11273 parser->current.start = parser->next_start;
11274 parser->current.end = parser->next_start;
11275 parser->next_start = NULL;
11276 }
11277
11278 // We'll check if we're at the end of the file. If we are, then we
11279 // need to return the EOF token.
11280 if (parser->current.end >= parser->end) {
11281 LEX(PM_TOKEN_EOF);
11282 }
11283
11284 // Get a reference to the current mode.
11285 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11286
11287 // These are the places where we need to split up the content of the
11288 // regular expression. We'll use strpbrk to find the first of these
11289 // characters.
11290 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11291 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11292 pm_regexp_token_buffer_t token_buffer = { 0 };
11293
11294 while (breakpoint != NULL) {
11295 uint8_t term = lex_mode->as.regexp.terminator;
11296 bool is_terminator = (*breakpoint == term);
11297
11298 // If the terminator is newline, we need to consider \r\n _also_ a newline
11299 // For example: `%r\nfoo\r\n`
11300 // The regexp should be /foo/, not /foo\r/
11301 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11302 if (term == '\n') {
11303 is_terminator = true;
11304 }
11305
11306 // If the terminator is a CR, but we see a CRLF, we need to
11307 // treat the CRLF as a newline, meaning this is _not_ the
11308 // terminator
11309 if (term == '\r') {
11310 is_terminator = false;
11311 }
11312 }
11313
11314 // If we hit the terminator, we need to determine what kind of
11315 // token to return.
11316 if (is_terminator) {
11317 if (lex_mode->as.regexp.nesting > 0) {
11318 parser->current.end = breakpoint + 1;
11319 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11320 lex_mode->as.regexp.nesting--;
11321 continue;
11322 }
11323
11324 // Here we've hit the terminator. If we have already consumed
11325 // content then we need to return that content as string content
11326 // first.
11327 if (breakpoint > parser->current.start) {
11328 parser->current.end = breakpoint;
11329 pm_regexp_token_buffer_flush(parser, &token_buffer);
11330 LEX(PM_TOKEN_STRING_CONTENT);
11331 }
11332
11333 // Check here if we need to track the newline.
11334 size_t eol_length = match_eol_at(parser, breakpoint);
11335 if (eol_length) {
11336 parser->current.end = breakpoint + eol_length;
11337
11338 // Track the newline if we're not in a heredoc that
11339 // would already have added the newline to the
11340 // list.
11341 if (parser->heredoc_end == NULL) {
11342 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
11343 }
11344 } else {
11345 parser->current.end = breakpoint + 1;
11346 }
11347
11348 // Since we've hit the terminator of the regular expression,
11349 // we now need to parse the options.
11350 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11351
11352 lex_mode_pop(parser);
11353 lex_state_set(parser, PM_LEX_STATE_END);
11354 LEX(PM_TOKEN_REGEXP_END);
11355 }
11356
11357 // If we've hit the incrementor, then we need to skip past it
11358 // and find the next breakpoint.
11359 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11360 parser->current.end = breakpoint + 1;
11361 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11362 lex_mode->as.regexp.nesting++;
11363 continue;
11364 }
11365
11366 switch (*breakpoint) {
11367 case '\0':
11368 // If we hit a null byte, skip directly past it.
11369 parser->current.end = breakpoint + 1;
11370 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11371 break;
11372 case '\r':
11373 if (peek_at(parser, breakpoint + 1) != '\n') {
11374 parser->current.end = breakpoint + 1;
11375 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11376 break;
11377 }
11378
11379 breakpoint++;
11380 parser->current.end = breakpoint;
11381 pm_regexp_token_buffer_escape(parser, &token_buffer);
11382 token_buffer.base.cursor = breakpoint;
11383
11385 case '\n':
11386 // If we've hit a newline, then we need to track that in
11387 // the list of newlines.
11388 if (parser->heredoc_end == NULL) {
11389 pm_newline_list_append(&parser->newline_list, breakpoint);
11390 parser->current.end = breakpoint + 1;
11391 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11392 break;
11393 }
11394
11395 parser->current.end = breakpoint + 1;
11396 parser_flush_heredoc_end(parser);
11397 pm_regexp_token_buffer_flush(parser, &token_buffer);
11398 LEX(PM_TOKEN_STRING_CONTENT);
11399 case '\\': {
11400 // If we hit escapes, then we need to treat the next
11401 // token literally. In this case we'll skip past the
11402 // next character and find the next breakpoint.
11403 parser->current.end = breakpoint + 1;
11404
11405 // If we've hit the end of the file, then break out of
11406 // the loop by setting the breakpoint to NULL.
11407 if (parser->current.end == parser->end) {
11408 breakpoint = NULL;
11409 break;
11410 }
11411
11412 pm_regexp_token_buffer_escape(parser, &token_buffer);
11413 uint8_t peeked = peek(parser);
11414
11415 switch (peeked) {
11416 case '\r':
11417 parser->current.end++;
11418 if (peek(parser) != '\n') {
11419 if (lex_mode->as.regexp.terminator != '\r') {
11420 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11421 }
11422 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11423 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11424 break;
11425 }
11427 case '\n':
11428 if (parser->heredoc_end) {
11429 // ... if we are on the same line as a heredoc,
11430 // flush the heredoc and continue parsing after
11431 // heredoc_end.
11432 parser_flush_heredoc_end(parser);
11433 pm_regexp_token_buffer_copy(parser, &token_buffer);
11434 LEX(PM_TOKEN_STRING_CONTENT);
11435 } else {
11436 // ... else track the newline.
11437 pm_newline_list_append(&parser->newline_list, parser->current.end);
11438 }
11439
11440 parser->current.end++;
11441 break;
11442 case 'c':
11443 case 'C':
11444 case 'M':
11445 case 'u':
11446 case 'x':
11447 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11448 break;
11449 default:
11450 if (lex_mode->as.regexp.terminator == peeked) {
11451 // Some characters when they are used as the
11452 // terminator also receive an escape. They are
11453 // enumerated here.
11454 switch (peeked) {
11455 case '$': case ')': case '*': case '+':
11456 case '.': case '>': case '?': case ']':
11457 case '^': case '|': case '}':
11458 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11459 break;
11460 default:
11461 break;
11462 }
11463
11464 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11465 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11466 parser->current.end++;
11467 break;
11468 }
11469
11470 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11471 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11472 break;
11473 }
11474
11475 token_buffer.base.cursor = parser->current.end;
11476 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11477 break;
11478 }
11479 case '#': {
11480 // If we hit a #, then we will attempt to lex
11481 // interpolation.
11482 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11483
11484 if (type == PM_TOKEN_NOT_PROVIDED) {
11485 // If we haven't returned at this point then we had
11486 // something that looked like an interpolated class or
11487 // instance variable like "#@" but wasn't actually. In
11488 // this case we'll just skip to the next breakpoint.
11489 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11490 break;
11491 }
11492
11493 if (type == PM_TOKEN_STRING_CONTENT) {
11494 pm_regexp_token_buffer_flush(parser, &token_buffer);
11495 }
11496
11497 LEX(type);
11498 }
11499 default:
11500 assert(false && "unreachable");
11501 break;
11502 }
11503 }
11504
11505 if (parser->current.end > parser->current.start) {
11506 pm_regexp_token_buffer_flush(parser, &token_buffer);
11507 LEX(PM_TOKEN_STRING_CONTENT);
11508 }
11509
11510 // If we were unable to find a breakpoint, then this token hits the
11511 // end of the file.
11512 parser->current.end = parser->end;
11513 pm_regexp_token_buffer_flush(parser, &token_buffer);
11514 LEX(PM_TOKEN_STRING_CONTENT);
11515 }
11516 case PM_LEX_STRING: {
11517 // First, we'll set the start of this token to be the current end.
11518 if (parser->next_start == NULL) {
11519 parser->current.start = parser->current.end;
11520 } else {
11521 parser->current.start = parser->next_start;
11522 parser->current.end = parser->next_start;
11523 parser->next_start = NULL;
11524 }
11525
11526 // We'll check if we're at the end of the file. If we are, then we need to
11527 // return the EOF token.
11528 if (parser->current.end >= parser->end) {
11529 LEX(PM_TOKEN_EOF);
11530 }
11531
11532 // These are the places where we need to split up the content of the
11533 // string. We'll use strpbrk to find the first of these characters.
11534 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11535 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11536 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11537
11538 // If we haven't found an escape yet, then this buffer will be
11539 // unallocated since we can refer directly to the source string.
11540 pm_token_buffer_t token_buffer = { 0 };
11541
11542 while (breakpoint != NULL) {
11543 // If we hit the incrementor, then we'll increment the nesting and
11544 // continue lexing.
11545 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11546 lex_mode->as.string.nesting++;
11547 parser->current.end = breakpoint + 1;
11548 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11549 continue;
11550 }
11551
11552 uint8_t term = lex_mode->as.string.terminator;
11553 bool is_terminator = (*breakpoint == term);
11554
11555 // If the terminator is newline, we need to consider \r\n _also_ a newline
11556 // For example: `%\nfoo\r\n`
11557 // The string should be "foo", not "foo\r"
11558 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11559 if (term == '\n') {
11560 is_terminator = true;
11561 }
11562
11563 // If the terminator is a CR, but we see a CRLF, we need to
11564 // treat the CRLF as a newline, meaning this is _not_ the
11565 // terminator
11566 if (term == '\r') {
11567 is_terminator = false;
11568 }
11569 }
11570
11571 // Note that we have to check the terminator here first because we could
11572 // potentially be parsing a % string that has a # character as the
11573 // terminator.
11574 if (is_terminator) {
11575 // If this terminator doesn't actually close the string, then we need
11576 // to continue on past it.
11577 if (lex_mode->as.string.nesting > 0) {
11578 parser->current.end = breakpoint + 1;
11579 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11580 lex_mode->as.string.nesting--;
11581 continue;
11582 }
11583
11584 // Here we've hit the terminator. If we have already consumed content
11585 // then we need to return that content as string content first.
11586 if (breakpoint > parser->current.start) {
11587 parser->current.end = breakpoint;
11588 pm_token_buffer_flush(parser, &token_buffer);
11589 LEX(PM_TOKEN_STRING_CONTENT);
11590 }
11591
11592 // Otherwise we need to switch back to the parent lex mode and
11593 // return the end of the string.
11594 size_t eol_length = match_eol_at(parser, breakpoint);
11595 if (eol_length) {
11596 parser->current.end = breakpoint + eol_length;
11597
11598 // Track the newline if we're not in a heredoc that
11599 // would already have added the newline to the
11600 // list.
11601 if (parser->heredoc_end == NULL) {
11602 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
11603 }
11604 } else {
11605 parser->current.end = breakpoint + 1;
11606 }
11607
11608 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11609 parser->current.end++;
11610 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11611 lex_mode_pop(parser);
11612 LEX(PM_TOKEN_LABEL_END);
11613 }
11614
11615 // When the delimiter itself is a newline, we won't
11616 // get a chance to flush heredocs in the usual places since
11617 // the newline is already consumed.
11618 if (term == '\n' && parser->heredoc_end) {
11619 parser_flush_heredoc_end(parser);
11620 }
11621
11622 lex_state_set(parser, PM_LEX_STATE_END);
11623 lex_mode_pop(parser);
11624 LEX(PM_TOKEN_STRING_END);
11625 }
11626
11627 switch (*breakpoint) {
11628 case '\0':
11629 // Skip directly past the null character.
11630 parser->current.end = breakpoint + 1;
11631 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11632 break;
11633 case '\r':
11634 if (peek_at(parser, breakpoint + 1) != '\n') {
11635 parser->current.end = breakpoint + 1;
11636 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11637 break;
11638 }
11639
11640 // If we hit a \r\n sequence, then we need to treat it
11641 // as a newline.
11642 breakpoint++;
11643 parser->current.end = breakpoint;
11644 pm_token_buffer_escape(parser, &token_buffer);
11645 token_buffer.cursor = breakpoint;
11646
11648 case '\n':
11649 // When we hit a newline, we need to flush any potential
11650 // heredocs. Note that this has to happen after we check
11651 // for the terminator in case the terminator is a
11652 // newline character.
11653 if (parser->heredoc_end == NULL) {
11654 pm_newline_list_append(&parser->newline_list, breakpoint);
11655 parser->current.end = breakpoint + 1;
11656 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11657 break;
11658 }
11659
11660 parser->current.end = breakpoint + 1;
11661 parser_flush_heredoc_end(parser);
11662 pm_token_buffer_flush(parser, &token_buffer);
11663 LEX(PM_TOKEN_STRING_CONTENT);
11664 case '\\': {
11665 // Here we hit escapes.
11666 parser->current.end = breakpoint + 1;
11667
11668 // If we've hit the end of the file, then break out of
11669 // the loop by setting the breakpoint to NULL.
11670 if (parser->current.end == parser->end) {
11671 breakpoint = NULL;
11672 continue;
11673 }
11674
11675 pm_token_buffer_escape(parser, &token_buffer);
11676 uint8_t peeked = peek(parser);
11677
11678 switch (peeked) {
11679 case '\\':
11680 pm_token_buffer_push_byte(&token_buffer, '\\');
11681 parser->current.end++;
11682 break;
11683 case '\r':
11684 parser->current.end++;
11685 if (peek(parser) != '\n') {
11686 if (!lex_mode->as.string.interpolation) {
11687 pm_token_buffer_push_byte(&token_buffer, '\\');
11688 }
11689 pm_token_buffer_push_byte(&token_buffer, '\r');
11690 break;
11691 }
11693 case '\n':
11694 if (!lex_mode->as.string.interpolation) {
11695 pm_token_buffer_push_byte(&token_buffer, '\\');
11696 pm_token_buffer_push_byte(&token_buffer, '\n');
11697 }
11698
11699 if (parser->heredoc_end) {
11700 // ... if we are on the same line as a heredoc,
11701 // flush the heredoc and continue parsing after
11702 // heredoc_end.
11703 parser_flush_heredoc_end(parser);
11704 pm_token_buffer_copy(parser, &token_buffer);
11705 LEX(PM_TOKEN_STRING_CONTENT);
11706 } else {
11707 // ... else track the newline.
11708 pm_newline_list_append(&parser->newline_list, parser->current.end);
11709 }
11710
11711 parser->current.end++;
11712 break;
11713 default:
11714 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
11715 pm_token_buffer_push_byte(&token_buffer, peeked);
11716 parser->current.end++;
11717 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
11718 pm_token_buffer_push_byte(&token_buffer, peeked);
11719 parser->current.end++;
11720 } else if (lex_mode->as.string.interpolation) {
11721 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11722 } else {
11723 pm_token_buffer_push_byte(&token_buffer, '\\');
11724 pm_token_buffer_push_escaped(&token_buffer, parser);
11725 }
11726
11727 break;
11728 }
11729
11730 token_buffer.cursor = parser->current.end;
11731 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11732 break;
11733 }
11734 case '#': {
11735 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11736
11737 if (type == PM_TOKEN_NOT_PROVIDED) {
11738 // If we haven't returned at this point then we had something that
11739 // looked like an interpolated class or instance variable like "#@"
11740 // but wasn't actually. In this case we'll just skip to the next
11741 // breakpoint.
11742 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11743 break;
11744 }
11745
11746 if (type == PM_TOKEN_STRING_CONTENT) {
11747 pm_token_buffer_flush(parser, &token_buffer);
11748 }
11749
11750 LEX(type);
11751 }
11752 default:
11753 assert(false && "unreachable");
11754 }
11755 }
11756
11757 if (parser->current.end > parser->current.start) {
11758 pm_token_buffer_flush(parser, &token_buffer);
11759 LEX(PM_TOKEN_STRING_CONTENT);
11760 }
11761
11762 // If we've hit the end of the string, then this is an unterminated
11763 // string. In that case we'll return a string content token.
11764 parser->current.end = parser->end;
11765 pm_token_buffer_flush(parser, &token_buffer);
11766 LEX(PM_TOKEN_STRING_CONTENT);
11767 }
11768 case PM_LEX_HEREDOC: {
11769 // First, we'll set to start of this token.
11770 if (parser->next_start == NULL) {
11771 parser->current.start = parser->current.end;
11772 } else {
11773 parser->current.start = parser->next_start;
11774 parser->current.end = parser->next_start;
11775 parser->heredoc_end = NULL;
11776 parser->next_start = NULL;
11777 }
11778
11779 // Now let's grab the information about the identifier off of the
11780 // current lex mode.
11781 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11782 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
11783
11784 bool line_continuation = lex_mode->as.heredoc.line_continuation;
11785 lex_mode->as.heredoc.line_continuation = false;
11786
11787 // We'll check if we're at the end of the file. If we are, then we
11788 // will add an error (because we weren't able to find the
11789 // terminator) but still continue parsing so that content after the
11790 // declaration of the heredoc can be parsed.
11791 if (parser->current.end >= parser->end) {
11792 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
11793 parser->next_start = lex_mode->as.heredoc.next_start;
11794 parser->heredoc_end = parser->current.end;
11795 lex_state_set(parser, PM_LEX_STATE_END);
11796 lex_mode_pop(parser);
11797 LEX(PM_TOKEN_HEREDOC_END);
11798 }
11799
11800 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
11801 size_t ident_length = heredoc_lex_mode->ident_length;
11802
11803 // If we are immediately following a newline and we have hit the
11804 // terminator, then we need to return the ending of the heredoc.
11805 if (current_token_starts_line(parser)) {
11806 const uint8_t *start = parser->current.start;
11807
11808 if (!line_continuation && (start + ident_length <= parser->end)) {
11809 const uint8_t *newline = next_newline(start, parser->end - start);
11810 const uint8_t *ident_end = newline;
11811 const uint8_t *terminator_end = newline;
11812
11813 if (newline == NULL) {
11814 terminator_end = parser->end;
11815 ident_end = parser->end;
11816 } else {
11817 terminator_end++;
11818 if (newline[-1] == '\r') {
11819 ident_end--; // Remove \r
11820 }
11821 }
11822
11823 const uint8_t *terminator_start = ident_end - ident_length;
11824 const uint8_t *cursor = start;
11825
11826 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11827 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11828 cursor++;
11829 }
11830 }
11831
11832 if (
11833 (cursor == terminator_start) &&
11834 (memcmp(terminator_start, ident_start, ident_length) == 0)
11835 ) {
11836 if (newline != NULL) {
11837 pm_newline_list_append(&parser->newline_list, newline);
11838 }
11839
11840 parser->current.end = terminator_end;
11841 if (*lex_mode->as.heredoc.next_start == '\\') {
11842 parser->next_start = NULL;
11843 } else {
11844 parser->next_start = lex_mode->as.heredoc.next_start;
11845 parser->heredoc_end = parser->current.end;
11846 }
11847
11848 lex_state_set(parser, PM_LEX_STATE_END);
11849 lex_mode_pop(parser);
11850 LEX(PM_TOKEN_HEREDOC_END);
11851 }
11852 }
11853
11854 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
11855 if (
11856 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
11857 lex_mode->as.heredoc.common_whitespace != NULL &&
11858 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
11859 peek_at(parser, start) != '\n'
11860 ) {
11861 *lex_mode->as.heredoc.common_whitespace = whitespace;
11862 }
11863 }
11864
11865 // Otherwise we'll be parsing string content. These are the places
11866 // where we need to split up the content of the heredoc. We'll use
11867 // strpbrk to find the first of these characters.
11868 uint8_t breakpoints[] = "\r\n\\#";
11869
11870 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
11871 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11872 breakpoints[3] = '\0';
11873 }
11874
11875 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11876 pm_token_buffer_t token_buffer = { 0 };
11877 bool was_line_continuation = false;
11878
11879 while (breakpoint != NULL) {
11880 switch (*breakpoint) {
11881 case '\0':
11882 // Skip directly past the null character.
11883 parser->current.end = breakpoint + 1;
11884 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11885 break;
11886 case '\r':
11887 parser->current.end = breakpoint + 1;
11888
11889 if (peek_at(parser, breakpoint + 1) != '\n') {
11890 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11891 break;
11892 }
11893
11894 // If we hit a \r\n sequence, then we want to replace it
11895 // with a single \n character in the final string.
11896 breakpoint++;
11897 pm_token_buffer_escape(parser, &token_buffer);
11898 token_buffer.cursor = breakpoint;
11899
11901 case '\n': {
11902 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
11903 parser_flush_heredoc_end(parser);
11904 parser->current.end = breakpoint + 1;
11905 pm_token_buffer_flush(parser, &token_buffer);
11906 LEX(PM_TOKEN_STRING_CONTENT);
11907 }
11908
11909 pm_newline_list_append(&parser->newline_list, breakpoint);
11910
11911 // If we have a - or ~ heredoc, then we can match after
11912 // some leading whitespace.
11913 const uint8_t *start = breakpoint + 1;
11914
11915 if (!was_line_continuation && (start + ident_length <= parser->end)) {
11916 // We want to match the terminator starting from the end of the line in case
11917 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
11918 const uint8_t *newline = next_newline(start, parser->end - start);
11919
11920 if (newline == NULL) {
11921 newline = parser->end;
11922 } else if (newline[-1] == '\r') {
11923 newline--; // Remove \r
11924 }
11925
11926 // Start of a possible terminator.
11927 const uint8_t *terminator_start = newline - ident_length;
11928
11929 // Cursor to check for the leading whitespace. We skip the
11930 // leading whitespace if we have a - or ~ heredoc.
11931 const uint8_t *cursor = start;
11932
11933 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11934 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11935 cursor++;
11936 }
11937 }
11938
11939 if (
11940 cursor == terminator_start &&
11941 (memcmp(terminator_start, ident_start, ident_length) == 0)
11942 ) {
11943 parser->current.end = breakpoint + 1;
11944 pm_token_buffer_flush(parser, &token_buffer);
11945 LEX(PM_TOKEN_STRING_CONTENT);
11946 }
11947 }
11948
11949 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
11950
11951 // If we have hit a newline that is followed by a valid
11952 // terminator, then we need to return the content of the
11953 // heredoc here as string content. Then, the next time a
11954 // token is lexed, it will match again and return the
11955 // end of the heredoc.
11956 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
11957 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
11958 *lex_mode->as.heredoc.common_whitespace = whitespace;
11959 }
11960
11961 parser->current.end = breakpoint + 1;
11962 pm_token_buffer_flush(parser, &token_buffer);
11963 LEX(PM_TOKEN_STRING_CONTENT);
11964 }
11965
11966 // Otherwise we hit a newline and it wasn't followed by
11967 // a terminator, so we can continue parsing.
11968 parser->current.end = breakpoint + 1;
11969 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11970 break;
11971 }
11972 case '\\': {
11973 // If we hit an escape, then we need to skip past
11974 // however many characters the escape takes up. However
11975 // it's important that if \n or \r\n are escaped, we
11976 // stop looping before the newline and not after the
11977 // newline so that we can still potentially find the
11978 // terminator of the heredoc.
11979 parser->current.end = breakpoint + 1;
11980
11981 // If we've hit the end of the file, then break out of
11982 // the loop by setting the breakpoint to NULL.
11983 if (parser->current.end == parser->end) {
11984 breakpoint = NULL;
11985 continue;
11986 }
11987
11988 pm_token_buffer_escape(parser, &token_buffer);
11989 uint8_t peeked = peek(parser);
11990
11991 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11992 switch (peeked) {
11993 case '\r':
11994 parser->current.end++;
11995 if (peek(parser) != '\n') {
11996 pm_token_buffer_push_byte(&token_buffer, '\\');
11997 pm_token_buffer_push_byte(&token_buffer, '\r');
11998 break;
11999 }
12001 case '\n':
12002 pm_token_buffer_push_byte(&token_buffer, '\\');
12003 pm_token_buffer_push_byte(&token_buffer, '\n');
12004 token_buffer.cursor = parser->current.end + 1;
12005 breakpoint = parser->current.end;
12006 continue;
12007 default:
12008 pm_token_buffer_push_byte(&token_buffer, '\\');
12009 pm_token_buffer_push_escaped(&token_buffer, parser);
12010 break;
12011 }
12012 } else {
12013 switch (peeked) {
12014 case '\r':
12015 parser->current.end++;
12016 if (peek(parser) != '\n') {
12017 pm_token_buffer_push_byte(&token_buffer, '\r');
12018 break;
12019 }
12021 case '\n':
12022 // If we are in a tilde here, we should
12023 // break out of the loop and return the
12024 // string content.
12025 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12026 const uint8_t *end = parser->current.end;
12027 pm_newline_list_append(&parser->newline_list, end);
12028
12029 // Here we want the buffer to only
12030 // include up to the backslash.
12031 parser->current.end = breakpoint;
12032 pm_token_buffer_flush(parser, &token_buffer);
12033
12034 // Now we can advance the end of the
12035 // token past the newline.
12036 parser->current.end = end + 1;
12037 lex_mode->as.heredoc.line_continuation = true;
12038 LEX(PM_TOKEN_STRING_CONTENT);
12039 }
12040
12041 was_line_continuation = true;
12042 token_buffer.cursor = parser->current.end + 1;
12043 breakpoint = parser->current.end;
12044 continue;
12045 default:
12046 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12047 break;
12048 }
12049 }
12050
12051 token_buffer.cursor = parser->current.end;
12052 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12053 break;
12054 }
12055 case '#': {
12056 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12057
12058 if (type == PM_TOKEN_NOT_PROVIDED) {
12059 // If we haven't returned at this point then we had
12060 // something that looked like an interpolated class
12061 // or instance variable like "#@" but wasn't
12062 // actually. In this case we'll just skip to the
12063 // next breakpoint.
12064 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12065 break;
12066 }
12067
12068 if (type == PM_TOKEN_STRING_CONTENT) {
12069 pm_token_buffer_flush(parser, &token_buffer);
12070 }
12071
12072 LEX(type);
12073 }
12074 default:
12075 assert(false && "unreachable");
12076 }
12077
12078 was_line_continuation = false;
12079 }
12080
12081 if (parser->current.end > parser->current.start) {
12082 parser->current.end = parser->end;
12083 pm_token_buffer_flush(parser, &token_buffer);
12084 LEX(PM_TOKEN_STRING_CONTENT);
12085 }
12086
12087 // If we've hit the end of the string, then this is an unterminated
12088 // heredoc. In that case we'll return a string content token.
12089 parser->current.end = parser->end;
12090 pm_token_buffer_flush(parser, &token_buffer);
12091 LEX(PM_TOKEN_STRING_CONTENT);
12092 }
12093 }
12094
12095 assert(false && "unreachable");
12096}
12097
12098#undef LEX
12099
12100/******************************************************************************/
12101/* Parse functions */
12102/******************************************************************************/
12103
/**
 * The precedence levels used by the expression parser, ordered from the
 * loosest binding (statements) to the tightest binding (method calls). Every
 * level is an even number, which leaves room for the `precedence + 1` value
 * that the associativity macros of the binding power table compute.
 */
typedef enum {
    /* Used to indicate this token cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET = 0,

    PM_BINDING_POWER_STATEMENT = 2,

    /* rescue */
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,

    /* if unless until while */
    PM_BINDING_POWER_MODIFIER = 6,

    /* and or */
    PM_BINDING_POWER_COMPOSITION = 8,

    /* not */
    PM_BINDING_POWER_NOT = 10,

    /* => in */
    PM_BINDING_POWER_MATCH = 12,

    /* defined? */
    PM_BINDING_POWER_DEFINED = 14,

    /* = */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,

    /* = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_ASSIGNMENT = 18,

    /* ?: */
    PM_BINDING_POWER_TERNARY = 20,

    /* .. ... */
    PM_BINDING_POWER_RANGE = 22,

    /* || */
    PM_BINDING_POWER_LOGICAL_OR = 24,

    /* && */
    PM_BINDING_POWER_LOGICAL_AND = 26,

    /* <=> == === != =~ !~ */
    PM_BINDING_POWER_EQUALITY = 28,

    /* > >= < <= */
    PM_BINDING_POWER_COMPARISON = 30,

    /* | ^ */
    PM_BINDING_POWER_BITWISE_OR = 32,

    /* & */
    PM_BINDING_POWER_BITWISE_AND = 34,

    /* << >> */
    PM_BINDING_POWER_SHIFT = 36,

    /* + - */
    PM_BINDING_POWER_TERM = 38,

    /* * / % */
    PM_BINDING_POWER_FACTOR = 40,

    /* -@ */
    PM_BINDING_POWER_UMINUS = 42,

    /* ** */
    PM_BINDING_POWER_EXPONENT = 44,

    /* ! ~ +@ */
    PM_BINDING_POWER_UNARY = 46,

    /* [] []= */
    PM_BINDING_POWER_INDEX = 48,

    /* :: . */
    PM_BINDING_POWER_CALL = 50,

    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12141
12146typedef struct {
12148 pm_binding_power_t left;
12149
12151 pm_binding_power_t right;
12152
12155
12162
12163#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
12164#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
12165#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
12166#define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
12167#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
12168
12169pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12170 // rescue
12171 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12172
12173 // if unless until while
12174 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12175 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12176 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12177 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12178
12179 // and or
12180 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12181 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12182
12183 // => in
12184 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12185 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12186
12187 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12188 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12189 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12190 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12191 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12192 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12193 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12194 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12195 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12196 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12197 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12198 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12199 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12200 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12201 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12202
12203 // ?:
12204 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12205
12206 // .. ...
12207 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12208 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12209 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12210 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12211
12212 // ||
12213 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12214
12215 // &&
12216 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12217
12218 // != !~ == === =~ <=>
12219 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12220 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12221 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12222 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12223 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12224 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12225
12226 // > >= < <=
12227 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12228 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12229 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12230 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12231
12232 // ^ |
12233 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12234 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12235
12236 // &
12237 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12238
12239 // >> <<
12240 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12241 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12242
12243 // - +
12244 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12245 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12246
12247 // % / *
12248 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12249 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12250 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12251 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12252
12253 // -@
12254 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12255 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12256
12257 // **
12258 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12259 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12260
12261 // ! ~ +@
12262 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12263 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12264 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12265
12266 // [
12267 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12268
12269 // :: . &.
12270 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12271 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12272 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12273};
12274
12275#undef BINDING_POWER_ASSIGNMENT
12276#undef LEFT_ASSOCIATIVE
12277#undef RIGHT_ASSOCIATIVE
12278#undef RIGHT_ASSOCIATIVE_UNARY
12279
12283static inline bool
12284match1(const pm_parser_t *parser, pm_token_type_t type) {
12285 return parser->current.type == type;
12286}
12287
12291static inline bool
12292match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12293 return match1(parser, type1) || match1(parser, type2);
12294}
12295
12299static inline bool
12300match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12301 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12302}
12303
12307static inline bool
12308match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12309 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12310}
12311
12315static inline bool
12316match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12317 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12318}
12319
12323static inline bool
12324match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12325 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12326}
12327
12334static bool
12335accept1(pm_parser_t *parser, pm_token_type_t type) {
12336 if (match1(parser, type)) {
12337 parser_lex(parser);
12338 return true;
12339 }
12340 return false;
12341}
12342
12347static inline bool
12348accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12349 if (match2(parser, type1, type2)) {
12350 parser_lex(parser);
12351 return true;
12352 }
12353 return false;
12354}
12355
12367static void
12368expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12369 if (accept1(parser, type)) return;
12370
12371 const uint8_t *location = parser->previous.end;
12372 pm_parser_err(parser, location, location, diag_id);
12373
12374 parser->previous.start = location;
12375 parser->previous.type = PM_TOKEN_MISSING;
12376}
12377
12382static void
12383expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12384 if (accept2(parser, type1, type2)) return;
12385
12386 const uint8_t *location = parser->previous.end;
12387 pm_parser_err(parser, location, location, diag_id);
12388
12389 parser->previous.start = location;
12390 parser->previous.type = PM_TOKEN_MISSING;
12391}
12392
12397static void
12398expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12399 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12400 parser_lex(parser);
12401 } else {
12402 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12403 parser->previous.start = parser->previous.end;
12404 parser->previous.type = PM_TOKEN_MISSING;
12405 }
12406}
12407
12408static pm_node_t *
12409parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
12410
12415static pm_node_t *
12416parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
12417 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
12418 pm_assert_value_expression(parser, node);
12419 return node;
12420}
12421
12440static inline bool
12441token_begins_expression_p(pm_token_type_t type) {
12442 switch (type) {
12443 case PM_TOKEN_EQUAL_GREATER:
12444 case PM_TOKEN_KEYWORD_IN:
12445 // We need to special case this because it is a binary operator that
12446 // should not be marked as beginning an expression.
12447 return false;
12448 case PM_TOKEN_BRACE_RIGHT:
12449 case PM_TOKEN_BRACKET_RIGHT:
12450 case PM_TOKEN_COLON:
12451 case PM_TOKEN_COMMA:
12452 case PM_TOKEN_EMBEXPR_END:
12453 case PM_TOKEN_EOF:
12454 case PM_TOKEN_LAMBDA_BEGIN:
12455 case PM_TOKEN_KEYWORD_DO:
12456 case PM_TOKEN_KEYWORD_DO_LOOP:
12457 case PM_TOKEN_KEYWORD_END:
12458 case PM_TOKEN_KEYWORD_ELSE:
12459 case PM_TOKEN_KEYWORD_ELSIF:
12460 case PM_TOKEN_KEYWORD_ENSURE:
12461 case PM_TOKEN_KEYWORD_THEN:
12462 case PM_TOKEN_KEYWORD_RESCUE:
12463 case PM_TOKEN_KEYWORD_WHEN:
12464 case PM_TOKEN_NEWLINE:
12465 case PM_TOKEN_PARENTHESIS_RIGHT:
12466 case PM_TOKEN_SEMICOLON:
12467 // The reason we need this short-circuit is because we're using the
12468 // binding powers table to tell us if the subsequent token could
12469 // potentially be the start of an expression. If there _is_ a binding
12470 // power for one of these tokens, then we should remove it from this list
12471 // and let it be handled by the default case below.
12472 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12473 return false;
12474 case PM_TOKEN_UAMPERSAND:
12475 // This is a special case because this unary operator cannot appear
12476 // as a general operator, it only appears in certain circumstances.
12477 return false;
12478 case PM_TOKEN_UCOLON_COLON:
12479 case PM_TOKEN_UMINUS:
12480 case PM_TOKEN_UMINUS_NUM:
12481 case PM_TOKEN_UPLUS:
12482 case PM_TOKEN_BANG:
12483 case PM_TOKEN_TILDE:
12484 case PM_TOKEN_UDOT_DOT:
12485 case PM_TOKEN_UDOT_DOT_DOT:
12486 // These unary tokens actually do have binding power associated with them
12487 // so that we can correctly place them into the precedence order. But we
12488 // want them to be marked as beginning an expression, so we need to
12489 // special case them here.
12490 return true;
12491 default:
12492 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12493 }
12494}
12495
12500static pm_node_t *
12501parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
12502 if (accept1(parser, PM_TOKEN_USTAR)) {
12503 pm_token_t operator = parser->previous;
12504 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12505 return UP(pm_splat_node_create(parser, &operator, expression));
12506 }
12507
12508 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
12509}
12510
12511static bool
12512pm_node_unreference_each(const pm_node_t *node, void *data) {
12513 switch (PM_NODE_TYPE(node)) {
12514 /* When we are about to destroy a set of nodes that could potentially
12515 * contain block exits for the current scope, we need to check if they
12516 * are contained in the list of block exits and remove them if they are.
12517 */
12518 case PM_BREAK_NODE:
12519 case PM_NEXT_NODE:
12520 case PM_REDO_NODE: {
12521 pm_parser_t *parser = (pm_parser_t *) data;
12522 size_t index = 0;
12523
12524 while (index < parser->current_block_exits->size) {
12525 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12526
12527 if (block_exit == node) {
12528 if (index + 1 < parser->current_block_exits->size) {
12529 memmove(
12530 &parser->current_block_exits->nodes[index],
12531 &parser->current_block_exits->nodes[index + 1],
12532 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12533 );
12534 }
12535 parser->current_block_exits->size--;
12536 return false;
12537 }
12538
12539 index++;
12540 }
12541
12542 return true;
12543 }
12544 /* When an implicit local variable is written to or targeted, it becomes
12545 * a regular, named local variable. This branch removes it from the list
12546 * of implicit parameters when that happens. */
12547 case PM_LOCAL_VARIABLE_READ_NODE:
12548 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12549 pm_parser_t *parser = (pm_parser_t *) data;
12550 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12551
12552 for (size_t index = 0; index < implicit_parameters->size; index++) {
12553 if (implicit_parameters->nodes[index] == node) {
12554 /* If the node is not the last one in the list, we need to
12555 * shift the remaining nodes down to fill the gap. This is
12556 * extremely unlikely to happen. */
12557 if (index != implicit_parameters->size - 1) {
12558 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12559 }
12560
12561 implicit_parameters->size--;
12562 break;
12563 }
12564 }
12565
12566 return false;
12567 }
12568 default:
12569 return true;
12570 }
12571}
12572
12578static void
12579pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12580 pm_visit_node(node, pm_node_unreference_each, parser);
12581}
12582
12587static void
12588parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12589 // The method name needs to change. If we previously had
12590 // foo, we now need foo=. In this case we'll allocate a new
12591 // owned string, copy the previous method name in, and
12592 // append an =.
12593 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12594 size_t length = constant->length;
12595 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
12596 if (name == NULL) return;
12597
12598 memcpy(name, constant->start, length);
12599 name[length] = '=';
12600
12601 // Now switch the name to the new string.
12602 // This silences clang analyzer warning about leak of memory pointed by `name`.
12603 // NOLINTNEXTLINE(clang-analyzer-*)
12604 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12605}
12606
12613static pm_node_t *
12614parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12615 switch (PM_NODE_TYPE(target)) {
12616 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12617 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12618 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12619 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12620 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12621 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12622 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12623 default: break;
12624 }
12625
12626 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
12627 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12628
12629 pm_node_destroy(parser, target);
12630 return UP(result);
12631}
12632
/**
 * Convert a node that was parsed as an ordinary expression into a node that
 * can sit in a target position (for example the left-hand side of a multiple
 * assignment or a for-loop index).
 *
 * Depending on the node type this either retags the node in place by
 * overwriting target->type, replaces it with a newly created target node
 * (destroying the original), or records an error and hands the node back.
 *
 * `multiple` is only consulted for attribute-style call targets, where it
 * turns safe navigation into an error; it is forwarded unchanged when
 * recursing into a splat. `splat_parent` is true when the node being converted
 * is the operand of a splat; in that position a multi target node is rejected.
 *
 * NOTE(review): the embedded listing numbers skip 12658, 12666, 12684, 12698,
 * 12713 and 12782, so a few source lines are absent from this extract.
 */
12641static pm_node_t *
12642parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12643 switch (PM_NODE_TYPE(target)) {
12644 case PM_MISSING_NODE:
// A missing node is returned untouched; no additional error is added here.
12645 return target;
12646 case PM_SOURCE_ENCODING_NODE:
12647 case PM_FALSE_NODE:
12648 case PM_SOURCE_FILE_NODE:
12649 case PM_SOURCE_LINE_NODE:
12650 case PM_NIL_NODE:
12651 case PM_SELF_NODE:
12652 case PM_TRUE_NODE: {
12653 // In these special cases, we have specific error messages and we
12654 // will replace them with local variable writes.
12655 return parse_unwriteable_target(parser, target);
12656 }
12657 case PM_CLASS_VARIABLE_READ_NODE:
// The read node is retagged in place as a target node.
// NOTE(review): listing line 12658 is missing here - presumably a size
// assertion like the one in the PM_CONSTANT_READ_NODE case; confirm.
12659 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12660 return target;
12661 case PM_CONSTANT_PATH_NODE:
// Constant targets are reported as errors when context_def_p() holds.
12662 if (context_def_p(parser)) {
12663 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12664 }
12665
12667 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12668
12669 return target;
12670 case PM_CONSTANT_READ_NODE:
12671 if (context_def_p(parser)) {
12672 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12673 }
12674
// Retagging in place is only valid because both node structs have the
// same size, which this assertion checks.
12675 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12676 target->type = PM_CONSTANT_TARGET_NODE;
12677
12678 return target;
12679 case PM_BACK_REFERENCE_READ_NODE:
12680 case PM_NUMBERED_REFERENCE_READ_NODE:
// These are read-only: report the error and return the node unchanged.
12681 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12682 return target;
12683 case PM_GLOBAL_VARIABLE_READ_NODE:
12685 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
12686 return target;
12687 case PM_LOCAL_VARIABLE_READ_NODE: {
// A name shaped like a numbered parameter is reserved: report it and
// drop the node from the parser's bookkeeping lists.
12688 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
12689 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
12690 pm_node_unreference(parser, target);
12691 }
12692
12693 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
12694 uint32_t name = cast->name;
12695 uint32_t depth = cast->depth;
// Undo the "read" that was recorded for this local in the scope at `depth`.
12696 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
12697
12699 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
12700
12701 return target;
12702 }
12703 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
// Targeting `it` makes it an ordinary local named "it": create a real
// target node, then unreference and destroy the implicit read.
12704 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12705 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
12706
12707 pm_node_unreference(parser, target);
12708 pm_node_destroy(parser, target);
12709
12710 return node;
12711 }
12712 case PM_INSTANCE_VARIABLE_READ_NODE:
12714 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
12715 return target;
12716 case PM_MULTI_TARGET_NODE:
12717 if (splat_parent) {
12718 // Multi target is not accepted in all positions. If this is one
12719 // of them, then we need to add an error.
12720 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12721 }
12722
12723 return target;
12724 case PM_SPLAT_NODE: {
12725 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12726
// Convert the splat's operand (if any), marking it as having a splat parent.
12727 if (splat->expression != NULL) {
12728 splat->expression = parse_target(parser, splat->expression, multiple, true);
12729 }
12730
12731 return UP(splat);
12732 }
12733 case PM_CALL_NODE: {
12734 pm_call_node_t *call = (pm_call_node_t *) target;
12735
12736 // If we have no arguments to the call node and we need this to be a
12737 // target then this is either a method call or a local variable
12738 // write.
12739 if (
12740 (call->message_loc.start != NULL) &&
12741 (call->message_loc.end[-1] != '!') &&
12742 (call->message_loc.end[-1] != '?') &&
12743 (call->opening_loc.start == NULL) &&
12744 (call->arguments == NULL) &&
12745 (call->block == NULL)
12746 ) {
12747 if (call->receiver == NULL) {
12748 // When we get here, we have a local variable write, because it
12749 // was previously marked as a method call but now we have an =.
12750 // This looks like:
12751 //
12752 // foo = 1
12753 //
12754 // When it was parsed in the prefix position, foo was seen as a
12755 // method call with no receiver and no arguments. Now we have an
12756 // =, so we know it's a local variable write.
// The location is copied because the call node is destroyed before the
// replacement node is created.
12757 const pm_location_t message_loc = call->message_loc;
12758
12759 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
12760 pm_node_destroy(parser, target);
12761
12762 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
12763 }
12764
// With a receiver and a message that starts with `_` or an alphanumeric
// character, this is an attribute write target (`foo.bar`): the method
// name gains a trailing `=` and the call is wrapped in a call target node.
12765 if (peek_at(parser, call->message_loc.start) == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
12766 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
12767 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
12768 }
12769
12770 parse_write_name(parser, &call->name);
12771 return UP(pm_call_target_node_create(parser, call));
12772 }
12773 }
12774
12775 // If there is no call operator and the message is "[]" then this is
12776 // an aref expression, and we can transform it into an aset
12777 // expression.
12778 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12779 return UP(pm_index_target_node_create(parser, call));
12780 }
12781 }
// Any other call falls through into the default error handling below.
// NOTE(review): listing line 12782 is missing here - presumably an explicit
// fallthrough marker; confirm against the full file.
12783 default:
12784 // In this case we have a node that we don't know how to convert
12785 // into a target. We need to treat it as an error. For now, we'll
12786 // mark it as an error and just skip right past it.
12787 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12788 return target;
12789 }
12790}
12791
12796static pm_node_t *
12797parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12798 pm_node_t *result = parse_target(parser, target, multiple, false);
12799
12800 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
12801 // parens after the targets.
12802 if (
12803 !match1(parser, PM_TOKEN_EQUAL) &&
12804 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
12805 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
12806 ) {
12807 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
12808 }
12809
12810 return result;
12811}
12812
12817static pm_node_t *
12818parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
12819 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
12820
12821 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
12822 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
12823 }
12824
12825 return write;
12826}
12827
12831static pm_node_t *
12832parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
12833 switch (PM_NODE_TYPE(target)) {
12834 case PM_MISSING_NODE:
12835 pm_node_destroy(parser, value);
12836 return target;
12837 case PM_CLASS_VARIABLE_READ_NODE: {
12838 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
12839 pm_node_destroy(parser, target);
12840 return UP(node);
12841 }
12842 case PM_CONSTANT_PATH_NODE: {
12843 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
12844
12845 if (context_def_p(parser)) {
12846 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12847 }
12848
12849 return parse_shareable_constant_write(parser, node);
12850 }
12851 case PM_CONSTANT_READ_NODE: {
12852 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
12853
12854 if (context_def_p(parser)) {
12855 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12856 }
12857
12858 pm_node_destroy(parser, target);
12859 return parse_shareable_constant_write(parser, node);
12860 }
12861 case PM_BACK_REFERENCE_READ_NODE:
12862 case PM_NUMBERED_REFERENCE_READ_NODE:
12863 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12865 case PM_GLOBAL_VARIABLE_READ_NODE: {
12866 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
12867 pm_node_destroy(parser, target);
12868 return UP(node);
12869 }
12870 case PM_LOCAL_VARIABLE_READ_NODE: {
12872
12873 pm_constant_id_t name = local_read->name;
12874 pm_location_t name_loc = target->location;
12875
12876 uint32_t depth = local_read->depth;
12877 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
12878
12879 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
12880 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
12881 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
12882 pm_node_unreference(parser, target);
12883 }
12884
12885 pm_locals_unread(&scope->locals, name);
12886 pm_node_destroy(parser, target);
12887
12888 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator));
12889 }
12890 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12891 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12892 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
12893
12894 pm_node_unreference(parser, target);
12895 pm_node_destroy(parser, target);
12896
12897 return node;
12898 }
12899 case PM_INSTANCE_VARIABLE_READ_NODE: {
12900 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
12901 pm_node_destroy(parser, target);
12902 return write_node;
12903 }
12904 case PM_MULTI_TARGET_NODE:
12905 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
12906 case PM_SPLAT_NODE: {
12907 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12908
12909 if (splat->expression != NULL) {
12910 splat->expression = parse_write(parser, splat->expression, operator, value);
12911 }
12912
12913 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
12914 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
12915
12916 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
12917 }
12918 case PM_CALL_NODE: {
12919 pm_call_node_t *call = (pm_call_node_t *) target;
12920
12921 // If we have no arguments to the call node and we need this to be a
12922 // target then this is either a method call or a local variable
12923 // write.
12924 if (
12925 (call->message_loc.start != NULL) &&
12926 (call->message_loc.end[-1] != '!') &&
12927 (call->message_loc.end[-1] != '?') &&
12928 (call->opening_loc.start == NULL) &&
12929 (call->arguments == NULL) &&
12930 (call->block == NULL)
12931 ) {
12932 if (call->receiver == NULL) {
12933 // When we get here, we have a local variable write, because it
12934 // was previously marked as a method call but now we have an =.
12935 // This looks like:
12936 //
12937 // foo = 1
12938 //
12939 // When it was parsed in the prefix position, foo was seen as a
12940 // method call with no receiver and no arguments. Now we have an
12941 // =, so we know it's a local variable write.
12942 const pm_location_t message = call->message_loc;
12943
12944 pm_parser_local_add_location(parser, message.start, message.end, 0);
12945 pm_node_destroy(parser, target);
12946
12947 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
12948 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator));
12949
12950 pm_refute_numbered_parameter(parser, message.start, message.end);
12951 return target;
12952 }
12953
12954 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
12955 // When we get here, we have a method call, because it was
12956 // previously marked as a method call but now we have an =. This
12957 // looks like:
12958 //
12959 // foo.bar = 1
12960 //
12961 // When it was parsed in the prefix position, foo.bar was seen as a
12962 // method call with no arguments. Now we have an =, so we know it's
12963 // a method call with an argument. In this case we will create the
12964 // arguments node, parse the argument, and add it to the list.
12965 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
12966 call->arguments = arguments;
12967
12968 pm_arguments_node_arguments_append(arguments, value);
12969 call->base.location.end = arguments->base.location.end;
12970 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
12971
12972 parse_write_name(parser, &call->name);
12973 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
12974
12975 return UP(call);
12976 }
12977 }
12978
12979 // If there is no call operator and the message is "[]" then this is
12980 // an aref expression, and we can transform it into an aset
12981 // expression.
12982 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12983 if (call->arguments == NULL) {
12984 call->arguments = pm_arguments_node_create(parser);
12985 }
12986
12987 pm_arguments_node_arguments_append(call->arguments, value);
12988 target->location.end = value->location.end;
12989
12990 // Replace the name with "[]=".
12991 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
12992 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
12993
12994 // Ensure that the arguments for []= don't contain keywords
12995 pm_index_arguments_check(parser, call->arguments, call->block);
12996 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
12997
12998 return target;
12999 }
13000
13001 // If there are arguments on the call node, then it can't be a
13002 // method call ending with = or a local variable write, so it must
13003 // be a syntax error. In this case we'll fall through to our default
13004 // handling. We need to free the value that we parsed because there
13005 // is no way for us to attach it to the tree at this point.
13006 //
13007 // Since it is possible for the value to contain an implicit
13008 // parameter somewhere in its subtree, we need to walk it and remove
13009 // any implicit parameters from the list of implicit parameters for
13010 // the current scope.
13011 pm_node_unreference(parser, value);
13012 pm_node_destroy(parser, value);
13013 }
13015 default:
13016 // In this case we have a node that we don't know how to convert into a
13017 // target. We need to treat it as an error. For now, we'll mark it as an
13018 // error and just skip right past it.
13019 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13020 return target;
13021 }
13022}
13023
13030static pm_node_t *
13031parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13032 switch (PM_NODE_TYPE(target)) {
13033 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13034 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13035 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13036 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13037 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13038 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13039 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13040 default: break;
13041 }
13042
13043 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13044 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13045
13046 pm_node_destroy(parser, target);
13047 return UP(result);
13048}
13049
13060static pm_node_t *
13061parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13062 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13063
13064 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13065 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13066
13067 while (accept1(parser, PM_TOKEN_COMMA)) {
13068 if (accept1(parser, PM_TOKEN_USTAR)) {
13069 // Here we have a splat operator. It can have a name or be
13070 // anonymous. It can be the final target or be in the middle if
13071 // there haven't been any others yet.
13072 if (has_rest) {
13073 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13074 }
13075
13076 pm_token_t star_operator = parser->previous;
13077 pm_node_t *name = NULL;
13078
13079 if (token_begins_expression_p(parser->current.type)) {
13080 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13081 name = parse_target(parser, name, true, true);
13082 }
13083
13084 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13085 pm_multi_target_node_targets_append(parser, result, splat);
13086 has_rest = true;
13087 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13088 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13089 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13090 target = parse_target(parser, target, true, false);
13091
13092 pm_multi_target_node_targets_append(parser, result, target);
13093 context_pop(parser);
13094 } else if (token_begins_expression_p(parser->current.type)) {
13095 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13096 target = parse_target(parser, target, true, false);
13097
13098 pm_multi_target_node_targets_append(parser, result, target);
13099 } else if (!match1(parser, PM_TOKEN_EOF)) {
13100 // If we get here, then we have a trailing , in a multi target node.
13101 // We'll add an implicit rest node to represent this.
13102 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13103 pm_multi_target_node_targets_append(parser, result, rest);
13104 break;
13105 }
13106 }
13107
13108 return UP(result);
13109}
13110
13115static pm_node_t *
13116parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13117 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13118 accept1(parser, PM_TOKEN_NEWLINE);
13119
13120 // Ensure that we have either an = or a ) after the targets.
13121 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13122 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13123 }
13124
13125 return result;
13126}
13127
/**
 * Parse a list of statements for the given context.
 *
 * Leading newlines and semicolons are skipped. If the current token already
 * terminates `context`, NULL is returned and no statements node is created.
 * Otherwise the context is pushed, expressions are parsed one after another
 * at statement binding power and appended to a new statements node, and the
 * context is popped again before returning. The loop stops when the parser is
 * recovering from an error, when a terminator for `context` is reached, or
 * when EOF is hit while skipping an unparseable token.
 *
 * Before returning, pm_void_statements_check is run over the collected
 * statements.
 */
13131static pm_statements_node_t *
13132parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13133 // First, skip past any optional terminators that might be at the beginning
13134 // of the statements.
13135 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13136
13137 // If we have a terminator, then we can just return NULL.
13138 if (context_terminator(context, &parser->current)) return NULL;
13139
13140 pm_statements_node_t *statements = pm_statements_node_create(parser);
13141
13142 // At this point we know we have at least one statement, and that it
13143 // immediately follows the current token.
13144 context_push(parser, context);
13145
13146 while (true) {
13147 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13148 pm_statements_node_body_append(parser, statements, node, true);
13149
13150 // If we're recovering from a syntax error, then we need to stop parsing
13151 // the statements now.
13152 if (parser->recovering) {
13153 // If this is the level of context where the recovery has happened,
13154 // then we can mark the parser as done recovering.
13155 if (context_terminator(context, &parser->current)) parser->recovering = false;
13156 break;
13157 }
13158
13159 // If we have a terminator, then we will parse all consecutive
13160 // terminators and then continue parsing the statements list.
13161 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13162 // If we have a terminator, then we will continue parsing the
13163 // statements list.
13164 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13165 if (context_terminator(context, &parser->current)) break;
13166
13167 // Now we can continue parsing the list of statements.
13168 continue;
13169 }
13170
13171 // At this point we have a list of statements that are not terminated by
13172 // a newline or semicolon. At this point we need to check if we're at
13173 // the end of the statements list. If we are, then we should break out
13174 // of the loop.
13175 if (context_terminator(context, &parser->current)) break;
13176
13177 // At this point, we have a syntax error, because the statement was not
13178 // terminated by a newline or semicolon, and we're not at the end of the
13179 // statements list. Ideally we should scan forward to determine if we
13180 // should insert a missing terminator or break out of parsing the
13181 // statements list at this point.
13182 //
13183 // We don't have that yet, so instead we'll do a more naive approach. If
13184 // we were unable to parse an expression, then we will skip past this
13185 // token and continue parsing the statements list. Otherwise we'll add
13186 // an error and continue parsing the statements list.
13187 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
// Skip the token that could not be parsed so that the loop makes progress.
13188 parser_lex(parser);
13189
13190 // If we are at the end of the file, then we need to stop parsing
13191 // the statements entirely at this point. Mark the parser as
13192 // recovering, as we know that EOF closes the top-level context, and
13193 // then break out of the loop.
13194 if (match1(parser, PM_TOKEN_EOF)) {
13195 parser->recovering = true;
13196 break;
13197 }
13198
13199 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13200 if (context_terminator(context, &parser->current)) break;
13201 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13202 // This is an inlined version of accept1 because the error that we
13203 // want to add has varargs. If this happens again, we should
13204 // probably extract a helper function.
13205 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
// The previous token is turned into a zero-width "missing" token located
// at its own end.
13206 parser->previous.start = parser->previous.end;
13207 parser->previous.type = PM_TOKEN_MISSING;
13208 }
13209 }
13210
13211 context_pop(parser);
// last_value is forwarded to pm_void_statements_check below.
13212 bool last_value = true;
// NOTE(review): listing lines 13214-13215 are missing from this extract.
// They presumably held the `case` labels selecting the contexts for which
// last_value becomes false; as shown, the assignment below precedes every
// label in the switch and can never execute. Confirm against the full file.
13213 switch (context) {
13216 last_value = false;
13217 break;
13218 default:
13219 break;
13220 }
13221 pm_void_statements_check(parser, statements, last_value);
13222
13223 return statements;
13224}
13225
13230static void
13231pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13232 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13233
13234 if (duplicated != NULL) {
13235 pm_buffer_t buffer = { 0 };
13236 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13237
13238 pm_diagnostic_list_append_format(
13239 &parser->warning_list,
13240 duplicated->location.start,
13241 duplicated->location.end,
13242 PM_WARN_DUPLICATED_HASH_KEY,
13243 (int) pm_buffer_length(&buffer),
13244 pm_buffer_value(&buffer),
13245 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13246 );
13247
13248 pm_buffer_free(&buffer);
13249 }
13250}
13251
13256static void
13257pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13258 pm_node_t *previous;
13259
13260 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
13261 pm_diagnostic_list_append_format(
13262 &parser->warning_list,
13263 node->location.start,
13264 node->location.end,
13265 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13266 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
13267 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
13268 );
13269 }
13270}
13271
13275static bool
13276parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13277 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13278 bool contains_keyword_splat = false;
13279
13280 while (true) {
13281 pm_node_t *element;
13282
13283 switch (parser->current.type) {
13284 case PM_TOKEN_USTAR_STAR: {
13285 parser_lex(parser);
13286 pm_token_t operator = parser->previous;
13287 pm_node_t *value = NULL;
13288
13289 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13290 // If we're about to parse a nested hash that is being
13291 // pushed into this hash directly with **, then we want the
13292 // inner hash to share the static literals with the outer
13293 // hash.
13294 parser->current_hash_keys = literals;
13295 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13296 } else if (token_begins_expression_p(parser->current.type)) {
13297 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13298 } else {
13299 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13300 }
13301
13302 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13303 contains_keyword_splat = true;
13304 break;
13305 }
13306 case PM_TOKEN_LABEL: {
13307 pm_token_t label = parser->current;
13308 parser_lex(parser);
13309
13310 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13311 pm_hash_key_static_literals_add(parser, literals, key);
13312
13313 pm_token_t operator = not_provided(parser);
13314 pm_node_t *value = NULL;
13315
13316 if (token_begins_expression_p(parser->current.type)) {
13317 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13318 } else {
13319 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13320 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13321 value = UP(pm_constant_read_node_create(parser, &constant));
13322 } else {
13323 int depth = -1;
13324 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13325
13326 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13327 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13328 } else {
13329 depth = pm_parser_local_depth(parser, &identifier);
13330 }
13331
13332 if (depth == -1) {
13333 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13334 } else {
13335 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13336 }
13337 }
13338
13339 value->location.end++;
13340 value = UP(pm_implicit_node_create(parser, value));
13341 }
13342
13343 element = UP(pm_assoc_node_create(parser, key, &operator, value));
13344 break;
13345 }
13346 default: {
13347 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13348
13349 // Hash keys that are strings are automatically frozen. We will
13350 // mark that here.
13351 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13352 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13353 }
13354
13355 pm_hash_key_static_literals_add(parser, literals, key);
13356
13357 pm_token_t operator;
13358 if (pm_symbol_node_label_p(key)) {
13359 operator = not_provided(parser);
13360 } else {
13361 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13362 operator = parser->previous;
13363 }
13364
13365 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13366 element = UP(pm_assoc_node_create(parser, key, &operator, value));
13367 break;
13368 }
13369 }
13370
13371 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13372 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
13373 } else {
13374 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
13375 }
13376
13377 // If there's no comma after the element, then we're done.
13378 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13379
13380 // If the next element starts with a label or a **, then we know we have
13381 // another element in the hash, so we'll continue parsing.
13382 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13383
13384 // Otherwise we need to check if the subsequent token begins an expression.
13385 // If it does, then we'll continue parsing.
13386 if (token_begins_expression_p(parser->current.type)) continue;
13387
13388 // Otherwise by default we will exit out of this loop.
13389 break;
13390 }
13391
13392 return contains_keyword_splat;
13393}
13394
13398static inline void
13399parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13400 if (arguments->arguments == NULL) {
13401 arguments->arguments = pm_arguments_node_create(parser);
13402 }
13403
13404 pm_arguments_node_arguments_append(arguments->arguments, argument);
13405}
13406
/**
 * Parse a comma-separated list of call arguments into `arguments`.
 *
 * Positional arguments, bare keyword hashes, splats and forwarding arguments
 * are appended through parse_arguments_append(). The first block argument
 * (`&expr`) is stored in `arguments->block`; any further block argument is
 * appended as a regular argument instead.
 *
 * - accepts_forwarding: when true, a `...` that is not followed by an
 *   expression is treated as argument forwarding.
 * - terminator: the token type that closes the list. When it is not
 *   PM_TOKEN_EOF, a newline is accepted after each argument and a trailing
 *   comma before the terminator is allowed.
 * - depth: the current nesting depth; nested parse calls receive depth + 1.
 *
 * Parsing stops at the first argument that is not followed by a comma, when
 * an argument is a missing node, or when the parser is recovering.
 */
13410static void
13411parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
13412 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13413
13414 // First we need to check if the next token is one that could be the start
13415 // of an argument. If it's not, then we can just return.
13416 if (
13417 match2(parser, terminator, PM_TOKEN_EOF) ||
13418 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13419 context_terminator(parser->current_context->context, &parser->current)
13420 ) {
13421 return;
13422 }
13423
      // These flags track what has been seen so far so that later arguments
      // can be diagnosed (e.g. a second bare hash, or anything after `...`).
13424 bool parsed_first_argument = false;
13425 bool parsed_bare_hash = false;
13426 bool parsed_block_argument = false;
13427 bool parsed_forwarding_arguments = false;
13428
13429 while (!match1(parser, PM_TOKEN_EOF)) {
13430 if (parsed_forwarding_arguments) {
13431 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13432 }
13433
13434 pm_node_t *argument = NULL;
13435
13436 switch (parser->current.type) {
13437 case PM_TOKEN_USTAR_STAR:
13438 case PM_TOKEN_LABEL: {
      // A `**` or a label starts a bare keyword hash; parse_assocs consumes
      // all of its elements.
13439 if (parsed_bare_hash) {
13440 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13441 }
13442
13443 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13444 argument = UP(hash);
13445
13446 pm_static_literals_t hash_keys = { 0 };
13447 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13448
13449 parse_arguments_append(parser, arguments, argument);
13450
13451 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13452 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13453 pm_node_flag_set(UP(arguments->arguments), flags);
13454
13455 pm_static_literals_free(&hash_keys);
13456 parsed_bare_hash = true;
13457
13458 break;
13459 }
13460 case PM_TOKEN_UAMPERSAND: {
13461 parser_lex(parser);
13462 pm_token_t operator = parser->previous;
13463 pm_node_t *expression = NULL;
13464
      // A bare `&` with no expression is checked against the enclosing
      // scope's block forwarding instead of parsing an operand.
13465 if (token_begins_expression_p(parser->current.type)) {
13466 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13467 } else {
13468 pm_parser_scope_forwarding_block_check(parser, &operator);
13469 }
13470
13471 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
      // Only the first block argument is stored in arguments->block; a
      // repeated one is appended to the regular argument list.
13472 if (parsed_block_argument) {
13473 parse_arguments_append(parser, arguments, argument);
13474 } else {
13475 arguments->block = argument;
13476 }
13477
13478 if (match1(parser, PM_TOKEN_COMMA)) {
13479 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13480 }
13481
13482 parsed_block_argument = true;
13483 break;
13484 }
13485 case PM_TOKEN_USTAR: {
13486 parser_lex(parser);
13487 pm_token_t operator = parser->previous;
13488
      // A `*` directly followed by `)`, `,`, `;` or `]` has no operand and is
      // treated as an anonymous splat.
13489 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13490 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13491 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13492 if (parsed_bare_hash) {
13493 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13494 }
13495 } else {
13496 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13497
13498 if (parsed_bare_hash) {
13499 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13500 }
13501
13502 argument = UP(pm_splat_node_create(parser, &operator, expression));
13503 }
13504
13505 parse_arguments_append(parser, arguments, argument);
13506 break;
13507 }
13508 case PM_TOKEN_UDOT_DOT_DOT: {
13509 if (accepts_forwarding) {
13510 parser_lex(parser);
13511
13512 if (token_begins_expression_p(parser->current.type)) {
13513 // If the token begins an expression then this ... was
13514 // not actually argument forwarding but was instead a
13515 // range.
13516 pm_token_t operator = parser->previous;
13517 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13518
13519 // If we parse a range, we need to validate that we
13520 // didn't accidentally violate the nonassoc rules of the
13521 // ... operator.
13522 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13523 pm_range_node_t *range = (pm_range_node_t *) right;
13524 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13525 }
13526
13527 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13528 } else {
13529 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
13530 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13531 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13532 }
13533
13534 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13535 parse_arguments_append(parser, arguments, argument);
13536 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13537 arguments->has_forwarding = true;
13538 parsed_forwarding_arguments = true;
13539 break;
13540 }
13541 }
13542 }
      // Control reaches the default case from the `...` case above whenever
      // forwarding is not accepted or a range was parsed; only the forwarding
      // branch breaks out. NOTE(review): listing line 13543 is not visible
      // here and presumably holds an explicit fall-through marker - confirm.
13544 default: {
      // `argument` is already set when the `...` case parsed a range and fell
      // through; otherwise parse a regular argument expression now.
13545 if (argument == NULL) {
13546 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13547 }
13548
13549 bool contains_keywords = false;
13550 bool contains_keyword_splat = false;
13551
      // If the expression turned out to be a label-style symbol, or is
      // followed by `=>`, it is the first key of a bare keyword hash.
13552 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13553 if (parsed_bare_hash) {
13554 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13555 }
13556
13557 pm_token_t operator;
13558 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13559 operator = parser->previous;
13560 } else {
13561 operator = not_provided(parser);
13562 }
13563
13564 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13565 contains_keywords = true;
13566
13567 // Create the set of static literals for this hash.
13568 pm_static_literals_t hash_keys = { 0 };
13569 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13570
13571 // Finish parsing the one we are part way through.
13572 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13573 argument = UP(pm_assoc_node_create(parser, argument, &operator, value));
13574
13575 pm_keyword_hash_node_elements_append(bare_hash, argument);
13576 argument = UP(bare_hash);
13577
13578 // Then parse more if we have a comma
13579 if (accept1(parser, PM_TOKEN_COMMA) && (
13580 token_begins_expression_p(parser->current.type) ||
13581 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13582 )) {
13583 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13584 }
13585
13586 pm_static_literals_free(&hash_keys);
13587 parsed_bare_hash = true;
13588 }
13589
13590 parse_arguments_append(parser, arguments, argument);
13591
13592 pm_node_flags_t flags = 0;
13593 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13594 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13595 pm_node_flag_set(UP(arguments->arguments), flags);
13596
13597 break;
13598 }
13599 }
13600
13601 parsed_first_argument = true;
13602
13603 // If parsing the argument failed, we need to stop parsing arguments.
13604 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
13605
13606 // If the terminator of these arguments is not EOF, then we have a
13607 // specific token we're looking for. In that case we can accept a
13608 // newline here because it is not functioning as a statement terminator.
13609 bool accepted_newline = false;
13610 if (terminator != PM_TOKEN_EOF) {
13611 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13612 }
13613
13614 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13615 // If we previously were on a comma and we just parsed a bare hash,
13616 // then we want to continue parsing arguments. This is because the
13617 // comma was grabbed up by the hash parser.
13618 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13619 // If there was a comma, then we need to check if we also accepted a
13620 // newline. If we did, then this is a syntax error.
13621 if (accepted_newline) {
13622 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13623 }
13624
13625 // If this is a command call and an argument takes a block,
13626 // there can be no further arguments. For example,
13627 // `foo(bar 1 do end, 2)` should be rejected.
13628 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13629 pm_call_node_t *call = (pm_call_node_t *) argument;
13630 if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
13631 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13632 break;
13633 }
13634 }
13635 } else {
13636 // If there is no comma at the end of the argument list then we're
13637 // done parsing arguments and can break out of this loop.
13638 break;
13639 }
13640
13641 // If we hit the terminator, then that means we have a trailing comma so
13642 // we can accept that output as well.
13643 if (match1(parser, terminator)) break;
13644 }
13645}
13646
/**
 * Parse a parenthesized, destructured required parameter: a `(`, then a
 * comma-separated list whose entries are nested parenthesized groups, `*`
 * splats (with an optional identifier), or plain identifiers, then a `)`.
 *
 * Every named entry is checked with pm_parser_parameter_name_check() (and
 * flagged as repeated when that returns true) and added to the current scope's
 * locals. Returns the multi target node holding the parsed entries.
 *
 * NOTE(review): the line carrying the storage class and return type of this
 * function is not visible in this listing; the function returns `node`, which
 * is declared as a pm_multi_target_node_t pointer - confirm against the full
 * file.
 */
13658parse_required_destructured_parameter(pm_parser_t *parser) {
13659 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
13660
13661 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
13662 pm_multi_target_node_opening_set(node, &parser->previous);
13663
13664 do {
13665 pm_node_t *param;
13666
13667 // If we get here then we have a trailing comma, which isn't allowed in
13668 // the grammar. In other places, multi targets _do_ allow trailing
13669 // commas, so here we'll assume this is a mistake of the user not
13670 // knowing it's not allowed here.
13671 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
      // An implicit rest node is still recorded at the comma so the tree
      // reflects what was written, and then the error is reported.
13672 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13673 pm_multi_target_node_targets_append(parser, node, param);
13674 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13675 break;
13676 }
13677
13678 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
      // Nested group: recurse. The nested call consumes the `(` itself, which
      // is why this branch uses match1 rather than accept1.
13679 param = UP(parse_required_destructured_parameter(parser));
13680 } else if (accept1(parser, PM_TOKEN_USTAR)) {
13681 pm_token_t star = parser->previous;
13682 pm_node_t *value = NULL;
13683
      // The name after `*` is optional; without one the splat has no target.
13684 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13685 pm_token_t name = parser->previous;
13686 value = UP(pm_required_parameter_node_create(parser, &name));
13687 if (pm_parser_parameter_name_check(parser, &name)) {
13688 pm_node_flag_set_repeated_parameter(value);
13689 }
13690 pm_parser_local_add_token(parser, &name, 1);
13691 }
13692
13693 param = UP(pm_splat_node_create(parser, &star, value));
13694 } else {
13695 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
13696 pm_token_t name = parser->previous;
13697
13698 param = UP(pm_required_parameter_node_create(parser, &name));
13699 if (pm_parser_parameter_name_check(parser, &name)) {
13700 pm_node_flag_set_repeated_parameter(param);
13701 }
13702 pm_parser_local_add_token(parser, &name, 1);
13703 }
13704
13705 pm_multi_target_node_targets_append(parser, node, param);
13706 } while (accept1(parser, PM_TOKEN_COMMA));
13707
      // A single newline is tolerated before the closing parenthesis.
13708 accept1(parser, PM_TOKEN_NEWLINE);
13709 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
13710 pm_multi_target_node_closing_set(node, &parser->previous);
13711
13712 return node;
13713}
13714
/**
 * The states used to validate the order in which parameters appear in a
 * parameter list. Parsing starts at PM_PARAMETERS_ORDER_NONE and the state
 * only ever moves to lower values; seeing a parameter kind whose state is
 * higher than the current one is reported as an ordering error.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** A block parameter or `...` has been seen; nothing may follow. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** A `**` keyword rest parameter has been seen. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** A keyword (label) parameter has been seen. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** State slot between keywords and the post-optional positionals. */
    PM_PARAMETERS_ORDER_REST = 4,

    /** A `*` parameter, or a named parameter after an optional one. */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** An optional (defaulted) positional parameter has been seen. */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** A named or destructured required parameter has been seen. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** No parameter has been seen yet. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
13730
13734static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13735 [0] = PM_PARAMETERS_NO_CHANGE,
13736 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13737 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13738 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13739 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
13740 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
13741 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
13742 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
13743 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13744 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13745 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
13746 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
13747};
13748
13756static bool
13757update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13758 pm_parameters_order_t state = parameters_ordering[token->type];
13759 if (state == PM_PARAMETERS_NO_CHANGE) return true;
13760
13761 // If we see another ordered argument after a optional argument
13762 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13763 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13764 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13765 return true;
13766 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13767 return true;
13768 }
13769
13770 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13771 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13772 return false;
13773 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
13774 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
13775 return false;
13776 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13777 // We know what transition we failed on, so we can provide a better error here.
13778 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13779 return false;
13780 }
13781
13782 if (state < *current) *current = state;
13783 return true;
13784}
13785
/**
 * Parse a list of parameters into a parameters node.
 *
 * Each iteration of the loop dispatches on the current token to parse one
 * parameter, validates its position through update_parameter_state(), and
 * stores it in the matching slot of the parameters node. Parameters that are
 * reported as duplicates of a slot that is already filled (block, rest,
 * keyword rest) are appended to the posts list after the error is recorded.
 *
 * - binding_power: the binding power used when parsing default values.
 * - uses_parentheses: whether the list is parenthesized. When true a newline
 *   is accepted after each parameter; when false (and not in a block) the
 *   parser's in_keyword_arg flag is set while a label parameter is parsed.
 * - allows_trailing_comma: whether a trailing comma produces an implicit rest
 *   node instead of an error.
 * - allows_forwarding_parameters: whether `...` is allowed; an error is
 *   recorded when it is not, but the parameter is still parsed.
 * - accepts_blocks_in_defaults: whether `true` is pushed onto the
 *   accepts-block stack while a default value is parsed.
 * - in_block: whether these are block parameters.
 * - depth: the current nesting depth; nested parse calls receive depth + 1.
 *
 * Returns the parameters node, or NULL (after destroying the node) when its
 * location is still empty once parsing has finished.
 */
13789static pm_parameters_node_t *
13790parse_parameters(
13791 pm_parser_t *parser,
13792 pm_binding_power_t binding_power,
13793 bool uses_parentheses,
13794 bool allows_trailing_comma,
13795 bool allows_forwarding_parameters,
13796 bool accepts_blocks_in_defaults,
13797 bool in_block,
13798 uint16_t depth
13799) {
13800 pm_do_loop_stack_push(parser, false);
13801
13802 pm_parameters_node_t *params = pm_parameters_node_create(parser);
13803 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
13804
13805 while (true) {
      // Cleared by any case that needs to stop parsing the whole list.
13806 bool parsing = true;
13807
13808 switch (parser->current.type) {
13809 case PM_TOKEN_PARENTHESIS_LEFT: {
13810 update_parameter_state(parser, &parser->current, &order);
13811 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
13812
      // While the order state is still above AFTER_OPTIONAL the parameter
      // goes in the leading required list; otherwise it goes in posts.
13813 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13814 pm_parameters_node_requireds_append(params, param);
13815 } else {
13816 pm_parameters_node_posts_append(params, param);
13817 }
13818 break;
13819 }
13820 case PM_TOKEN_UAMPERSAND:
13821 case PM_TOKEN_AMPERSAND: {
13822 update_parameter_state(parser, &parser->current, &order);
13823 parser_lex(parser);
13824
13825 pm_token_t operator = parser->previous;
13826 pm_token_t name;
13827
13828 bool repeated = false;
13829 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13830 name = parser->previous;
13831 repeated = pm_parser_parameter_name_check(parser, &name);
13832 pm_parser_local_add_token(parser, &name, 1);
13833 } else {
      // An anonymous `&` marks the scope as able to forward its block.
13834 name = not_provided(parser);
13835 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
13836 }
13837
13838 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
13839 if (repeated) {
13840 pm_node_flag_set_repeated_parameter(UP(param));
13841 }
13842 if (params->block == NULL) {
13843 pm_parameters_node_block_set(params, param);
13844 } else {
13845 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_BLOCK_MULTI);
13846 pm_parameters_node_posts_append(params, UP(param));
13847 }
13848
13849 break;
13850 }
13851 case PM_TOKEN_UDOT_DOT_DOT: {
13852 if (!allows_forwarding_parameters) {
13853 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
13854 }
13855
13856 bool succeeded = update_parameter_state(parser, &parser->current, &order);
13857 parser_lex(parser);
13858
13859 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
13860 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13861
13862 if (params->keyword_rest != NULL) {
13863 // If we already have a keyword rest parameter, then we replace it with the
13864 // forwarding parameter and move the keyword rest parameter to the posts list.
13865 pm_node_t *keyword_rest = params->keyword_rest;
13866 pm_parameters_node_posts_append(params, keyword_rest);
      // Only report here when the ordering check did not already fail, so the
      // same `...` is not diagnosed twice.
13867 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13868 params->keyword_rest = NULL;
13869 }
13870
13871 pm_parameters_node_keyword_rest_set(params, UP(param));
13872 break;
13873 }
13874 case PM_TOKEN_CLASS_VARIABLE:
13875 case PM_TOKEN_IDENTIFIER:
13876 case PM_TOKEN_CONSTANT:
13877 case PM_TOKEN_INSTANCE_VARIABLE:
13878 case PM_TOKEN_GLOBAL_VARIABLE:
13879 case PM_TOKEN_METHOD_NAME: {
13880 parser_lex(parser);
      // Every token kind other than a plain identifier is reported as an
      // invalid formal parameter, but parsing continues with it as the name.
13881 switch (parser->previous.type) {
13882 case PM_TOKEN_CONSTANT:
13883 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13884 break;
13885 case PM_TOKEN_INSTANCE_VARIABLE:
13886 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
13887 break;
13888 case PM_TOKEN_GLOBAL_VARIABLE:
13889 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
13890 break;
13891 case PM_TOKEN_CLASS_VARIABLE:
13892 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
13893 break;
13894 case PM_TOKEN_METHOD_NAME:
13895 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
13896 break;
13897 default: break;
13898 }
13899
      // A following `=` makes this an optional parameter, so the `=` token is
      // the one that drives the ordering state; otherwise the name token is.
13900 if (parser->current.type == PM_TOKEN_EQUAL) {
13901 update_parameter_state(parser, &parser->current, &order);
13902 } else {
13903 update_parameter_state(parser, &parser->previous, &order);
13904 }
13905
13906 pm_token_t name = parser->previous;
13907 bool repeated = pm_parser_parameter_name_check(parser, &name);
13908 pm_parser_local_add_token(parser, &name, 1);
13909
13910 if (match1(parser, PM_TOKEN_EQUAL)) {
13911 pm_token_t operator = parser->current;
13912 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
13913 parser_lex(parser);
13914
      // For versions up to 3.3, snapshot the read count of the parameter's
      // own name so a self-reference in the default can be detected below.
13915 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
13916 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
13917
13918 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
13919 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
13920 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
13921
13922 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
13923
13924 if (repeated) {
13925 pm_node_flag_set_repeated_parameter(UP(param));
13926 }
13927 pm_parameters_node_optionals_append(params, param);
13928
13929 // If the value of the parameter increased the number of
13930 // reads of that parameter, then we need to warn that we
13931 // have a circular definition.
13932 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
13933 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
13934 }
13935
13936 context_pop(parser);
13937
13938 // If parsing the value of the parameter resulted in error recovery,
13939 // then we can put a missing node in its place and stop parsing the
13940 // parameters entirely now.
13941 if (parser->recovering) {
13942 parsing = false;
13943 break;
13944 }
13945 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13946 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
13947 if (repeated) {
13948 pm_node_flag_set_repeated_parameter(UP(param));
13949 }
13950 pm_parameters_node_requireds_append(params, UP(param));
13951 } else {
13952 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
13953 if (repeated) {
13954 pm_node_flag_set_repeated_parameter(UP(param));
13955 }
13956 pm_parameters_node_posts_append(params, UP(param));
13957 }
13958
13959 break;
13960 }
13961 case PM_TOKEN_LABEL: {
13962 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
13963 update_parameter_state(parser, &parser->current, &order);
13964
13965 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
13966 parser_lex(parser);
13967
      // `local` is the label token with its final byte dropped (presumably
      // the trailing `:` - confirm); it is used as the local variable name.
13968 pm_token_t name = parser->previous;
13969 pm_token_t local = name;
13970 local.end -= 1;
13971
13972 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
13973 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13974 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
13975 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
13976 }
13977
13978 bool repeated = pm_parser_parameter_name_check(parser, &local);
13979 pm_parser_local_add_token(parser, &local, 1);
13980
      // The token after the label decides whether this keyword parameter is
      // required (nothing that starts a value follows) or optional. Every
      // path below pops the context pushed above exactly once.
13981 switch (parser->current.type) {
13982 case PM_TOKEN_COMMA:
13983 case PM_TOKEN_PARENTHESIS_RIGHT:
13984 case PM_TOKEN_PIPE: {
13985 context_pop(parser);
13986
13987 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
13988 if (repeated) {
13989 pm_node_flag_set_repeated_parameter(param);
13990 }
13991
13992 pm_parameters_node_keywords_append(params, param);
13993 break;
13994 }
13995 case PM_TOKEN_SEMICOLON:
13996 case PM_TOKEN_NEWLINE: {
13997 context_pop(parser);
13998
      // Inside parentheses a label followed by a newline or semicolon stops
      // parameter parsing without adding a parameter for it.
13999 if (uses_parentheses) {
14000 parsing = false;
14001 break;
14002 }
14003
14004 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14005 if (repeated) {
14006 pm_node_flag_set_repeated_parameter(param);
14007 }
14008
14009 pm_parameters_node_keywords_append(params, param);
14010 break;
14011 }
14012 default: {
14013 pm_node_t *param;
14014
14015 if (token_begins_expression_p(parser->current.type)) {
14016 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14017 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14018
14019 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14020 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14021 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14022
14023 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14024 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14025 }
14026
14027 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14028 }
14029 else {
14030 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14031 }
14032
14033 if (repeated) {
14034 pm_node_flag_set_repeated_parameter(param);
14035 }
14036
14037 context_pop(parser);
14038 pm_parameters_node_keywords_append(params, param);
14039
14040 // If parsing the value of the parameter resulted in error recovery,
14041 // then we can put a missing node in its place and stop parsing the
14042 // parameters entirely now.
14043 if (parser->recovering) {
14044 parsing = false;
14045 break;
14046 }
14047 }
14048 }
14049
14050 parser->in_keyword_arg = false;
14051 break;
14052 }
14053 case PM_TOKEN_USTAR:
14054 case PM_TOKEN_STAR: {
14055 update_parameter_state(parser, &parser->current, &order);
14056 parser_lex(parser);
14057
14058 pm_token_t operator = parser->previous;
14059 pm_token_t name;
14060 bool repeated = false;
14061
14062 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14063 name = parser->previous;
14064 repeated = pm_parser_parameter_name_check(parser, &name);
14065 pm_parser_local_add_token(parser, &name, 1);
14066 } else {
      // An anonymous `*` marks the scope as able to forward positionals.
14067 name = not_provided(parser);
14068 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14069 }
14070
14071 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, &name));
14072 if (repeated) {
14073 pm_node_flag_set_repeated_parameter(param);
14074 }
14075
14076 if (params->rest == NULL) {
14077 pm_parameters_node_rest_set(params, param);
14078 } else {
14079 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14080 pm_parameters_node_posts_append(params, param);
14081 }
14082
14083 break;
14084 }
14085 case PM_TOKEN_STAR_STAR:
14086 case PM_TOKEN_USTAR_STAR: {
      // Remember the state before this token so `**nil` can be rejected when
      // keyword parameters have already been seen.
14087 pm_parameters_order_t previous_order = order;
14088 update_parameter_state(parser, &parser->current, &order);
14089 parser_lex(parser);
14090
14091 pm_token_t operator = parser->previous;
14092 pm_node_t *param;
14093
14094 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14095 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14096 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14097 }
14098
14099 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14100 } else {
14101 pm_token_t name;
14102
14103 bool repeated = false;
14104 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14105 name = parser->previous;
14106 repeated = pm_parser_parameter_name_check(parser, &name);
14107 pm_parser_local_add_token(parser, &name, 1);
14108 } else {
      // An anonymous `**` marks the scope as able to forward keywords.
14109 name = not_provided(parser);
14110 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14111 }
14112
14113 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, &name));
14114 if (repeated) {
14115 pm_node_flag_set_repeated_parameter(param);
14116 }
14117 }
14118
14119 if (params->keyword_rest == NULL) {
14120 pm_parameters_node_keyword_rest_set(params, param);
14121 } else {
14122 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14123 pm_parameters_node_posts_append(params, param);
14124 }
14125
14126 break;
14127 }
14128 default:
      // The current token cannot start a parameter. If the previous token
      // was a comma, that comma is a trailing one.
14129 if (parser->previous.type == PM_TOKEN_COMMA) {
14130 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14131 // If we get here, then we have a trailing comma in a
14132 // block parameter list.
14133 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14134
14135 if (params->rest == NULL) {
14136 pm_parameters_node_rest_set(params, param);
14137 } else {
14138 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
14139 pm_parameters_node_posts_append(params, UP(param));
14140 }
14141 } else {
14142 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14143 }
14144 }
14145
14146 parsing = false;
14147 break;
14148 }
14149
14150 // If we hit some kind of issue while parsing the parameter, this would
14151 // have been set to false. In that case, we need to break out of the
14152 // loop.
14153 if (!parsing) break;
14154
14155 bool accepted_newline = false;
14156 if (uses_parentheses) {
14157 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14158 }
14159
14160 if (accept1(parser, PM_TOKEN_COMMA)) {
14161 // If there was a comma, but we also accepted a newline, then this
14162 // is a syntax error.
14163 if (accepted_newline) {
14164 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14165 }
14166 } else {
14167 // If there was no comma, then we're done parsing parameters.
14168 break;
14169 }
14170 }
14171
      // Balances the push performed on entry.
14172 pm_do_loop_stack_pop(parser);
14173
14174 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14175 if (params->base.location.start == params->base.location.end) {
14176 pm_node_destroy(parser, UP(params));
14177 return NULL;
14178 }
14179
14180 return params;
14181}
14182
14187static size_t
14188token_newline_index(const pm_parser_t *parser) {
14189 if (parser->heredoc_end == NULL) {
14190 // This is the common case. In this case we can look at the previously
14191 // recorded newline in the newline list and subtract from the current
14192 // offset.
14193 return parser->newline_list.size - 1;
14194 } else {
14195 // This is unlikely. This is the case that we have already parsed the
14196 // start of a heredoc, so we cannot rely on looking at the previous
14197 // offset of the newline list, and instead must go through the whole
14198 // process of a binary search for the line number.
14199 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14200 }
14201}
14202
14207static int64_t
14208token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14209 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14210 const uint8_t *end = token->start;
14211
14212 // Skip over the BOM if it is present.
14213 if (
14214 newline_index == 0 &&
14215 parser->start[0] == 0xef &&
14216 parser->start[1] == 0xbb &&
14217 parser->start[2] == 0xbf
14218 ) cursor += 3;
14219
14220 int64_t column = 0;
14221 for (; cursor < end; cursor++) {
14222 switch (*cursor) {
14223 case '\t':
14224 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14225 break;
14226 case ' ':
14227 column++;
14228 break;
14229 default:
14230 column++;
14231 if (break_on_non_space) return -1;
14232 break;
14233 }
14234 }
14235
14236 return column;
14237}
14238
14243static void
14244parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14245 // If these warnings are disabled (unlikely), then we can just return.
14246 if (!parser->warn_mismatched_indentation) return;
14247
14248 // If the tokens are on the same line, we do not warn.
14249 size_t closing_newline_index = token_newline_index(parser);
14250 if (opening_newline_index == closing_newline_index) return;
14251
14252 // If the opening token has anything other than spaces or tabs before it,
14253 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14254 // and the `if` immediately follows an `else` keyword.
14255 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14256 if (!if_after_else && (opening_column == -1)) return;
14257
14258 // Get a reference to the closing token off the current parser. This assumes
14259 // that the caller has placed this in the correct position.
14260 pm_token_t *closing_token = &parser->current;
14261
14262 // If the tokens are at the same indentation, we do not warn.
14263 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14264 if ((closing_column == -1) || (opening_column == closing_column)) return;
14265
14266 // If the closing column is greater than the opening column and we are
14267 // allowing indentation, then we do not warn.
14268 if (allow_indent && (closing_column > opening_column)) return;
14269
14270 // Otherwise, add a warning.
14271 PM_PARSER_WARN_FORMAT(
14272 parser,
14273 closing_token->start,
14274 closing_token->end,
14275 PM_WARN_INDENTATION_MISMATCH,
14276 (int) (closing_token->end - closing_token->start),
14277 (const char *) closing_token->start,
14278 (int) (opening_token->end - opening_token->start),
14279 (const char *) opening_token->start,
14280 ((int32_t) opening_newline_index) + parser->start_line
14281 );
14282}
14283
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * parse_rescues uses this to select the matching family of parser contexts
 * (begin, block, class, def, lambda, module, or sclass).
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK = 2,
    PM_RESCUES_CLASS = 3,
    PM_RESCUES_DEF = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14293
/**
 * Parse any number of `rescue` clauses, then an optional `else` clause, then
 * an optional `ensure` clause, attaching each of them to parent_node. Finally
 * set the end keyword on parent_node: the current token if it is `end` (this
 * function does not lex past it), otherwise a zero-width missing token placed
 * at the end of the previous token.
 *
 * When opening is non-NULL, every rescue/else/ensure/end keyword is checked
 * against it for mismatched indentation. The type argument selects which
 * family of contexts the clause bodies are parsed in.
 */
14298static inline void
14299parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
// The most recently parsed rescue clause, used to chain subsequent clauses.
14300 pm_rescue_node_t *current = NULL;
14301
14302 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14303 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14304 parser_lex(parser);
14305
14306 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14307
14308 switch (parser->current.type) {
14309 case PM_TOKEN_EQUAL_GREATER: {
14310 // Here we have an immediate => after the rescue keyword, in which case
14311 // we're going to have an empty list of exceptions to rescue (which
14312 // implies StandardError).
14313 parser_lex(parser);
14314 pm_rescue_node_operator_set(rescue, &parser->previous);
14315
14316 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14317 reference = parse_target(parser, reference, false, false);
14318
14319 pm_rescue_node_reference_set(rescue, reference);
14320 break;
14321 }
14322 case PM_TOKEN_NEWLINE:
14323 case PM_TOKEN_SEMICOLON:
14324 case PM_TOKEN_KEYWORD_THEN:
14325 // Here we have a terminator for the rescue keyword, in which
14326 // case we're going to just continue on.
14327 break;
14328 default: {
14329 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14330 // Here we have something that could be an exception expression, so
14331 // we'll attempt to parse it here and any others delimited by commas.
14332
14333 do {
14334 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14335 pm_rescue_node_exceptions_append(rescue, expression);
14336
14337 // If we hit a newline, then this is the end of the rescue expression. We
14338 // can continue on to parse the statements.
14339 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14340
14341 // If we hit a `=>` then we're going to parse the exception variable. Once
14342 // we've done that, we'll break out of the loop and parse the statements.
14343 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14344 pm_rescue_node_operator_set(rescue, &parser->previous);
14345
14346 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14347 reference = parse_target(parser, reference, false, false);
14348
14349 pm_rescue_node_reference_set(rescue, reference);
14350 break;
14351 }
14352 } while (accept1(parser, PM_TOKEN_COMMA));
14353 }
14354 }
14355 }
14356
// The clause header is closed either by a newline/semicolon that may be
// followed by an optional `then`, or by a mandatory `then` (an error is
// reported through expect1 if it is missing).
14357 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14358 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14359 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
14360 }
14361 } else {
14362 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14363 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
14364 }
14365
// Only parse a body if the clause is not immediately followed by a keyword
// that ends it.
14366 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14367 pm_accepts_block_stack_push(parser, true);
14368 pm_context_t context;
14369
14370 switch (type) {
14371 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14372 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14373 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14374 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14375 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14376 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14377 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14378 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14379 }
14380
14381 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14382 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14383
14384 pm_accepts_block_stack_pop(parser);
14385 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14386 }
14387
// The first clause hangs off the begin node; each later clause hangs off the
// clause before it.
14388 if (current == NULL) {
14389 pm_begin_node_rescue_clause_set(parent_node, rescue);
14390 } else {
14391 pm_rescue_node_subsequent_set(current, rescue);
14392 }
14393
14394 current = rescue;
14395 }
14396
14397 // The end node locations on rescue nodes will not be set correctly
14398 // since we won't know the end until we've found all subsequent
14399 // clauses. This sets the end location on all rescues once we know it.
14400 if (current != NULL) {
14401 const uint8_t *end_to_set = current->base.location.end;
14402 pm_rescue_node_t *clause = parent_node->rescue_clause;
14403
14404 while (clause != NULL) {
14405 clause->base.location.end = end_to_set;
14406 clause = clause->subsequent;
14407 }
14408 }
14409
14410 pm_token_t else_keyword;
14411 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14412 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
// From here on, indentation of the following keywords is compared against
// the `else` keyword rather than the original opening token.
14413 opening_newline_index = token_newline_index(parser);
14414
14415 else_keyword = parser->current;
14416 opening = &else_keyword;
14417
14418 parser_lex(parser);
14419 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14420
14421 pm_statements_node_t *else_statements = NULL;
14422 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14423 pm_accepts_block_stack_push(parser, true);
14424 pm_context_t context;
14425
14426 switch (type) {
14427 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14428 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14429 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14430 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14431 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14432 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14433 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14434 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14435 }
14436
14437 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14438 pm_accepts_block_stack_pop(parser);
14439
14440 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14441 }
14442
// The else node is closed by the current token, which has not been consumed.
14443 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14444 pm_begin_node_else_clause_set(parent_node, else_clause);
14445
14446 // If we don't have a `current` rescue node, then this is a dangling
14447 // else, and it's an error.
14448 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14449 }
14450
14451 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14452 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14453 pm_token_t ensure_keyword = parser->current;
14454
14455 parser_lex(parser);
14456 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14457
14458 pm_statements_node_t *ensure_statements = NULL;
14459 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14460 pm_accepts_block_stack_push(parser, true);
14461 pm_context_t context;
14462
14463 switch (type) {
14464 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14465 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14466 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14467 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14468 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14469 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14470 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
// NOTE(review): this unreachable fallback selects PM_CONTEXT_BEGIN_RESCUE,
// while the rescue and else switches above fall back to their own clause
// kind; PM_CONTEXT_BEGIN_ENSURE looks like what was intended - confirm.
14471 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14472 }
14473
14474 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14475 pm_accepts_block_stack_pop(parser);
14476
14477 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14478 }
14479
14480 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14481 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14482 }
14483
// Record the `end` keyword without consuming it. If it is absent, record a
// zero-width missing token located at the end of the previous token.
14484 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14485 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14486 pm_begin_node_end_keyword_set(parent_node, &parser->current);
14487 } else {
14488 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
14489 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
14490 }
14491}
14492
14497static pm_begin_node_t *
14498parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14499 pm_token_t begin_keyword = not_provided(parser);
14500 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
14501
14502 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14503 node->base.location.start = start;
14504
14505 return node;
14506}
14507
/**
 * Parse the parameter list of a block or lambda: the ordinary parameters
 * (unless the list starts directly with `;`), followed - when an opening
 * token was provided - by an optional `;`-introduced, comma-separated list of
 * block-local variables. Returns the block parameters node that was created.
 *
 * NOTE(review): the line carrying this definition's return type is not
 * present in this listing; the value returned below is declared as a
 * pm_block_parameters_node_t pointer - confirm against the full file.
 */
14512parse_block_parameters(
14513 pm_parser_t *parser,
14514 bool allows_trailing_comma,
14515 const pm_token_t *opening,
14516 bool is_lambda_literal,
14517 bool accepts_blocks_in_defaults,
14518 uint16_t depth
14519) {
14520 pm_parameters_node_t *parameters = NULL;
// A leading `;` means there are no ordinary parameters, only block locals.
14521 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
// The block-parameters context is only pushed for non-lambda blocks.
14522 if (!is_lambda_literal) {
14523 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14524 }
14525 parameters = parse_parameters(
14526 parser,
14527 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14528 false,
14529 allows_trailing_comma,
14530 false,
14531 accepts_blocks_in_defaults,
14532 true,
14533 (uint16_t) (depth + 1)
14534 );
14535 if (!is_lambda_literal) {
14536 context_pop(parser);
14537 }
14538 }
14539
14540 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
// Block-local variables are only looked for when an opening token exists.
14541 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
14542 accept1(parser, PM_TOKEN_NEWLINE);
14543
14544 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14545 do {
// Constants, instance variables, global variables, and class variables are
// reported as errors but still consumed, so that the token is treated as the
// local's name below. Anything else must be an identifier.
14546 switch (parser->current.type) {
14547 case PM_TOKEN_CONSTANT:
14548 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14549 parser_lex(parser);
14550 break;
14551 case PM_TOKEN_INSTANCE_VARIABLE:
14552 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14553 parser_lex(parser);
14554 break;
14555 case PM_TOKEN_GLOBAL_VARIABLE:
14556 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14557 parser_lex(parser);
14558 break;
14559 case PM_TOKEN_CLASS_VARIABLE:
14560 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14561 parser_lex(parser);
14562 break;
14563 default:
14564 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14565 break;
14566 }
14567
// The token just consumed names the block-local variable.
14568 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14569 pm_parser_local_add_token(parser, &parser->previous, 1);
14570
14571 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14572 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14573
14574 pm_block_parameters_node_append_local(block_parameters, local);
14575 } while (accept1(parser, PM_TOKEN_COMMA));
14576 }
14577 }
14578
14579 return block_parameters;
14580}
14581
14586static bool
14587outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14588 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14589 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14590 }
14591
14592 return false;
14593}
14594
/**
 * The source names of the nine numbered parameters, ordered so that the
 * entry at index i is the name of parameter number i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
14603
14609static pm_node_t *
14610parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14611 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14612
14613 // If we have ordinary parameters, then we will return them as the set of
14614 // parameters.
14615 if (parameters != NULL) {
14616 // If we also have implicit parameters, then this is an error.
14617 if (implicit_parameters->size > 0) {
14618 pm_node_t *node = implicit_parameters->nodes[0];
14619
14620 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14621 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14622 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14623 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14624 } else {
14625 assert(false && "unreachable");
14626 }
14627 }
14628
14629 return parameters;
14630 }
14631
14632 // If we don't have any implicit parameters, then the set of parameters is
14633 // NULL.
14634 if (implicit_parameters->size == 0) {
14635 return NULL;
14636 }
14637
14638 // If we don't have ordinary parameters, then we now must validate our set
14639 // of implicit parameters. We can only have numbered parameters or it, but
14640 // they cannot be mixed.
14641 uint8_t numbered_parameter = 0;
14642 bool it_parameter = false;
14643
14644 for (size_t index = 0; index < implicit_parameters->size; index++) {
14645 pm_node_t *node = implicit_parameters->nodes[index];
14646
14647 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14648 if (it_parameter) {
14649 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14650 } else if (outer_scope_using_numbered_parameters_p(parser)) {
14651 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14652 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14653 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14654 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
14655 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
14656 } else {
14657 assert(false && "unreachable");
14658 }
14659 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14660 if (numbered_parameter > 0) {
14661 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14662 } else {
14663 it_parameter = true;
14664 }
14665 }
14666 }
14667
14668 if (numbered_parameter > 0) {
14669 // Go through the parent scopes and mark them as being disallowed from
14670 // using numbered parameters because this inner scope is using them.
14671 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14672 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14673 }
14674
14675 const pm_location_t location = { .start = opening->start, .end = closing->end };
14676 return UP(pm_numbered_parameters_node_create(parser, &location, numbered_parameter));
14677 }
14678
14679 if (it_parameter) {
14680 return UP(pm_it_parameters_node_create(parser, opening, closing));
14681 }
14682
14683 return NULL;
14684}
14685
/**
 * Parse a block, either brace-delimited or `do`/`end`, and return the
 * resulting block node. The opening token is taken from parser->previous, so
 * the caller must already have consumed it. A body that is not brace-delimited
 * is closed by `end` and may carry rescue/else/ensure clauses.
 */
14689static pm_block_node_t *
14690parse_block(pm_parser_t *parser, uint16_t depth) {
14691 pm_token_t opening = parser->previous;
14692 accept1(parser, PM_TOKEN_NEWLINE);
14693
// Both of these are popped again just before the block node is created.
14694 pm_accepts_block_stack_push(parser, true);
14695 pm_parser_scope_push(parser, false);
14696
14697 pm_block_parameters_node_t *block_parameters = NULL;
14698
// Explicit parameters are delimited by a pair of `|` tokens.
14699 if (accept1(parser, PM_TOKEN_PIPE)) {
14700 pm_token_t block_parameters_opening = parser->previous;
14701 if (match1(parser, PM_TOKEN_PIPE)) {
// An immediately following `|` closes an empty parameter list.
14702 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
14703 parser->command_start = true;
14704 parser_lex(parser);
14705 } else {
14706 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
14707 accept1(parser, PM_TOKEN_NEWLINE);
14708 parser->command_start = true;
14709 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
14710 }
14711
// In both branches the closing `|` is now the previous token.
14712 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
14713 }
14714
14715 accept1(parser, PM_TOKEN_NEWLINE);
14716 pm_node_t *statements = NULL;
14717
14718 if (opening.type == PM_TOKEN_BRACE_LEFT) {
14719 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
14720 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
14721 }
14722
14723 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
14724 } else {
14725 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
// Skip the statements if the body starts directly with a rescue, else, or
// ensure keyword.
14726 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
14727 pm_accepts_block_stack_push(parser, true);
14728 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
14729 pm_accepts_block_stack_pop(parser);
14730 }
14731
// Wrap the statements in an implicit begin node when a rescue or ensure
// clause follows. NULL is passed as the opening token, which disables the
// indentation mismatch warnings inside parse_rescues.
14732 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14733 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14734 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
14735 }
14736 }
14737
14738 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
14739 }
14740
// Collect the locals and resolve the parameters while the block's scope is
// still the current scope; the closing token is now the previous token.
14741 pm_constant_id_list_t locals;
14742 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
14743 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
14744
14745 pm_parser_scope_pop(parser);
14746 pm_accepts_block_stack_pop(parser);
14747
14748 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
14749}
14750
/**
 * Parse the arguments of a call into the given arguments structure: either a
 * parenthesized list or, when accepts_command_call is true, a list without
 * parentheses. When accepts_block is true, a trailing `{ ... }` or
 * `do ... end` block is parsed as well.
 *
 * Returns true if a parenthesized list, a list without parentheses, or a
 * block was found, and false otherwise.
 */
14756static bool
14757parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
14758 bool found = false;
14759
14760 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14761 found |= true;
14762 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14763
14764 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14765 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14766 } else {
14767 pm_accepts_block_stack_push(parser, true);
14768 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
14769
// If the closing parenthesis is missing, report it and turn the previous
// token into a zero-width missing token so that the closing location set
// below is empty and sits at the end of what was parsed.
14770 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14771 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
14772 parser->previous.start = parser->previous.end;
14773 parser->previous.type = PM_TOKEN_MISSING;
14774 }
14775
14776 pm_accepts_block_stack_pop(parser);
14777 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14778 }
// Arguments without parentheses: only attempted when command calls are
// accepted, the current token can begin an expression (or is a splat, double
// splat, or block-pass operator), and the current token is not `{`.
14779 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
14780 found |= true;
14781 pm_accepts_block_stack_push(parser, false);
14782
14783 // If we get here, then the subsequent token cannot be used as an infix
14784 // operator. In this case we assume the subsequent token is part of an
14785 // argument to this method call.
14786 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
14787
14788 // If we have done with the arguments and still not consumed the comma,
14789 // then we have a trailing comma where we need to check whether it is
14790 // allowed or not.
14791 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
14792 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
14793 }
14794
14795 pm_accepts_block_stack_pop(parser);
14796 }
14797
14798 // If we're at the end of the arguments, we can now check if there is a block
14799 // node that starts with a {. If there is, then we can parse it and add it to
14800 // the arguments.
14801 if (accepts_block) {
14802 pm_block_node_t *block = NULL;
14803
14804 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
14805 found |= true;
14806 block = parse_block(parser, (uint16_t) (depth + 1));
14807 pm_arguments_validate_block(parser, arguments, block);
// A `do` block is only taken when the accepts-block stack currently allows it.
14808 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
14809 found |= true;
14810 block = parse_block(parser, (uint16_t) (depth + 1));
14811 }
14812
14813 if (block != NULL) {
14814 if (arguments->block == NULL && !arguments->has_forwarding) {
14815 arguments->block = UP(block);
14816 } else {
// A block was already supplied through the arguments, or the arguments
// contain forwarding. Report the error, keep any earlier block by appending
// it to the positional arguments, and let the newly parsed block take its
// place.
14817 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14818
14819 if (arguments->block != NULL) {
14820 if (arguments->arguments == NULL) {
14821 arguments->arguments = pm_arguments_node_create(parser);
14822 }
14823 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
14824 }
14825 arguments->block = UP(block);
14826 }
14827 }
14828 }
14829
14830 return found;
14831}
14832
/**
 * Check whether the given return node is valid where it appears by walking
 * the context stack from the innermost context outwards, and report
 * PM_ERR_RETURN_INVALID on the node if it is not.
 *
 * NOTE(review): the embedded line numbers have gaps inside the switch below,
 * so this listing may be missing some case labels - confirm against the full
 * file before relying on the exact set of contexts in each group.
 */
14837static void
14838parse_return(pm_parser_t *parser, pm_node_t *node) {
// Set once a singleton class context has been walked through.
14839 bool in_sclass = false;
14840 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14841 switch (context_node->context) {
14845 case PM_CONTEXT_BEGIN:
14846 case PM_CONTEXT_CASE_IN:
14849 case PM_CONTEXT_DEFINED:
14850 case PM_CONTEXT_ELSE:
14851 case PM_CONTEXT_ELSIF:
14852 case PM_CONTEXT_EMBEXPR:
14854 case PM_CONTEXT_FOR:
14855 case PM_CONTEXT_IF:
14857 case PM_CONTEXT_MAIN:
14859 case PM_CONTEXT_PARENS:
14860 case PM_CONTEXT_POSTEXE:
14862 case PM_CONTEXT_PREEXE:
14864 case PM_CONTEXT_TERNARY:
14865 case PM_CONTEXT_UNLESS:
14866 case PM_CONTEXT_UNTIL:
14867 case PM_CONTEXT_WHILE:
14868 // Keep iterating up the lists of contexts, because returns can
14869 // see through these.
14870 continue;
14874 case PM_CONTEXT_SCLASS:
// Remember the singleton class and keep walking; whether this is an error is
// decided after the loop.
14875 in_sclass = true;
14876 continue;
14880 case PM_CONTEXT_CLASS:
14884 case PM_CONTEXT_MODULE:
14885 // These contexts are invalid for a return.
14886 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14887 return;
14898 case PM_CONTEXT_DEF:
14904 // These contexts are valid for a return, and we should not
14905 // continue to loop.
14906 return;
14907 case PM_CONTEXT_NONE:
14908 // This case should never happen.
14909 assert(false && "unreachable");
14910 break;
14911 }
14912 }
// The whole stack was walked without reaching a context that settles the
// question. A return that passed through a singleton class is only reported
// as invalid when the targeted version is at least CRuby 3.4.
14913 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
14914 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14915 }
14916}
14917
/**
 * Check whether the given block exit node is allowed where it appears by
 * walking the context stack from the innermost context outwards. If a context
 * that forbids block exits is reached first, the node is appended to the
 * parser's current list of block exits so that its validity can be decided
 * later.
 *
 * NOTE(review): the embedded line numbers have gaps inside the switch below,
 * so this listing may be missing some case labels - confirm against the full
 * file before relying on the exact set of contexts in each group.
 */
14922static void
14923parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
14924 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14925 switch (context_node->context) {
14932 case PM_CONTEXT_DEFINED:
14933 case PM_CONTEXT_FOR:
14940 case PM_CONTEXT_POSTEXE:
14941 case PM_CONTEXT_UNTIL:
14942 case PM_CONTEXT_WHILE:
14943 // These are the good cases. We're allowed to have a block exit
14944 // in these contexts.
14945 return;
14946 case PM_CONTEXT_DEF:
14951 case PM_CONTEXT_MAIN:
14952 case PM_CONTEXT_PREEXE:
14953 case PM_CONTEXT_SCLASS:
14957 // These are the bad cases. We're not allowed to have a block
14958 // exit in these contexts.
14959 //
14960 // If we get here, then we're about to mark this block exit
14961 // as invalid. However, it could later _become_ valid if we
14962 // find a trailing while/until on the expression. In this
14963 // case instead of adding the error here, we'll add the
14964 // block exit to the list of exits for the expression, and
14965 // the node parsing will handle validating it instead.
14966 assert(parser->current_block_exits != NULL);
14967 pm_node_list_append(parser->current_block_exits, node);
14968 return;
14972 case PM_CONTEXT_BEGIN:
14973 case PM_CONTEXT_CASE_IN:
14978 case PM_CONTEXT_CLASS:
14980 case PM_CONTEXT_ELSE:
14981 case PM_CONTEXT_ELSIF:
14982 case PM_CONTEXT_EMBEXPR:
14984 case PM_CONTEXT_IF:
14988 case PM_CONTEXT_MODULE:
14990 case PM_CONTEXT_PARENS:
14993 case PM_CONTEXT_TERNARY:
14994 case PM_CONTEXT_UNLESS:
14995 // In these contexts we should continue walking up the list of
14996 // contexts.
14997 break;
14998 case PM_CONTEXT_NONE:
14999 // This case should never happen.
15000 assert(false && "unreachable");
15001 break;
15002 }
15003 }
15004}
15005
15010static pm_node_list_t *
15011push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15012 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15013 parser->current_block_exits = current_block_exits;
15014 return previous_block_exits;
15015}
15016
15022static void
15023flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15024 pm_node_t *block_exit;
15025 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15026 const char *type;
15027
15028 switch (PM_NODE_TYPE(block_exit)) {
15029 case PM_BREAK_NODE: type = "break"; break;
15030 case PM_NEXT_NODE: type = "next"; break;
15031 case PM_REDO_NODE: type = "redo"; break;
15032 default: assert(false && "unreachable"); type = ""; break;
15033 }
15034
15035 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15036 }
15037
15038 parser->current_block_exits = previous_block_exits;
15039}
15040
15045static void
15046pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15047 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15048 // If we matched a trailing while/until, then all of the block exits in
15049 // the contained list are valid. In this case we do not need to do
15050 // anything.
15051 parser->current_block_exits = previous_block_exits;
15052 } else if (previous_block_exits != NULL) {
15053 // If we did not matching a trailing while/until, then all of the block
15054 // exits contained in the list are invalid for this specific context.
15055 // However, they could still become valid in a higher level context if
15056 // there is another list above this one. In this case we'll push all of
15057 // the block exits up to the previous list.
15058 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15059 parser->current_block_exits = previous_block_exits;
15060 } else {
15061 // If we did not match a trailing while/until and this was the last
15062 // chance to do so, then all of the block exits in the list are invalid
15063 // and we need to add an error for each of them.
15064 flush_block_exits(parser, previous_block_exits);
15065 }
15066}
15067
15068static inline pm_node_t *
15069parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15070 context_push(parser, PM_CONTEXT_PREDICATE);
15071 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15072 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15073
15074 // Predicates are closed by a term, a "then", or a term and then a "then".
15075 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15076
15077 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15078 predicate_closed = true;
15079 *then_keyword = parser->previous;
15080 }
15081
15082 if (!predicate_closed) {
15083 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15084 }
15085
15086 context_pop(parser);
15087 return predicate;
15088}
15089
15090static inline pm_node_t *
15091parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15092 pm_node_list_t current_block_exits = { 0 };
15093 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15094
15095 pm_token_t keyword = parser->previous;
15096 pm_token_t then_keyword = not_provided(parser);
15097
15098 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15099 pm_statements_node_t *statements = NULL;
15100
15101 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15102 pm_accepts_block_stack_push(parser, true);
15103 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15104 pm_accepts_block_stack_pop(parser);
15105 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15106 }
15107
15108 pm_token_t end_keyword = not_provided(parser);
15109 pm_node_t *parent = NULL;
15110
15111 switch (context) {
15112 case PM_CONTEXT_IF:
15113 parent = UP(pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
15114 break;
15115 case PM_CONTEXT_UNLESS:
15116 parent = UP(pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements));
15117 break;
15118 default:
15119 assert(false && "unreachable");
15120 break;
15121 }
15122
15123 pm_node_t *current = parent;
15124
15125 // Parse any number of elsif clauses. This will form a linked list of if
15126 // nodes pointing to each other from the top.
15127 if (context == PM_CONTEXT_IF) {
15128 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15129 if (parser_end_of_line_p(parser)) {
15130 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15131 }
15132
15133 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15134 pm_token_t elsif_keyword = parser->current;
15135 parser_lex(parser);
15136
15137 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15138 pm_accepts_block_stack_push(parser, true);
15139
15140 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15141 pm_accepts_block_stack_pop(parser);
15142 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15143
15144 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
15145 ((pm_if_node_t *) current)->subsequent = elsif;
15146 current = elsif;
15147 }
15148 }
15149
15150 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15151 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15152 opening_newline_index = token_newline_index(parser);
15153
15154 parser_lex(parser);
15155 pm_token_t else_keyword = parser->previous;
15156
15157 pm_accepts_block_stack_push(parser, true);
15158 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15159 pm_accepts_block_stack_pop(parser);
15160
15161 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15162 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15163 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15164
15165 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15166
15167 switch (context) {
15168 case PM_CONTEXT_IF:
15169 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15170 break;
15171 case PM_CONTEXT_UNLESS:
15172 ((pm_unless_node_t *) parent)->else_clause = else_node;
15173 break;
15174 default:
15175 assert(false && "unreachable");
15176 break;
15177 }
15178 } else {
15179 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15180 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15181 }
15182
15183 // Set the appropriate end location for all of the nodes in the subtree.
15184 switch (context) {
15185 case PM_CONTEXT_IF: {
15186 pm_node_t *current = parent;
15187 bool recursing = true;
15188
15189 while (recursing) {
15190 switch (PM_NODE_TYPE(current)) {
15191 case PM_IF_NODE:
15192 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15193 current = ((pm_if_node_t *) current)->subsequent;
15194 recursing = current != NULL;
15195 break;
15196 case PM_ELSE_NODE:
15197 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15198 recursing = false;
15199 break;
15200 default: {
15201 recursing = false;
15202 break;
15203 }
15204 }
15205 }
15206 break;
15207 }
15208 case PM_CONTEXT_UNLESS:
15209 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15210 break;
15211 default:
15212 assert(false && "unreachable");
15213 break;
15214 }
15215
15216 pop_block_exits(parser, previous_block_exits);
15217 pm_node_list_free(&current_block_exits);
15218
15219 return parent;
15220}
15221
/**
 * The case labels for every keyword token type. The first label omits the
 * leading `case` and the last one omits the trailing `:`, so that the macro is
 * written as `case PM_CASE_KEYWORD:` inside of a switch statement.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15237
/**
 * The case labels for the operator token types that are accepted wherever an
 * operator can be used as a name (for example as the content of a symbol or as
 * the name of a method being defined). Written as `case PM_CASE_OPERATOR:`
 * inside of a switch statement.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15250
/**
 * The case labels for the token types listed below: numeric literals, the
 * opening tokens of string-like literals, literal keywords, the lambda arrow,
 * heredoc starts, and character literals. Written as `case PM_CASE_PRIMITIVE:`
 * inside of a switch statement.
 *
 * NOTE(review): judging by the name these are the tokens that can begin a
 * primitive value; confirm against the switch statements that use it.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
15265
/**
 * The case labels for the token types listed below. Written as
 * `case PM_CASE_PARAMETER:` inside of a switch statement.
 *
 * NOTE(review): judging by the name these are the tokens that can begin a
 * parameter; confirm against the switch statements that use it.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
15274
/**
 * The case labels for the node types listed below. Unlike the other PM_CASE_*
 * macros, these labels are node types rather than token types. Written as
 * `case PM_CASE_WRITABLE:` inside of a switch statement.
 *
 * NOTE(review): judging by the name these are the nodes that can be turned
 * into the target of a write; confirm against the switch statements that use
 * it.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
15283
15284// Assert here that the flags are the same so that we can safely switch the type
15285// of the node without having to move the flags.
15286PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15287
15292static inline pm_node_flags_t
15293parse_unescaped_encoding(const pm_parser_t *parser) {
15294 if (parser->explicit_encoding != NULL) {
15296 // If the there's an explicit encoding and it's using a UTF-8 escape
15297 // sequence, then mark the string as UTF-8.
15298 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15299 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15300 // If there's a non-UTF-8 escape sequence being used, then the
15301 // string uses the source encoding, unless the source is marked as
15302 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15303 // order to keep the string valid.
15304 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15305 }
15306 }
15307 return 0;
15308}
15309
15314static pm_node_t *
15315parse_string_part(pm_parser_t *parser, uint16_t depth) {
15316 switch (parser->current.type) {
15317 // Here the lexer has returned to us plain string content. In this case
15318 // we'll create a string node that has no opening or closing and return that
15319 // as the part. These kinds of parts look like:
15320 //
15321 // "aaa #{bbb} #@ccc ddd"
15322 // ^^^^ ^ ^^^^
15323 case PM_TOKEN_STRING_CONTENT: {
15324 pm_token_t opening = not_provided(parser);
15325 pm_token_t closing = not_provided(parser);
15326
15327 pm_node_t *node = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
15328 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15329
15330 parser_lex(parser);
15331 return node;
15332 }
15333 // Here the lexer has returned the beginning of an embedded expression. In
15334 // that case we'll parse the inner statements and return that as the part.
15335 // These kinds of parts look like:
15336 //
15337 // "aaa #{bbb} #@ccc ddd"
15338 // ^^^^^^
15339 case PM_TOKEN_EMBEXPR_BEGIN: {
15340 // Ruby disallows seeing encoding around interpolation in strings,
15341 // even though it is known at parse time.
15342 parser->explicit_encoding = NULL;
15343
15344 pm_lex_state_t state = parser->lex_state;
15345 int brace_nesting = parser->brace_nesting;
15346
15347 parser->brace_nesting = 0;
15348 lex_state_set(parser, PM_LEX_STATE_BEG);
15349 parser_lex(parser);
15350
15351 pm_token_t opening = parser->previous;
15352 pm_statements_node_t *statements = NULL;
15353
15354 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
15355 pm_accepts_block_stack_push(parser, true);
15356 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15357 pm_accepts_block_stack_pop(parser);
15358 }
15359
15360 parser->brace_nesting = brace_nesting;
15361 lex_state_set(parser, state);
15362
15363 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15364 pm_token_t closing = parser->previous;
15365
15366 // If this set of embedded statements only contains a single
15367 // statement, then Ruby does not consider it as a possible statement
15368 // that could emit a line event.
15369 if (statements != NULL && statements->body.size == 1) {
15370 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15371 }
15372
15373 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &closing));
15374 }
15375
15376 // Here the lexer has returned the beginning of an embedded variable.
15377 // In that case we'll parse the variable and create an appropriate node
15378 // for it and then return that node. These kinds of parts look like:
15379 //
15380 // "aaa #{bbb} #@ccc ddd"
15381 // ^^^^^
15382 case PM_TOKEN_EMBVAR: {
15383 // Ruby disallows seeing encoding around interpolation in strings,
15384 // even though it is known at parse time.
15385 parser->explicit_encoding = NULL;
15386
15387 lex_state_set(parser, PM_LEX_STATE_BEG);
15388 parser_lex(parser);
15389
15390 pm_token_t operator = parser->previous;
15391 pm_node_t *variable;
15392
15393 switch (parser->current.type) {
15394 // In this case a back reference is being interpolated. We'll
15395 // create a global variable read node.
15396 case PM_TOKEN_BACK_REFERENCE:
15397 parser_lex(parser);
15398 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15399 break;
15400 // In this case an nth reference is being interpolated. We'll
15401 // create a global variable read node.
15402 case PM_TOKEN_NUMBERED_REFERENCE:
15403 parser_lex(parser);
15404 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15405 break;
15406 // In this case a global variable is being interpolated. We'll
15407 // create a global variable read node.
15408 case PM_TOKEN_GLOBAL_VARIABLE:
15409 parser_lex(parser);
15410 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15411 break;
15412 // In this case an instance variable is being interpolated.
15413 // We'll create an instance variable read node.
15414 case PM_TOKEN_INSTANCE_VARIABLE:
15415 parser_lex(parser);
15416 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15417 break;
15418 // In this case a class variable is being interpolated. We'll
15419 // create a class variable read node.
15420 case PM_TOKEN_CLASS_VARIABLE:
15421 parser_lex(parser);
15422 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15423 break;
15424 // We can hit here if we got an invalid token. In that case
15425 // we'll not attempt to lex this token and instead just return a
15426 // missing node.
15427 default:
15428 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15429 variable = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15430 break;
15431 }
15432
15433 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15434 }
15435 default:
15436 parser_lex(parser);
15437 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15438 return NULL;
15439 }
15440}
15441
15447static const uint8_t *
15448parse_operator_symbol_name(const pm_token_t *name) {
15449 switch (name->type) {
15450 case PM_TOKEN_TILDE:
15451 case PM_TOKEN_BANG:
15452 if (name->end[-1] == '@') return name->end - 1;
15454 default:
15455 return name->end;
15456 }
15457}
15458
15459static pm_node_t *
15460parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15461 pm_token_t closing = not_provided(parser);
15462 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
15463
15464 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15465
15466 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15467 parser_lex(parser);
15468
15469 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15470 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15471
15472 return UP(symbol);
15473}
15474
15480static pm_node_t *
15481parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15482 const pm_token_t opening = parser->previous;
15483
15484 if (lex_mode->mode != PM_LEX_STRING) {
15485 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15486
15487 switch (parser->current.type) {
15488 case PM_CASE_OPERATOR:
15489 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15490 case PM_TOKEN_IDENTIFIER:
15491 case PM_TOKEN_CONSTANT:
15492 case PM_TOKEN_INSTANCE_VARIABLE:
15493 case PM_TOKEN_METHOD_NAME:
15494 case PM_TOKEN_CLASS_VARIABLE:
15495 case PM_TOKEN_GLOBAL_VARIABLE:
15496 case PM_TOKEN_NUMBERED_REFERENCE:
15497 case PM_TOKEN_BACK_REFERENCE:
15498 case PM_CASE_KEYWORD:
15499 parser_lex(parser);
15500 break;
15501 default:
15502 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15503 break;
15504 }
15505
15506 pm_token_t closing = not_provided(parser);
15507 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15508
15509 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15510 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15511
15512 return UP(symbol);
15513 }
15514
15515 if (lex_mode->as.string.interpolation) {
15516 // If we have the end of the symbol, then we can return an empty symbol.
15517 if (match1(parser, PM_TOKEN_STRING_END)) {
15518 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15519 parser_lex(parser);
15520
15521 pm_token_t content = not_provided(parser);
15522 pm_token_t closing = parser->previous;
15523 return UP(pm_symbol_node_create(parser, &opening, &content, &closing));
15524 }
15525
15526 // Now we can parse the first part of the symbol.
15527 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15528
15529 // If we got a string part, then it's possible that we could transform
15530 // what looks like an interpolated symbol into a regular symbol.
15531 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15532 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15533 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15534
15535 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15536 }
15537
15538 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15539 if (part) pm_interpolated_symbol_node_append(symbol, part);
15540
15541 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15542 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15543 pm_interpolated_symbol_node_append(symbol, part);
15544 }
15545 }
15546
15547 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15548 if (match1(parser, PM_TOKEN_EOF)) {
15549 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15550 } else {
15551 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15552 }
15553
15554 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
15555 return UP(symbol);
15556 }
15557
15558 pm_token_t content;
15559 pm_string_t unescaped;
15560
15561 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15562 content = parser->current;
15563 unescaped = parser->current_string;
15564 parser_lex(parser);
15565
15566 // If we have two string contents in a row, then the content of this
15567 // symbol is split because of heredoc contents. This looks like:
15568 //
15569 // <<A; :'a
15570 // A
15571 // b'
15572 //
15573 // In this case, the best way we have to represent this is as an
15574 // interpolated string node, so that's what we'll do here.
15575 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15576 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15577 pm_token_t bounds = not_provided(parser);
15578
15579 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped));
15580 pm_interpolated_symbol_node_append(symbol, part);
15581
15582 part = UP(pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string));
15583 pm_interpolated_symbol_node_append(symbol, part);
15584
15585 if (next_state != PM_LEX_STATE_NONE) {
15586 lex_state_set(parser, next_state);
15587 }
15588
15589 parser_lex(parser);
15590 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15591
15592 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
15593 return UP(symbol);
15594 }
15595 } else {
15596 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15597 pm_string_shared_init(&unescaped, content.start, content.end);
15598 }
15599
15600 if (next_state != PM_LEX_STATE_NONE) {
15601 lex_state_set(parser, next_state);
15602 }
15603
15604 if (match1(parser, PM_TOKEN_EOF)) {
15605 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15606 } else {
15607 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15608 }
15609
15610 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15611}
15612
15617static inline pm_node_t *
15618parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15619 switch (parser->current.type) {
15620 case PM_CASE_OPERATOR: {
15621 const pm_token_t opening = not_provided(parser);
15622 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
15623 }
15624 case PM_CASE_KEYWORD:
15625 case PM_TOKEN_CONSTANT:
15626 case PM_TOKEN_IDENTIFIER:
15627 case PM_TOKEN_METHOD_NAME: {
15628 parser_lex(parser);
15629
15630 pm_token_t opening = not_provided(parser);
15631 pm_token_t closing = not_provided(parser);
15632 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15633
15634 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15635 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15636
15637 return UP(symbol);
15638 }
15639 case PM_TOKEN_SYMBOL_BEGIN: {
15640 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15641 parser_lex(parser);
15642
15643 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15644 }
15645 default:
15646 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
15647 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15648 }
15649}
15650
15657static inline pm_node_t *
15658parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
15659 switch (parser->current.type) {
15660 case PM_CASE_OPERATOR: {
15661 const pm_token_t opening = not_provided(parser);
15662 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
15663 }
15664 case PM_CASE_KEYWORD:
15665 case PM_TOKEN_CONSTANT:
15666 case PM_TOKEN_IDENTIFIER:
15667 case PM_TOKEN_METHOD_NAME: {
15668 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15669 parser_lex(parser);
15670
15671 pm_token_t opening = not_provided(parser);
15672 pm_token_t closing = not_provided(parser);
15673 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15674
15675 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15676 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15677
15678 return UP(symbol);
15679 }
15680 case PM_TOKEN_SYMBOL_BEGIN: {
15681 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15682 parser_lex(parser);
15683
15684 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15685 }
15686 case PM_TOKEN_BACK_REFERENCE:
15687 parser_lex(parser);
15688 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
15689 case PM_TOKEN_NUMBERED_REFERENCE:
15690 parser_lex(parser);
15691 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15692 case PM_TOKEN_GLOBAL_VARIABLE:
15693 parser_lex(parser);
15694 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
15695 default:
15696 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
15697 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15698 }
15699}
15700
15705static pm_node_t *
15706parse_variable(pm_parser_t *parser) {
15707 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15708 int depth;
15709 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
15710
15711 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
15712 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
15713 }
15714
15715 pm_scope_t *current_scope = parser->current_scope;
15716 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15717 if (is_numbered_param) {
15718 // When you use a numbered parameter, it implies the existence of
15719 // all of the locals that exist before it. For example, referencing
15720 // _2 means that _1 must exist. Therefore here we loop through all
15721 // of the possibilities and add them into the constant pool.
15722 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15723 for (uint8_t number = 1; number <= maximum; number++) {
15724 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15725 }
15726
15727 if (!match1(parser, PM_TOKEN_EQUAL)) {
15728 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15729 }
15730
15731 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
15732 pm_node_list_append(&current_scope->implicit_parameters, node);
15733
15734 return node;
15735 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15736 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
15737 pm_node_list_append(&current_scope->implicit_parameters, node);
15738
15739 return node;
15740 }
15741 }
15742
15743 return NULL;
15744}
15745
15749static pm_node_t *
15750parse_variable_call(pm_parser_t *parser) {
15751 pm_node_flags_t flags = 0;
15752
15753 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15754 pm_node_t *node = parse_variable(parser);
15755 if (node != NULL) return node;
15756 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15757 }
15758
15759 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
15760 pm_node_flag_set(UP(node), flags);
15761
15762 return UP(node);
15763}
15764
15770static inline pm_token_t
15771parse_method_definition_name(pm_parser_t *parser) {
15772 switch (parser->current.type) {
15773 case PM_CASE_KEYWORD:
15774 case PM_TOKEN_CONSTANT:
15775 case PM_TOKEN_METHOD_NAME:
15776 parser_lex(parser);
15777 return parser->previous;
15778 case PM_TOKEN_IDENTIFIER:
15779 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
15780 parser_lex(parser);
15781 return parser->previous;
15782 case PM_CASE_OPERATOR:
15783 lex_state_set(parser, PM_LEX_STATE_ENDFN);
15784 parser_lex(parser);
15785 return parser->previous;
15786 default:
15787 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
15788 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
15789 }
15790}
15791
15792static void
15793parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
15794 // Get a reference to the string struct that is being held by the string
15795 // node. This is the value we're going to actually manipulate.
15796 pm_string_ensure_owned(string);
15797
15798 // Now get the bounds of the existing string. We'll use this as a
15799 // destination to move bytes into. We'll also use it for bounds checking
15800 // since we don't require that these strings be null terminated.
15801 size_t dest_length = pm_string_length(string);
15802 const uint8_t *source_cursor = (uint8_t *) string->source;
15803 const uint8_t *source_end = source_cursor + dest_length;
15804
15805 // We're going to move bytes backward in the string when we get leading
15806 // whitespace, so we'll maintain a pointer to the current position in the
15807 // string that we're writing to.
15808 size_t trimmed_whitespace = 0;
15809
15810 // While we haven't reached the amount of common whitespace that we need to
15811 // trim and we haven't reached the end of the string, we'll keep trimming
15812 // whitespace. Trimming in this context means skipping over these bytes such
15813 // that they aren't copied into the new string.
15814 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
15815 if (*source_cursor == '\t') {
15816 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
15817 if (trimmed_whitespace > common_whitespace) break;
15818 } else {
15819 trimmed_whitespace++;
15820 }
15821
15822 source_cursor++;
15823 dest_length--;
15824 }
15825
15826 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
15827 string->length = dest_length;
15828}
15829
15833static void
15834parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
15835 // The next node should be dedented if it's the first node in the list or if
15836 // it follows a string node.
15837 bool dedent_next = true;
15838
15839 // Iterate over all nodes, and trim whitespace accordingly. We're going to
15840 // keep around two indices: a read and a write. If we end up trimming all of
15841 // the whitespace from a node, then we'll drop it from the list entirely.
15842 size_t write_index = 0;
15843
15844 pm_node_t *node;
15845 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
15846 // We're not manipulating child nodes that aren't strings. In this case
15847 // we'll skip past it and indicate that the subsequent node should not
15848 // be dedented.
15849 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
15850 nodes->nodes[write_index++] = node;
15851 dedent_next = false;
15852 continue;
15853 }
15854
15855 pm_string_node_t *string_node = ((pm_string_node_t *) node);
15856 if (dedent_next) {
15857 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
15858 }
15859
15860 if (string_node->unescaped.length == 0) {
15861 pm_node_destroy(parser, node);
15862 } else {
15863 nodes->nodes[write_index++] = node;
15864 }
15865
15866 // We always dedent the next node if it follows a string node.
15867 dedent_next = true;
15868 }
15869
15870 nodes->size = write_index;
15871}
15872
15876static pm_token_t
15877parse_strings_empty_content(const uint8_t *location) {
15878 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15879}
15880
15884static inline pm_node_t *
15885parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
15886 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
15887 bool concating = false;
15888
15889 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
15890 pm_node_t *node = NULL;
15891
15892 // Here we have found a string literal. We'll parse it and add it to
15893 // the list of strings.
15894 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
15895 assert(lex_mode->mode == PM_LEX_STRING);
15896 bool lex_interpolation = lex_mode->as.string.interpolation;
15897 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
15898
15899 pm_token_t opening = parser->current;
15900 parser_lex(parser);
15901
15902 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15903 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15904 // If we get here, then we have an end immediately after a
15905 // start. In that case we'll create an empty content token and
15906 // return an uninterpolated string.
15907 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15908 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
15909
15910 pm_string_shared_init(&string->unescaped, content.start, content.end);
15911 node = UP(string);
15912 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15913 // If we get here, then we have an end of a label immediately
15914 // after a start. In that case we'll create an empty symbol
15915 // node.
15916 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15917 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
15918
15919 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
15920 node = UP(symbol);
15921
15922 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15923 } else if (!lex_interpolation) {
15924 // If we don't accept interpolation then we expect the string to
15925 // start with a single string content node.
15926 pm_string_t unescaped;
15927 pm_token_t content;
15928
15929 if (match1(parser, PM_TOKEN_EOF)) {
15930 unescaped = PM_STRING_EMPTY;
15931 content = not_provided(parser);
15932 } else {
15933 unescaped = parser->current_string;
15934 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
15935 content = parser->previous;
15936 }
15937
15938 // It is unfortunately possible to have multiple string content
15939 // nodes in a row in the case that there's heredoc content in
15940 // the middle of the string, like this cursed example:
15941 //
15942 // <<-END+'b
15943 // a
15944 // END
15945 // c'+'d'
15946 //
15947 // In that case we need to switch to an interpolated string to
15948 // be able to contain all of the parts.
15949 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15950 pm_node_list_t parts = { 0 };
15951
15952 pm_token_t delimiters = not_provided(parser);
15953 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped));
15954 pm_node_list_append(&parts, part);
15955
15956 do {
15957 part = UP(pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters));
15958 pm_node_list_append(&parts, part);
15959 parser_lex(parser);
15960 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
15961
15962 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15963 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
15964
15965 pm_node_list_free(&parts);
15966 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15967 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
15968 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15969 } else if (match1(parser, PM_TOKEN_EOF)) {
15970 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
15971 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
15972 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
15973 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
15974 } else {
15975 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
15976 parser->previous.start = parser->previous.end;
15977 parser->previous.type = PM_TOKEN_MISSING;
15978 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
15979 }
15980 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15981 // In this case we've hit string content so we know the string
15982 // at least has something in it. We'll need to check if the
15983 // following token is the end (in which case we can return a
15984 // plain string) or if it's not then it has interpolation.
15985 pm_token_t content = parser->current;
15986 pm_string_t unescaped = parser->current_string;
15987 parser_lex(parser);
15988
15989 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15990 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
15991 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15992
15993 // Kind of odd behavior, but basically if we have an
15994 // unterminated string and it ends in a newline, we back up one
15995 // character so that the error message is on the last line of
15996 // content in the string.
15997 if (!accept1(parser, PM_TOKEN_STRING_END)) {
15998 const uint8_t *location = parser->previous.end;
15999 if (location > parser->start && location[-1] == '\n') location--;
16000 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16001
16002 parser->previous.start = parser->previous.end;
16003 parser->previous.type = PM_TOKEN_MISSING;
16004 }
16005 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16006 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16007 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16008 } else {
16009 // If we get here, then we have interpolation so we'll need
16010 // to create a string or symbol node with interpolation.
16011 pm_node_list_t parts = { 0 };
16012 pm_token_t string_opening = not_provided(parser);
16013 pm_token_t string_closing = not_provided(parser);
16014
16015 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped));
16016 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16017 pm_node_list_append(&parts, part);
16018
16019 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16020 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16021 pm_node_list_append(&parts, part);
16022 }
16023 }
16024
16025 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16026 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16027 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16028 } else if (match1(parser, PM_TOKEN_EOF)) {
16029 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16030 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16031 } else {
16032 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16033 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16034 }
16035
16036 pm_node_list_free(&parts);
16037 }
16038 } else {
16039 // If we get here, then the first part of the string is not plain
16040 // string content, in which case we need to parse the string as an
16041 // interpolated string.
16042 pm_node_list_t parts = { 0 };
16043 pm_node_t *part;
16044
16045 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16046 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16047 pm_node_list_append(&parts, part);
16048 }
16049 }
16050
16051 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16052 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16053 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16054 } else if (match1(parser, PM_TOKEN_EOF)) {
16055 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16056 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16057 } else {
16058 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16059 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16060 }
16061
16062 pm_node_list_free(&parts);
16063 }
16064
16065 if (current == NULL) {
16066 // If the node we just parsed is a symbol node, then we can't
16067 // concatenate it with anything else, so we can now return that
16068 // node.
16069 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16070 return node;
16071 }
16072
16073 // If we don't already have a node, then it's fine and we can just
16074 // set the result to be the node we just parsed.
16075 current = node;
16076 } else {
16077 // Otherwise we need to check the type of the node we just parsed.
16078 // If it cannot be concatenated with the previous node, then we'll
16079 // need to add a syntax error.
16080 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16081 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16082 }
16083
16084 // If we haven't already created our container for concatenation,
16085 // we'll do that now.
16086 if (!concating) {
16087 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16088 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16089 }
16090
16091 concating = true;
16092 pm_token_t bounds = not_provided(parser);
16093
16094 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16095 pm_interpolated_string_node_append(container, current);
16096 current = UP(container);
16097 }
16098
16099 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16100 }
16101 }
16102
16103 return current;
16104}
16105
/**
 * Flag values passed to parse_pattern through its `flags` argument. Call sites
 * combine them with bitwise-or (for example
 * PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI). PM_PARSE_PATTERN_SINGLE has
 * no bits set.
 */
#define PM_PARSE_PATTERN_SINGLE 0x0
#define PM_PARSE_PATTERN_TOP 0x1
#define PM_PARSE_PATTERN_MULTI 0x2
16109
16110static pm_node_t *
16111parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16112
16118static void
16119parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16120 // Skip this capture if it starts with an underscore.
16121 if (peek_at(parser, location->start) == '_') return;
16122
16123 if (pm_constant_id_list_includes(captures, capture)) {
16124 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16125 } else {
16126 pm_constant_id_list_append(captures, capture);
16127 }
16128}
16129
16133static pm_node_t *
16134parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16135 // Now, if there are any :: operators that follow, parse them as constant
16136 // path nodes.
16137 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16138 pm_token_t delimiter = parser->previous;
16139 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16140 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16141 }
16142
16143 // If there is a [ or ( that follows, then this is part of a larger pattern
16144 // expression. We'll parse the inner pattern here, then modify the returned
16145 // inner pattern with our constant path attached.
16146 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16147 return node;
16148 }
16149
16150 pm_token_t opening;
16151 pm_token_t closing;
16152 pm_node_t *inner = NULL;
16153
16154 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16155 opening = parser->previous;
16156 accept1(parser, PM_TOKEN_NEWLINE);
16157
16158 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16159 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16160 accept1(parser, PM_TOKEN_NEWLINE);
16161 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16162 }
16163
16164 closing = parser->previous;
16165 } else {
16166 parser_lex(parser);
16167 opening = parser->previous;
16168 accept1(parser, PM_TOKEN_NEWLINE);
16169
16170 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16171 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16172 accept1(parser, PM_TOKEN_NEWLINE);
16173 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16174 }
16175
16176 closing = parser->previous;
16177 }
16178
16179 if (!inner) {
16180 // If there was no inner pattern, then we have something like Foo() or
16181 // Foo[]. In that case we'll create an array pattern with no requireds.
16182 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16183 }
16184
16185 // Now that we have the inner pattern, check to see if it's an array, find,
16186 // or hash pattern. If it is, then we'll attach our constant path to it if
16187 // it doesn't already have a constant. If it's not one of those node types
16188 // or it does have a constant, then we'll create an array pattern.
16189 switch (PM_NODE_TYPE(inner)) {
16190 case PM_ARRAY_PATTERN_NODE: {
16191 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16192
16193 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16194 pattern_node->base.location.start = node->location.start;
16195 pattern_node->base.location.end = closing.end;
16196
16197 pattern_node->constant = node;
16198 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16199 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16200
16201 return UP(pattern_node);
16202 }
16203
16204 break;
16205 }
16206 case PM_FIND_PATTERN_NODE: {
16207 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16208
16209 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16210 pattern_node->base.location.start = node->location.start;
16211 pattern_node->base.location.end = closing.end;
16212
16213 pattern_node->constant = node;
16214 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16215 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16216
16217 return UP(pattern_node);
16218 }
16219
16220 break;
16221 }
16222 case PM_HASH_PATTERN_NODE: {
16223 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16224
16225 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16226 pattern_node->base.location.start = node->location.start;
16227 pattern_node->base.location.end = closing.end;
16228
16229 pattern_node->constant = node;
16230 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16231 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16232
16233 return UP(pattern_node);
16234 }
16235
16236 break;
16237 }
16238 default:
16239 break;
16240 }
16241
16242 // If we got here, then we didn't return one of the inner patterns by
16243 // attaching its constant. In this case we'll create an array pattern and
16244 // attach our constant to it.
16245 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16246 pm_array_pattern_node_requireds_append(pattern_node, inner);
16247 return UP(pattern_node);
16248}
16249
16253static pm_splat_node_t *
16254parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16255 assert(parser->previous.type == PM_TOKEN_USTAR);
16256 pm_token_t operator = parser->previous;
16257 pm_node_t *name = NULL;
16258
16259 // Rest patterns don't necessarily have a name associated with them. So we
16260 // will check for that here. If they do, then we'll add it to the local
16261 // table since this pattern will cause it to become a local variable.
16262 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16263 pm_token_t identifier = parser->previous;
16264 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16265
16266 int depth;
16267 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16268 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16269 }
16270
16271 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16272 name = UP(pm_local_variable_target_node_create(
16273 parser,
16274 &PM_LOCATION_TOKEN_VALUE(&identifier),
16275 constant_id,
16276 (uint32_t) (depth == -1 ? 0 : depth)
16277 ));
16278 }
16279
16280 // Finally we can return the created node.
16281 return pm_splat_node_create(parser, &operator, name);
16282}
16283
16287static pm_node_t *
16288parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16289 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16290 parser_lex(parser);
16291
16292 pm_token_t operator = parser->previous;
16293 pm_node_t *value = NULL;
16294
16295 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16296 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16297 }
16298
16299 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16300 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16301
16302 int depth;
16303 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16304 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16305 }
16306
16307 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16308 value = UP(pm_local_variable_target_node_create(
16309 parser,
16310 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16311 constant_id,
16312 (uint32_t) (depth == -1 ? 0 : depth)
16313 ));
16314 }
16315
16316 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16317}
16318
16323static bool
16324pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16325 ptrdiff_t length = end - start;
16326 if (length == 0) return false;
16327
16328 // First ensure that it starts with a valid identifier starting character.
16329 size_t width = char_is_identifier_start(parser, start, end - start);
16330 if (width == 0) return false;
16331
16332 // Next, ensure that it's not an uppercase character.
16333 if (parser->encoding_changed) {
16334 if (parser->encoding->isupper_char(start, length)) return false;
16335 } else {
16336 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16337 }
16338
16339 // Next, iterate through all of the bytes of the string to ensure that they
16340 // are all valid identifier characters.
16341 const uint8_t *cursor = start + width;
16342 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16343 return cursor == end;
16344}
16345
16350static pm_node_t *
16351parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16352 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16353
16354 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16355 int depth = -1;
16356
16357 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
16358 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16359 } else {
16360 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16361
16362 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16363 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16364 }
16365 }
16366
16367 if (depth == -1) {
16368 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
16369 }
16370
16371 parse_pattern_capture(parser, captures, constant_id, value_loc);
16372 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16373 parser,
16374 value_loc,
16375 constant_id,
16376 (uint32_t) (depth == -1 ? 0 : depth)
16377 );
16378
16379 return UP(pm_implicit_node_create(parser, UP(target)));
16380}
16381
16386static void
16387parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16388 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
16389 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16390 }
16391}
16392
16397parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16398 pm_node_list_t assocs = { 0 };
16399 pm_static_literals_t keys = { 0 };
16400 pm_node_t *rest = NULL;
16401
16402 switch (PM_NODE_TYPE(first_node)) {
16403 case PM_ASSOC_SPLAT_NODE:
16404 case PM_NO_KEYWORDS_PARAMETER_NODE:
16405 rest = first_node;
16406 break;
16407 case PM_SYMBOL_NODE: {
16408 if (pm_symbol_node_label_p(first_node)) {
16409 parse_pattern_hash_key(parser, &keys, first_node);
16410 pm_node_t *value;
16411
16412 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16413 // Otherwise, we will create an implicit local variable
16414 // target for the value.
16415 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16416 } else {
16417 // Here we have a value for the first assoc in the list, so
16418 // we will parse it now.
16419 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16420 }
16421
16422 pm_token_t operator = not_provided(parser);
16423 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
16424
16425 pm_node_list_append(&assocs, assoc);
16426 break;
16427 }
16428 }
16430 default: {
16431 // If we get anything else, then this is an error. For this we'll
16432 // create a missing node for the value and create an assoc node for
16433 // the first node in the list.
16434 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16435 pm_parser_err_node(parser, first_node, diag_id);
16436
16437 pm_token_t operator = not_provided(parser);
16438 pm_node_t *value = UP(pm_missing_node_create(parser, first_node->location.start, first_node->location.end));
16439 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
16440
16441 pm_node_list_append(&assocs, assoc);
16442 break;
16443 }
16444 }
16445
16446 // If there are any other assocs, then we'll parse them now.
16447 while (accept1(parser, PM_TOKEN_COMMA)) {
16448 // Here we need to break to support trailing commas.
16449 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16450 // Trailing commas are not allowed to follow a rest pattern.
16451 if (rest != NULL) {
16452 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16453 }
16454
16455 break;
16456 }
16457
16458 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16459 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16460
16461 if (rest == NULL) {
16462 rest = assoc;
16463 } else {
16464 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16465 pm_node_list_append(&assocs, assoc);
16466 }
16467 } else {
16468 pm_node_t *key;
16469
16470 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16471 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16472
16473 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16474 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16475 } else if (!pm_symbol_node_label_p(key)) {
16476 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16477 }
16478 } else {
16479 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16480 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16481 }
16482
16483 parse_pattern_hash_key(parser, &keys, key);
16484 pm_node_t *value = NULL;
16485
16486 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16487 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16488 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16489 } else {
16490 value = UP(pm_missing_node_create(parser, key->location.end, key->location.end));
16491 }
16492 } else {
16493 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16494 }
16495
16496 pm_token_t operator = not_provided(parser);
16497 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, &operator, value));
16498
16499 if (rest != NULL) {
16500 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16501 }
16502
16503 pm_node_list_append(&assocs, assoc);
16504 }
16505 }
16506
16507 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16508 xfree(assocs.nodes);
16509
16510 pm_static_literals_free(&keys);
16511 return node;
16512}
16513
16517static pm_node_t *
16518parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16519 switch (parser->current.type) {
16520 case PM_TOKEN_IDENTIFIER:
16521 case PM_TOKEN_METHOD_NAME: {
16522 parser_lex(parser);
16523 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16524
16525 int depth;
16526 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16527 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16528 }
16529
16530 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16531 return UP(pm_local_variable_target_node_create(
16532 parser,
16533 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16534 constant_id,
16535 (uint32_t) (depth == -1 ? 0 : depth)
16536 ));
16537 }
16538 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16539 pm_token_t opening = parser->current;
16540 parser_lex(parser);
16541
16542 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16543 // If we have an empty array pattern, then we'll just return a new
16544 // array pattern node.
16545 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16546 }
16547
16548 // Otherwise, we'll parse the inner pattern, then deal with it depending
16549 // on the type it returns.
16550 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16551
16552 accept1(parser, PM_TOKEN_NEWLINE);
16553 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16554 pm_token_t closing = parser->previous;
16555
16556 switch (PM_NODE_TYPE(inner)) {
16557 case PM_ARRAY_PATTERN_NODE: {
16558 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16559 if (pattern_node->opening_loc.start == NULL) {
16560 pattern_node->base.location.start = opening.start;
16561 pattern_node->base.location.end = closing.end;
16562
16563 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16564 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16565
16566 return UP(pattern_node);
16567 }
16568
16569 break;
16570 }
16571 case PM_FIND_PATTERN_NODE: {
16572 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16573 if (pattern_node->opening_loc.start == NULL) {
16574 pattern_node->base.location.start = opening.start;
16575 pattern_node->base.location.end = closing.end;
16576
16577 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16578 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16579
16580 return UP(pattern_node);
16581 }
16582
16583 break;
16584 }
16585 default:
16586 break;
16587 }
16588
16589 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16590 pm_array_pattern_node_requireds_append(node, inner);
16591 return UP(node);
16592 }
16593 case PM_TOKEN_BRACE_LEFT: {
16594 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16595 parser->pattern_matching_newlines = false;
16596
16598 pm_token_t opening = parser->current;
16599 parser_lex(parser);
16600
16601 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16602 // If we have an empty hash pattern, then we'll just return a new hash
16603 // pattern node.
16604 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16605 } else {
16606 pm_node_t *first_node;
16607
16608 switch (parser->current.type) {
16609 case PM_TOKEN_LABEL:
16610 parser_lex(parser);
16611 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16612 break;
16613 case PM_TOKEN_USTAR_STAR:
16614 first_node = parse_pattern_keyword_rest(parser, captures);
16615 break;
16616 case PM_TOKEN_STRING_BEGIN:
16617 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16618 break;
16619 default: {
16620 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16621 parser_lex(parser);
16622
16623 first_node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
16624 break;
16625 }
16626 }
16627
16628 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
16629
16630 accept1(parser, PM_TOKEN_NEWLINE);
16631 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
16632 pm_token_t closing = parser->previous;
16633
16634 node->base.location.start = opening.start;
16635 node->base.location.end = closing.end;
16636
16637 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16638 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16639 }
16640
16641 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16642 return UP(node);
16643 }
16644 case PM_TOKEN_UDOT_DOT:
16645 case PM_TOKEN_UDOT_DOT_DOT: {
16646 pm_token_t operator = parser->current;
16647 parser_lex(parser);
16648
16649 // Since we have a unary range operator, we need to parse the subsequent
16650 // expression as the right side of the range.
16651 switch (parser->current.type) {
16652 case PM_CASE_PRIMITIVE: {
16653 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16654 return UP(pm_range_node_create(parser, NULL, &operator, right));
16655 }
16656 default: {
16657 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
16658 pm_node_t *right = UP(pm_missing_node_create(parser, operator.start, operator.end));
16659 return UP(pm_range_node_create(parser, NULL, &operator, right));
16660 }
16661 }
16662 }
16663 case PM_CASE_PRIMITIVE: {
16664 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
16665
16666 // If we found a label, we need to immediately return to the caller.
16667 if (pm_symbol_node_label_p(node)) return node;
16668
16669 // Call nodes (arithmetic operations) are not allowed in patterns
16670 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
16671 pm_parser_err_node(parser, node, diag_id);
16672 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
16673 pm_node_destroy(parser, node);
16674 return UP(missing_node);
16675 }
16676
16677 // Now that we have a primitive, we need to check if it's part of a range.
16678 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
16679 pm_token_t operator = parser->previous;
16680
16681 // Now that we have the operator, we need to check if this is followed
16682 // by another expression. If it is, then we will create a full range
16683 // node. Otherwise, we'll create an endless range.
16684 switch (parser->current.type) {
16685 case PM_CASE_PRIMITIVE: {
16686 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16687 return UP(pm_range_node_create(parser, node, &operator, right));
16688 }
16689 default:
16690 return UP(pm_range_node_create(parser, node, &operator, NULL));
16691 }
16692 }
16693
16694 return node;
16695 }
16696 case PM_TOKEN_CARET: {
16697 parser_lex(parser);
16698 pm_token_t operator = parser->previous;
16699
16700 // At this point we have a pin operator. We need to check the subsequent
16701 // expression to determine if it's a variable or an expression.
16702 switch (parser->current.type) {
16703 case PM_TOKEN_IDENTIFIER: {
16704 parser_lex(parser);
16705 pm_node_t *variable = UP(parse_variable(parser));
16706
16707 if (variable == NULL) {
16708 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16709 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
16710 }
16711
16712 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16713 }
16714 case PM_TOKEN_INSTANCE_VARIABLE: {
16715 parser_lex(parser);
16716 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
16717
16718 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16719 }
16720 case PM_TOKEN_CLASS_VARIABLE: {
16721 parser_lex(parser);
16722 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
16723
16724 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16725 }
16726 case PM_TOKEN_GLOBAL_VARIABLE: {
16727 parser_lex(parser);
16728 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
16729
16730 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16731 }
16732 case PM_TOKEN_NUMBERED_REFERENCE: {
16733 parser_lex(parser);
16734 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16735
16736 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16737 }
16738 case PM_TOKEN_BACK_REFERENCE: {
16739 parser_lex(parser);
16740 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
16741
16742 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16743 }
16744 case PM_TOKEN_PARENTHESIS_LEFT: {
16745 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16746 parser->pattern_matching_newlines = false;
16747
16748 pm_token_t lparen = parser->current;
16749 parser_lex(parser);
16750
16751 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
16752 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16753
16754 accept1(parser, PM_TOKEN_NEWLINE);
16755 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16756 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
16757 }
16758 default: {
16759 // If we get here, then we have a pin operator followed by something
16760 // not understood. We'll create a missing node and return that.
16761 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
16762 pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end));
16763 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16764 }
16765 }
16766 }
16767 case PM_TOKEN_UCOLON_COLON: {
16768 pm_token_t delimiter = parser->current;
16769 parser_lex(parser);
16770
16771 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16772 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16773
16774 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
16775 }
16776 case PM_TOKEN_CONSTANT: {
16777 pm_token_t constant = parser->current;
16778 parser_lex(parser);
16779
16780 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
16781 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
16782 }
16783 default:
16784 pm_parser_err_current(parser, diag_id);
16785 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
16786 }
16787}
16788
16789static bool
16790parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
16791 switch (PM_NODE_TYPE(node)) {
16792 case PM_LOCAL_VARIABLE_TARGET_NODE:
16793 pm_parser_err((pm_parser_t *) data, node->location.start, node->location.end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
16794 return false;
16795 default:
16796 return true;
16797 }
16798}
16799
16804static void
16805parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
16806 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
16807}
16808
16813static pm_node_t *
16814parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
16815 pm_node_t *node = first_node;
16816 bool alternation = false;
16817
16818 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
16819 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
16820 parse_pattern_alternation_error(parser, node);
16821 }
16822
16823 switch (parser->current.type) {
16824 case PM_TOKEN_IDENTIFIER:
16825 case PM_TOKEN_BRACKET_LEFT_ARRAY:
16826 case PM_TOKEN_BRACE_LEFT:
16827 case PM_TOKEN_CARET:
16828 case PM_TOKEN_CONSTANT:
16829 case PM_TOKEN_UCOLON_COLON:
16830 case PM_TOKEN_UDOT_DOT:
16831 case PM_TOKEN_UDOT_DOT_DOT:
16832 case PM_CASE_PRIMITIVE: {
16833 if (!alternation) {
16834 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16835 } else {
16836 pm_token_t operator = parser->previous;
16837 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
16838
16839 if (captures->size) parse_pattern_alternation_error(parser, right);
16840 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16841 }
16842
16843 break;
16844 }
16845 case PM_TOKEN_PARENTHESIS_LEFT:
16846 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
16847 pm_token_t operator = parser->previous;
16848 pm_token_t opening = parser->current;
16849 parser_lex(parser);
16850
16851 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16852 accept1(parser, PM_TOKEN_NEWLINE);
16853 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16854 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
16855
16856 if (!alternation) {
16857 node = right;
16858 } else {
16859 if (captures->size) parse_pattern_alternation_error(parser, right);
16860 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16861 }
16862
16863 break;
16864 }
16865 default: {
16866 pm_parser_err_current(parser, diag_id);
16867 pm_node_t *right = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
16868
16869 if (!alternation) {
16870 node = right;
16871 } else {
16872 if (captures->size) parse_pattern_alternation_error(parser, right);
16873 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
16874 }
16875
16876 break;
16877 }
16878 }
16879 }
16880
16881 // If we have an =>, then we are assigning this pattern to a variable.
16882 // In this case we should create an assignment node.
16883 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
16884 pm_token_t operator = parser->previous;
16885 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
16886
16887 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16888 int depth;
16889
16890 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16891 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16892 }
16893
16894 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16895 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16896 parser,
16897 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16898 constant_id,
16899 (uint32_t) (depth == -1 ? 0 : depth)
16900 );
16901
16902 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
16903 }
16904
16905 return node;
16906}
16907
/**
 * Parse a pattern matching expression.
 *
 * `flags` is a bitmask. PM_PARSE_PATTERN_TOP permits a brace-less hash pattern
 * (otherwise PM_ERR_PATTERN_HASH_IMPLICIT is reported), and either
 * PM_PARSE_PATTERN_TOP or PM_PARSE_PATTERN_MULTI permits a leading `*rest`.
 * PM_PARSE_PATTERN_MULTI additionally permits a comma separated list, which
 * becomes a find pattern or an array pattern. `diag_id` is the diagnostic
 * forwarded to the primitive parsers and `depth` is the recursion depth,
 * passed on as depth + 1.
 */
16911static pm_node_t *
16912parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
16913 pm_node_t *node = NULL;
16914
// leading_rest: the pattern started with a `*rest`. trailing_rest: a rest (or
// the implicit rest of a trailing comma) was seen after the first comma.
16915 bool leading_rest = false;
16916 bool trailing_rest = false;
16917
16918 switch (parser->current.type) {
16919 case PM_TOKEN_LABEL: {
// A label starts a hash pattern written without braces.
16920 parser_lex(parser);
16921 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16922 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
16923
16924 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16925 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16926 }
16927
16928 return node;
16929 }
16930 case PM_TOKEN_USTAR_STAR: {
// A `**` keyword rest also starts a brace-less hash pattern.
16931 node = parse_pattern_keyword_rest(parser, captures);
16932 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16933
16934 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16935 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16936 }
16937
16938 return node;
16939 }
16940 case PM_TOKEN_STRING_BEGIN: {
16941 // We need special handling for string beginnings because they could
16942 // be dynamic symbols leading to hash patterns.
16943 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16944
16945 if (pm_symbol_node_label_p(node)) {
16946 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16947
16948 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16949 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16950 }
16951
16952 return node;
16953 }
16954
// Not a label: continue with the already parsed primitive as the first node
// so that alternations and captures after it are still handled.
16955 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
16956 break;
16957 }
16958 case PM_TOKEN_USTAR: {
16959 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
16960 parser_lex(parser);
16961 node = UP(parse_pattern_rest(parser, captures));
16962 leading_rest = true;
16963 break;
16964 }
16965 }
// NOTE(review): there is no break here. When neither TOP nor MULTI is set,
// control continues into the default case below. Inner line 16966 is absent
// from this listing; presumably it held a fallthrough marker.
16967 default:
16968 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
16969 break;
16970 }
16971
16972 // If we got a dynamic label symbol, then we need to treat it like the
16973 // beginning of a hash pattern.
16974 if (pm_symbol_node_label_p(node)) {
16975 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16976 }
16977
16978 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
16979 // If we have a comma, then we are now parsing either an array pattern
16980 // or a find pattern. We need to parse all of the patterns, put them
16981 // into a big list, and then determine which type of node we have.
16982 pm_node_list_t nodes = { 0 };
16983 pm_node_list_append(&nodes, node);
16984
16985 // Gather up all of the patterns into the list.
16986 while (accept1(parser, PM_TOKEN_COMMA)) {
16987 // Break early here in case we have a trailing comma.
16988 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
// The trailing comma is recorded as an implicit rest node located at the
// comma token itself (parser->previous).
16989 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
16990 pm_node_list_append(&nodes, node);
16991 trailing_rest = true;
16992 break;
16993 }
16994
16995 if (accept1(parser, PM_TOKEN_USTAR)) {
16996 node = UP(parse_pattern_rest(parser, captures));
16997
16998 // If we have already parsed a splat pattern, then this is an
16999 // error. We will continue to parse the rest of the patterns,
17000 // but we will indicate it as an error.
17001 if (trailing_rest) {
17002 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17003 }
17004
17005 trailing_rest = true;
17006 } else {
17007 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17008 }
17009
17010 pm_node_list_append(&nodes, node);
17011 }
17012
17013 // If the first pattern and the last pattern are rest patterns, then we
17014 // will call this a find pattern, regardless of how many rest patterns
17015 // are in between because we know we already added the appropriate
17016 // errors. Otherwise we will create an array pattern.
17017 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17018 node = UP(pm_find_pattern_node_create(parser, &nodes));
17019
// Exactly two entries means the two rests have nothing between them.
17020 if (nodes.size == 2) {
17021 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17022 }
17023 } else {
17024 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17025
17026 if (leading_rest && trailing_rest) {
17027 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17028 }
17029 }
17030
// Only the temporary list's backing array is released here; the nodes it
// referenced were handed to the find/array pattern node created above.
17031 xfree(nodes.nodes);
17032 } else if (leading_rest) {
17033 // Otherwise, if we parsed a single splat pattern, then we know we have
17034 // an array pattern, so we can go ahead and create that node.
17035 node = UP(pm_array_pattern_node_rest_create(parser, node));
17036 }
17037
17038 return node;
17039}
17040
17046static inline void
17047parse_negative_numeric(pm_node_t *node) {
17048 switch (PM_NODE_TYPE(node)) {
17049 case PM_INTEGER_NODE: {
17050 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17051 cast->base.location.start--;
17052 cast->value.negative = true;
17053 break;
17054 }
17055 case PM_FLOAT_NODE: {
17056 pm_float_node_t *cast = (pm_float_node_t *) node;
17057 cast->base.location.start--;
17058 cast->value = -cast->value;
17059 break;
17060 }
17061 case PM_RATIONAL_NODE: {
17062 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17063 cast->base.location.start--;
17064 cast->numerator.negative = true;
17065 break;
17066 }
17067 case PM_IMAGINARY_NODE:
17068 node->location.start--;
17069 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17070 break;
17071 default:
17072 assert(false && "unreachable");
17073 break;
17074 }
17075}
17076
17082static void
17083pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17084 switch (diag_id) {
17085 case PM_ERR_HASH_KEY: {
17086 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17087 break;
17088 }
17089 case PM_ERR_HASH_VALUE:
17090 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17091 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17092 break;
17093 }
17094 case PM_ERR_UNARY_RECEIVER: {
17095 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17096 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17097 break;
17098 }
17099 case PM_ERR_UNARY_DISALLOWED:
17100 case PM_ERR_EXPECT_ARGUMENT: {
17101 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17102 break;
17103 }
17104 default:
17105 pm_parser_err_previous(parser, diag_id);
17106 break;
17107 }
17108}
17109
/**
 * Check that a retry (`node`) is valid in the current context, reporting an
 * error on the node when it is not.
 *
 * The context stack is walked outward from parser->current_context. Reaching
 * a permitting context returns silently. Reaching class, def, main, module,
 * preexe or sclass context reports one of three errors, selected by `context`:
 * PM_ERR_INVALID_RETRY_WITHOUT_RESCUE, PM_ERR_INVALID_RETRY_AFTER_ENSURE or
 * PM_ERR_INVALID_RETRY_AFTER_ELSE. Running off the end of the stack reports
 * nothing.
 *
 * NOTE(review): several `case` labels of this switch are absent from this
 * listing (the inner numbering jumps 17123 -> 17131, 17151 -> 17159 and
 * 17162 -> 17170), so the groups that set CONTEXT_THROUGH_ELSE and
 * CONTEXT_THROUGH_ENSURE appear here without their labels. The code lines are
 * kept exactly as listed; only the two mismatched #undef lines at the bottom
 * are corrected.
 */
17113static void
17114parse_retry(pm_parser_t *parser, const pm_node_t *node) {
// Function-local constants recording which kind of clause the walk has
// passed through so far. They are undefined again at the end of the function.
17115#define CONTEXT_NONE 0
17116#define CONTEXT_THROUGH_ENSURE 1
17117#define CONTEXT_THROUGH_ELSE 2
17118
17119 pm_context_node_t *context_node = parser->current_context;
17120 int context = CONTEXT_NONE;
17121
17122 while (context_node != NULL) {
17123 switch (context_node->context) {
17131 case PM_CONTEXT_DEFINED:
17133 // These are the good cases. We're allowed to have a retry here.
17134 return;
17135 case PM_CONTEXT_CLASS:
17136 case PM_CONTEXT_DEF:
17138 case PM_CONTEXT_MAIN:
17139 case PM_CONTEXT_MODULE:
17140 case PM_CONTEXT_PREEXE:
17141 case PM_CONTEXT_SCLASS:
17142 // These are the bad cases. We're not allowed to have a retry in
17143 // these contexts.
17144 if (context == CONTEXT_NONE) {
17145 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17146 } else if (context == CONTEXT_THROUGH_ENSURE) {
17147 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17148 } else if (context == CONTEXT_THROUGH_ELSE) {
17149 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17150 }
17151 return;
17159 // These are also bad cases, but with a more specific error
17160 // message indicating the else.
17161 context = CONTEXT_THROUGH_ELSE;
17162 break;
17170 // These are also bad cases, but with a more specific error
17171 // message indicating the ensure.
17172 context = CONTEXT_THROUGH_ENSURE;
17173 break;
17174 case PM_CONTEXT_NONE:
17175 // This case should never happen.
17176 assert(false && "unreachable");
17177 break;
17178 case PM_CONTEXT_BEGIN:
17182 case PM_CONTEXT_CASE_IN:
17185 case PM_CONTEXT_ELSE:
17186 case PM_CONTEXT_ELSIF:
17187 case PM_CONTEXT_EMBEXPR:
17189 case PM_CONTEXT_FOR:
17190 case PM_CONTEXT_IF:
17195 case PM_CONTEXT_PARENS:
17196 case PM_CONTEXT_POSTEXE:
17198 case PM_CONTEXT_TERNARY:
17199 case PM_CONTEXT_UNLESS:
17200 case PM_CONTEXT_UNTIL:
17201 case PM_CONTEXT_WHILE:
17202 // In these contexts we should continue walking up the list of
17203 // contexts.
17204 break;
17205 }
17206
17207 context_node = context_node->prev;
17208 }
17209
// The names here must match the #define lines at the top of the function,
// otherwise the macros stay defined for the rest of the translation unit.
17210#undef CONTEXT_NONE
17211#undef CONTEXT_THROUGH_ENSURE
17212#undef CONTEXT_THROUGH_ELSE
17213}
17214
/**
 * Check that a yield (`node`) is valid in the current context, reporting
 * PM_ERR_INVALID_YIELD on the node when it is not.
 *
 * The context stack is walked outward from parser->current_context. Reaching
 * a def or defined? context returns silently; reaching a class, main, module
 * or sclass context reports the error. All other listed contexts are skipped,
 * and running off the end of the stack reports nothing.
 *
 * NOTE(review): the inner numbering of this listing has gaps inside the
 * switch, so further `case` labels presumably exist that are not visible here.
 */
17218static void
17219parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17220 pm_context_node_t *context_node = parser->current_context;
17221
17222 while (context_node != NULL) {
17223 switch (context_node->context) {
17224 case PM_CONTEXT_DEF:
17226 case PM_CONTEXT_DEFINED:
17230 // These are the good cases. We're allowed to have a yield in
17231 // these contexts.
17232 return;
17233 case PM_CONTEXT_CLASS:
17237 case PM_CONTEXT_MAIN:
17238 case PM_CONTEXT_MODULE:
17242 case PM_CONTEXT_SCLASS:
17246 // These are the bad cases. We're not allowed to have a yield in
17247 // these contexts.
17248 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17249 return;
17250 case PM_CONTEXT_NONE:
17251 // This case should never happen.
17252 assert(false && "unreachable");
17253 break;
17254 case PM_CONTEXT_BEGIN:
17264 case PM_CONTEXT_CASE_IN:
17267 case PM_CONTEXT_ELSE:
17268 case PM_CONTEXT_ELSIF:
17269 case PM_CONTEXT_EMBEXPR:
17271 case PM_CONTEXT_FOR:
17272 case PM_CONTEXT_IF:
17280 case PM_CONTEXT_PARENS:
17281 case PM_CONTEXT_POSTEXE:
17283 case PM_CONTEXT_PREEXE:
17285 case PM_CONTEXT_TERNARY:
17286 case PM_CONTEXT_UNLESS:
17287 case PM_CONTEXT_UNTIL:
17288 case PM_CONTEXT_WHILE:
17289 // In these contexts we should continue walking up the list of
17290 // contexts.
17291 break;
17292 }
17293
17294 context_node = context_node->prev;
17295 }
17296}
17297
// Data handed to the regular expression error callback as its opaque pointer.
// NOTE(review): only part of this struct is visible in this listing. The
// callback and its caller also use `parser` and `shared` members, and the
// closing brace and type name are not shown.
17302typedef struct {
17305
// Start of the fallback error location. The caller sets it to the start of
// the regular expression node's location.
17307 const uint8_t *start;
17308
// End of the fallback error location. The caller sets it to the end of the
// regular expression node's location.
17310 const uint8_t *end;
17311
17320
/**
 * Error callback passed to pm_regexp_parse. Appends a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic formatted with `message`.
 *
 * When the callback data is flagged as shared, the error is located at the
 * [start, end) range reported by the regular expression parser. Otherwise the
 * range stored in the callback data is used instead.
 *
 * NOTE(review): the line that derives `callback_data` from `data` (inner line
 * 17327) is absent from this listing.
 */
17325static void
17326parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17328 pm_location_t location;
17329
17330 if (callback_data->shared) {
17331 location = (pm_location_t) { .start = start, .end = end };
17332 } else {
17333 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17334 }
17335
17336 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17337}
17338
/**
 * Run the regular expression parser over the unescaped source of `node` so
 * that any errors it finds are reported through
 * parse_regular_expression_error.
 *
 * The callback data carries the parser, the node's own location as the
 * fallback error range, and whether the unescaped string is of type
 * PM_STRING_SHARED. The node's extended flag is forwarded to pm_regexp_parse,
 * and the two arguments before the error callback are passed as NULL.
 *
 * NOTE(review): the line that opens the `error_data` initializer (inner line
 * 17345) is absent from this listing.
 */
17342static void
17343parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17344 const pm_string_t *unescaped = &node->unescaped;
17346 .parser = parser,
17347 .start = node->base.location.start,
17348 .end = node->base.location.end,
17349 .shared = unescaped->type == PM_STRING_SHARED
17350 };
17351
17352 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17353}
17354
17358static inline pm_node_t *
17359parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
17360 switch (parser->current.type) {
17361 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17362 parser_lex(parser);
17363
17364 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17365 pm_accepts_block_stack_push(parser, true);
17366 bool parsed_bare_hash = false;
17367
17368 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17369 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17370
17371 // Handle the case where we don't have a comma and we have a
17372 // newline followed by a right bracket.
17373 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17374 break;
17375 }
17376
17377 // Ensure that we have a comma between elements in the array.
17378 if (array->elements.size > 0) {
17379 if (accept1(parser, PM_TOKEN_COMMA)) {
17380 // If there was a comma but we also accepts a newline,
17381 // then this is a syntax error.
17382 if (accepted_newline) {
17383 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
17384 }
17385 } else {
17386 // If there was no comma, then we need to add a syntax
17387 // error.
17388 const uint8_t *location = parser->previous.end;
17389 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17390
17391 parser->previous.start = location;
17392 parser->previous.type = PM_TOKEN_MISSING;
17393 }
17394 }
17395
17396 // If we have a right bracket immediately following a comma,
17397 // this is allowed since it's a trailing comma. In this case we
17398 // can break out of the loop.
17399 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
17400
17401 pm_node_t *element;
17402
17403 if (accept1(parser, PM_TOKEN_USTAR)) {
17404 pm_token_t operator = parser->previous;
17405 pm_node_t *expression = NULL;
17406
17407 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
17408 pm_parser_scope_forwarding_positionals_check(parser, &operator);
17409 } else {
17410 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17411 }
17412
17413 element = UP(pm_splat_node_create(parser, &operator, expression));
17414 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
17415 if (parsed_bare_hash) {
17416 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
17417 }
17418
17419 element = UP(pm_keyword_hash_node_create(parser));
17420 pm_static_literals_t hash_keys = { 0 };
17421
17422 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17423 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17424 }
17425
17426 pm_static_literals_free(&hash_keys);
17427 parsed_bare_hash = true;
17428 } else {
17429 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
17430
17431 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17432 if (parsed_bare_hash) {
17433 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
17434 }
17435
17436 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
17437 pm_static_literals_t hash_keys = { 0 };
17438 pm_hash_key_static_literals_add(parser, &hash_keys, element);
17439
17440 pm_token_t operator;
17441 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
17442 operator = parser->previous;
17443 } else {
17444 operator = not_provided(parser);
17445 }
17446
17447 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
17448 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, &operator, value));
17449 pm_keyword_hash_node_elements_append(hash, assoc);
17450
17451 element = UP(hash);
17452 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17453 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17454 }
17455
17456 pm_static_literals_free(&hash_keys);
17457 parsed_bare_hash = true;
17458 }
17459 }
17460
17461 pm_array_node_elements_append(array, element);
17462 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
17463 }
17464
17465 accept1(parser, PM_TOKEN_NEWLINE);
17466
17467 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17468 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
17469 parser->previous.start = parser->previous.end;
17470 parser->previous.type = PM_TOKEN_MISSING;
17471 }
17472
17473 pm_array_node_close_set(array, &parser->previous);
17474 pm_accepts_block_stack_pop(parser);
17475
17476 return UP(array);
17477 }
17478 case PM_TOKEN_PARENTHESIS_LEFT:
17479 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17480 pm_token_t opening = parser->current;
17481 pm_node_flags_t flags = 0;
17482
17483 pm_node_list_t current_block_exits = { 0 };
17484 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17485
17486 parser_lex(parser);
17487 while (true) {
17488 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17489 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17490 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17491 break;
17492 }
17493 }
17494
17495 // If this is the end of the file or we match a right parenthesis, then
17496 // we have an empty parentheses node, and we can immediately return.
17497 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
17498 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17499
17500 pop_block_exits(parser, previous_block_exits);
17501 pm_node_list_free(&current_block_exits);
17502
17503 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags));
17504 }
17505
17506 // Otherwise, we're going to parse the first statement in the list
17507 // of statements within the parentheses.
17508 pm_accepts_block_stack_push(parser, true);
17509 context_push(parser, PM_CONTEXT_PARENS);
17510 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17511 context_pop(parser);
17512
17513 // Determine if this statement is followed by a terminator. In the
17514 // case of a single statement, this is fine. But in the case of
17515 // multiple statements it's required.
17516 bool terminator_found = false;
17517
17518 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17519 terminator_found = true;
17520 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17521 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
17522 terminator_found = true;
17523 }
17524
17525 if (terminator_found) {
17526 while (true) {
17527 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17528 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17529 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17530 break;
17531 }
17532 }
17533 }
17534
17535 // If we hit a right parenthesis, then we're done parsing the
17536 // parentheses node, and we can check which kind of node we should
17537 // return.
17538 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17539 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
17540 lex_state_set(parser, PM_LEX_STATE_ENDARG);
17541 }
17542
17543 parser_lex(parser);
17544 pm_accepts_block_stack_pop(parser);
17545
17546 pop_block_exits(parser, previous_block_exits);
17547 pm_node_list_free(&current_block_exits);
17548
17549 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17550 // If we have a single statement and are ending on a right
17551 // parenthesis, then we need to check if this is possibly a
17552 // multiple target node.
17553 pm_multi_target_node_t *multi_target;
17554
17555 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
17556 multi_target = (pm_multi_target_node_t *) statement;
17557 } else {
17558 multi_target = pm_multi_target_node_create(parser);
17559 pm_multi_target_node_targets_append(parser, multi_target, statement);
17560 }
17561
17562 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17563 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
17564
17565 multi_target->lparen_loc = lparen_loc;
17566 multi_target->rparen_loc = rparen_loc;
17567 multi_target->base.location.start = lparen_loc.start;
17568 multi_target->base.location.end = rparen_loc.end;
17569
17570 pm_node_t *result;
17571 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
17572 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17573 accept1(parser, PM_TOKEN_NEWLINE);
17574 } else {
17575 result = UP(multi_target);
17576 }
17577
17578 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
17579 // All set, this is explicitly allowed by the parent
17580 // context.
17581 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
17582 // All set, we're inside a for loop and we're parsing
17583 // multiple targets.
17584 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
17585 // Multi targets are not allowed when it's not a
17586 // statement level.
17587 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17588 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
17589 // Multi targets must be followed by an equal sign in
17590 // order to be valid (or a right parenthesis if they are
17591 // nested).
17592 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17593 }
17594
17595 return result;
17596 }
17597
17598 // If we have a single statement and are ending on a right parenthesis
17599 // and we didn't return a multiple assignment node, then we can return a
17600 // regular parentheses node now.
17601 pm_statements_node_t *statements = pm_statements_node_create(parser);
17602 pm_statements_node_body_append(parser, statements, statement, true);
17603
17604 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17605 }
17606
17607 // If we have more than one statement in the set of parentheses,
17608 // then we are going to parse all of them as a list of statements.
17609 // We'll do that here.
17610 context_push(parser, PM_CONTEXT_PARENS);
17611 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17612
17613 pm_statements_node_t *statements = pm_statements_node_create(parser);
17614 pm_statements_node_body_append(parser, statements, statement, true);
17615
17616 // If we didn't find a terminator and we didn't find a right
17617 // parenthesis, then this is a syntax error.
17618 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17619 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17620 }
17621
17622 // Parse each statement within the parentheses.
17623 while (true) {
17624 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17625 pm_statements_node_body_append(parser, statements, node, true);
17626
17627 // If we're recovering from a syntax error, then we need to stop
17628 // parsing the statements now.
17629 if (parser->recovering) {
17630 // If this is the level of context where the recovery has
17631 // happened, then we can mark the parser as done recovering.
17632 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
17633 break;
17634 }
17635
17636 // If we couldn't parse an expression at all, then we need to
17637 // bail out of the loop.
17638 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
17639
17640 // If we successfully parsed a statement, then we are going to
17641 // need terminator to delimit them.
17642 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17643 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17644 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17645 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17646 break;
17647 } else if (!match1(parser, PM_TOKEN_EOF)) {
17648 // If we're at the end of the file, then we're going to add
17649 // an error after this for the ) anyway.
17650 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17651 }
17652 }
17653
17654 context_pop(parser);
17655 pm_accepts_block_stack_pop(parser);
17656 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17657
17658 // When we're parsing multi targets, we allow them to be followed by
17659 // a right parenthesis if they are at the statement level. This is
17660 // only possible if they are the final statement in a parentheses.
17661 // We need to explicitly reject that here.
17662 {
17663 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
17664
17665 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17666 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
17667 pm_multi_target_node_targets_append(parser, multi_target, statement);
17668
17669 statement = UP(multi_target);
17670 statements->body.nodes[statements->body.size - 1] = statement;
17671 }
17672
17673 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
17674 const uint8_t *offset = statement->location.end;
17675 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
17676 pm_node_t *value = UP(pm_missing_node_create(parser, offset, offset));
17677
17678 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
17679 statements->body.nodes[statements->body.size - 1] = statement;
17680
17681 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
17682 }
17683 }
17684
17685 pop_block_exits(parser, previous_block_exits);
17686 pm_node_list_free(&current_block_exits);
17687
17688 pm_void_statements_check(parser, statements, true);
17689 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17690 }
17691 case PM_TOKEN_BRACE_LEFT: {
17692 // If we were passed a current_hash_keys via the parser, then that
17693 // means we're already parsing a hash and we want to share the set
17694 // of hash keys with this inner hash we're about to parse for the
17695 // sake of warnings. We'll set it to NULL after we grab it to make
17696 // sure subsequent expressions don't use it. Effectively this is a
17697 // way of getting around passing it to every call to
17698 // parse_expression.
17699 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17700 parser->current_hash_keys = NULL;
17701
17702 pm_accepts_block_stack_push(parser, true);
17703 parser_lex(parser);
17704
17705 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
17706
17707 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
17708 if (current_hash_keys != NULL) {
17709 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
17710 } else {
17711 pm_static_literals_t hash_keys = { 0 };
17712 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
17713 pm_static_literals_free(&hash_keys);
17714 }
17715
17716 accept1(parser, PM_TOKEN_NEWLINE);
17717 }
17718
17719 pm_accepts_block_stack_pop(parser);
17720 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
17721 pm_hash_node_closing_loc_set(node, &parser->previous);
17722
17723 return UP(node);
17724 }
17725 case PM_TOKEN_CHARACTER_LITERAL: {
17726 pm_token_t closing = not_provided(parser);
17727 pm_node_t *node = UP(pm_string_node_create_current_string(
17728 parser,
17729 &(pm_token_t) {
17730 .type = PM_TOKEN_STRING_BEGIN,
17731 .start = parser->current.start,
17732 .end = parser->current.start + 1
17733 },
17734 &(pm_token_t) {
17735 .type = PM_TOKEN_STRING_CONTENT,
17736 .start = parser->current.start + 1,
17737 .end = parser->current.end
17738 },
17739 &closing
17740 ));
17741
17742 pm_node_flag_set(node, parse_unescaped_encoding(parser));
17743
17744 // Skip past the character literal here, since now we have handled
17745 // parser->explicit_encoding correctly.
17746 parser_lex(parser);
17747
17748 // Characters can be followed by strings in which case they are
17749 // automatically concatenated.
17750 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17751 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
17752 }
17753
17754 return node;
17755 }
17756 case PM_TOKEN_CLASS_VARIABLE: {
17757 parser_lex(parser);
17758 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17759
17760 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17761 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17762 }
17763
17764 return node;
17765 }
17766 case PM_TOKEN_CONSTANT: {
17767 parser_lex(parser);
17768 pm_token_t constant = parser->previous;
17769
17770 // If a constant is immediately followed by parentheses, then this is in
17771 // fact a method call, not a constant read.
17772 if (
17773 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
17774 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17775 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17776 match1(parser, PM_TOKEN_BRACE_LEFT)
17777 ) {
17778 pm_arguments_t arguments = { 0 };
17779 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17780 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
17781 }
17782
17783 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
17784
17785 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17786 // If we get here, then we have a comma immediately following a
17787 // constant, so we're going to parse this as a multiple assignment.
17788 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17789 }
17790
17791 return node;
17792 }
17793 case PM_TOKEN_UCOLON_COLON: {
17794 parser_lex(parser);
17795 pm_token_t delimiter = parser->previous;
17796
17797 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17798 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
17799
17800 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17801 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17802 }
17803
17804 return node;
17805 }
17806 case PM_TOKEN_UDOT_DOT:
17807 case PM_TOKEN_UDOT_DOT_DOT: {
17808 pm_token_t operator = parser->current;
17809 parser_lex(parser);
17810
17811 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
17812
17813 // Unary .. and ... are special because these are non-associative
17814 // operators that can also be unary operators. In this case we need
17815 // to explicitly reject code that has a .. or ... that follows this
17816 // expression.
17817 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17818 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
17819 }
17820
17821 return UP(pm_range_node_create(parser, NULL, &operator, right));
17822 }
17823 case PM_TOKEN_FLOAT:
17824 parser_lex(parser);
17825 return UP(pm_float_node_create(parser, &parser->previous));
17826 case PM_TOKEN_FLOAT_IMAGINARY:
17827 parser_lex(parser);
17828 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
17829 case PM_TOKEN_FLOAT_RATIONAL:
17830 parser_lex(parser);
17831 return UP(pm_float_node_rational_create(parser, &parser->previous));
17832 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
17833 parser_lex(parser);
17834 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
17835 case PM_TOKEN_NUMBERED_REFERENCE: {
17836 parser_lex(parser);
17837 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17838
17839 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17840 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17841 }
17842
17843 return node;
17844 }
17845 case PM_TOKEN_GLOBAL_VARIABLE: {
17846 parser_lex(parser);
17847 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17848
17849 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17850 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17851 }
17852
17853 return node;
17854 }
17855 case PM_TOKEN_BACK_REFERENCE: {
17856 parser_lex(parser);
17857 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17858
17859 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17860 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17861 }
17862
17863 return node;
17864 }
17865 case PM_TOKEN_IDENTIFIER:
17866 case PM_TOKEN_METHOD_NAME: {
17867 parser_lex(parser);
17868 pm_token_t identifier = parser->previous;
17869 pm_node_t *node = parse_variable_call(parser);
17870
17871 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17872 // If parse_variable_call returned with a call node, then we
17873 // know the identifier is not in the local table. In that case
17874 // we need to check if there are arguments following the
17875 // identifier.
17876 pm_call_node_t *call = (pm_call_node_t *) node;
17877 pm_arguments_t arguments = { 0 };
17878
17879 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
17880 // Since we found arguments, we need to turn off the
17881 // variable call bit in the flags.
17882 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
17883
17884 call->opening_loc = arguments.opening_loc;
17885 call->arguments = arguments.arguments;
17886 call->closing_loc = arguments.closing_loc;
17887 call->block = arguments.block;
17888
17889 const uint8_t *end = pm_arguments_end(&arguments);
17890 if (!end) {
17891 end = call->message_loc.end;
17892 }
17893 call->base.location.end = end;
17894 }
17895 } else {
17896 // Otherwise, we know the identifier is in the local table. This
17897 // can still be a method call if it is followed by arguments or
17898 // a block, so we need to check for that here.
17899 if (
17900 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17901 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17902 match1(parser, PM_TOKEN_BRACE_LEFT)
17903 ) {
17904 pm_arguments_t arguments = { 0 };
17905 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17906 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17907
17908 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17909 // If we're about to convert an 'it' implicit local
17910 // variable read into a method call, we need to remove
17911 // it from the list of implicit local variables.
17912 pm_node_unreference(parser, node);
17913 } else {
17914 // Otherwise, we're about to convert a regular local
17915 // variable read into a method call, in which case we
17916 // need to indicate that this was not a read for the
17917 // purposes of warnings.
17918 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17919
17920 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
17921 pm_node_unreference(parser, node);
17922 } else {
17924 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17925 }
17926 }
17927
17928 pm_node_destroy(parser, node);
17929 return UP(fcall);
17930 }
17931 }
17932
17933 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17934 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17935 }
17936
17937 return node;
17938 }
17939 case PM_TOKEN_HEREDOC_START: {
17940 // Here we have found a heredoc. We'll parse it and add it to the
17941 // list of strings.
17942 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
17943 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
17944
17945 size_t common_whitespace = (size_t) -1;
17946 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
17947
17948 parser_lex(parser);
17949 pm_token_t opening = parser->previous;
17950
17951 pm_node_t *node;
17952 pm_node_t *part;
17953
17954 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
17955 // If we get here, then we have an empty heredoc. We'll create
17956 // an empty content token and return an empty string node.
17957 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
17958 pm_token_t content = parse_strings_empty_content(parser->previous.start);
17959
17960 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
17961 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
17962 } else {
17963 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
17964 }
17965
17966 node->location.end = opening.end;
17967 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
17968 // If we get here, then we tried to find something in the
17969 // heredoc but couldn't actually parse anything, so we'll just
17970 // return a missing node.
17971 //
17972 // parse_string_part handles its own errors, so there is no need
17973 // for us to add one here.
17974 node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
17975 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
17976 // If we get here, then the part that we parsed was plain string
17977 // content and we're at the end of the heredoc, so we can return
17978 // just a string node with the heredoc opening and closing as
17979 // its opening and closing.
17980 pm_node_flag_set(part, parse_unescaped_encoding(parser));
17981 pm_string_node_t *cast = (pm_string_node_t *) part;
17982
17983 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17984 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
17985 cast->base.location = cast->opening_loc;
17986
17987 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
17988 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
17989 cast->base.type = PM_X_STRING_NODE;
17990 }
17991
17992 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
17993 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
17994 }
17995
17996 node = UP(cast);
17997 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
17998 } else {
17999 // If we get here, then we have multiple parts in the heredoc,
18000 // so we'll need to create an interpolated string node to hold
18001 // them all.
18002 pm_node_list_t parts = { 0 };
18003 pm_node_list_append(&parts, part);
18004
18005 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18006 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18007 pm_node_list_append(&parts, part);
18008 }
18009 }
18010
18011 // Now that we have all of the parts, create the correct type of
18012 // interpolated node.
18013 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18014 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18015 cast->parts = parts;
18016
18017 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18018 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18019
18020 cast->base.location = cast->opening_loc;
18021 node = UP(cast);
18022 } else {
18023 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18024 pm_node_list_free(&parts);
18025
18026 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18027 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18028
18029 cast->base.location = cast->opening_loc;
18030 node = UP(cast);
18031 }
18032
18033 // If this is a heredoc that is indented with a ~, then we need
18034 // to dedent each line by the common leading whitespace.
18035 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18036 pm_node_list_t *nodes;
18037 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18038 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18039 } else {
18040 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18041 }
18042
18043 parse_heredoc_dedent(parser, nodes, common_whitespace);
18044 }
18045 }
18046
18047 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18048 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18049 }
18050
18051 return node;
18052 }
18053 case PM_TOKEN_INSTANCE_VARIABLE: {
18054 parser_lex(parser);
18055 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
18056
18057 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18058 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18059 }
18060
18061 return node;
18062 }
18063 case PM_TOKEN_INTEGER: {
18064 pm_node_flags_t base = parser->integer_base;
18065 parser_lex(parser);
18066 return UP(pm_integer_node_create(parser, base, &parser->previous));
18067 }
18068 case PM_TOKEN_INTEGER_IMAGINARY: {
18069 pm_node_flags_t base = parser->integer_base;
18070 parser_lex(parser);
18071 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
18072 }
18073 case PM_TOKEN_INTEGER_RATIONAL: {
18074 pm_node_flags_t base = parser->integer_base;
18075 parser_lex(parser);
18076 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
18077 }
18078 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18079 pm_node_flags_t base = parser->integer_base;
18080 parser_lex(parser);
18081 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
18082 }
18083 case PM_TOKEN_KEYWORD___ENCODING__:
18084 parser_lex(parser);
18085 return UP(pm_source_encoding_node_create(parser, &parser->previous));
18086 case PM_TOKEN_KEYWORD___FILE__:
18087 parser_lex(parser);
18088 return UP(pm_source_file_node_create(parser, &parser->previous));
18089 case PM_TOKEN_KEYWORD___LINE__:
18090 parser_lex(parser);
18091 return UP(pm_source_line_node_create(parser, &parser->previous));
18092 case PM_TOKEN_KEYWORD_ALIAS: {
18093 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18094 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18095 }
18096
18097 parser_lex(parser);
18098 pm_token_t keyword = parser->previous;
18099
18100 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18101 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18102
18103 switch (PM_NODE_TYPE(new_name)) {
18104 case PM_BACK_REFERENCE_READ_NODE:
18105 case PM_NUMBERED_REFERENCE_READ_NODE:
18106 case PM_GLOBAL_VARIABLE_READ_NODE: {
18107 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18108 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18109 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18110 }
18111 } else {
18112 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18113 }
18114
18115 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
18116 }
18117 case PM_SYMBOL_NODE:
18118 case PM_INTERPOLATED_SYMBOL_NODE: {
18119 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18120 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18121 }
18122 }
18124 default:
18125 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
18126 }
18127 }
18128 case PM_TOKEN_KEYWORD_CASE: {
18129 size_t opening_newline_index = token_newline_index(parser);
18130 parser_lex(parser);
18131
18132 pm_token_t case_keyword = parser->previous;
18133 pm_node_t *predicate = NULL;
18134
18135 pm_node_list_t current_block_exits = { 0 };
18136 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18137
18138 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18139 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18140 predicate = NULL;
18141 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18142 predicate = NULL;
18143 } else if (!token_begins_expression_p(parser->current.type)) {
18144 predicate = NULL;
18145 } else {
18146 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18147 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18148 }
18149
18150 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18151 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18152 parser_lex(parser);
18153
18154 pop_block_exits(parser, previous_block_exits);
18155 pm_node_list_free(&current_block_exits);
18156
18157 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18158 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
18159 }
18160
18161 // At this point we can create a case node, though we don't yet know
18162 // if it is a case-in or case-when node.
18163 pm_token_t end_keyword = not_provided(parser);
18164 pm_node_t *node;
18165
18166 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18167 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18168 pm_static_literals_t literals = { 0 };
18169
18170 // At this point we've seen a when keyword, so we know this is a
18171 // case-when node. We will continue to parse the when nodes
18172 // until we hit the end of the list.
18173 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18174 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18175 parser_lex(parser);
18176
18177 pm_token_t when_keyword = parser->previous;
18178 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18179
18180 do {
18181 if (accept1(parser, PM_TOKEN_USTAR)) {
18182 pm_token_t operator = parser->previous;
18183 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18184
18185 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18186 pm_when_node_conditions_append(when_node, UP(splat_node));
18187
18188 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18189 } else {
18190 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18191 pm_when_node_conditions_append(when_node, condition);
18192
18193 // If we found a missing node, then this is a syntax
18194 // error and we should stop looping.
18195 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18196
18197 // If this is a string node, then we need to mark it
18198 // as frozen because when clause strings are frozen.
18199 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18200 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18201 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18202 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18203 }
18204
18205 pm_when_clause_static_literals_add(parser, &literals, condition);
18206 }
18207 } while (accept1(parser, PM_TOKEN_COMMA));
18208
18209 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18210 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18211 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18212 }
18213 } else {
18214 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18215 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18216 }
18217
18218 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18219 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18220 if (statements != NULL) {
18221 pm_when_node_statements_set(when_node, statements);
18222 }
18223 }
18224
18225 pm_case_node_condition_append(case_node, UP(when_node));
18226 }
18227
18228 // If we didn't parse any conditions (in or when) then we need
18229 // to indicate that we have an error.
18230 if (case_node->conditions.size == 0) {
18231 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18232 }
18233
18234 pm_static_literals_free(&literals);
18235 node = UP(case_node);
18236 } else {
18237 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18238
18239 // If this is a case-match node (i.e., it is a pattern matching
18240 // case statement) then we must have a predicate.
18241 if (predicate == NULL) {
18242 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18243 }
18244
18245 // At this point we expect that we're parsing a case-in node. We
18246 // will continue to parse the in nodes until we hit the end of
18247 // the list.
18248 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18249 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18250
18251 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18252 parser->pattern_matching_newlines = true;
18253
18254 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18255 parser->command_start = false;
18256 parser_lex(parser);
18257
18258 pm_token_t in_keyword = parser->previous;
18259
18260 pm_constant_id_list_t captures = { 0 };
18261 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18262
18263 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18264 pm_constant_id_list_free(&captures);
18265
18266 // Since we're in the top-level of the case-in node we need
18267 // to check for guard clauses in the form of `if` or
18268 // `unless` statements.
18269 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18270 pm_token_t keyword = parser->previous;
18271 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18272 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
18273 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18274 pm_token_t keyword = parser->previous;
18275 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18276 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
18277 }
18278
18279 // Now we need to check for the terminator of the in node's
18280 // pattern. It can be a newline or semicolon optionally
18281 // followed by a `then` keyword.
18282 pm_token_t then_keyword;
18283 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18284 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18285 then_keyword = parser->previous;
18286 } else {
18287 then_keyword = not_provided(parser);
18288 }
18289 } else {
18290 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18291 then_keyword = parser->previous;
18292 }
18293
18294 // Now we can actually parse the statements associated with
18295 // the in node.
18296 pm_statements_node_t *statements;
18297 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18298 statements = NULL;
18299 } else {
18300 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18301 }
18302
18303 // Now that we have the full pattern and statements, we can
18304 // create the node and attach it to the case node.
18305 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword));
18306 pm_case_match_node_condition_append(case_node, condition);
18307 }
18308
18309 // If we didn't parse any conditions (in or when) then we need
18310 // to indicate that we have an error.
18311 if (case_node->conditions.size == 0) {
18312 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18313 }
18314
18315 node = UP(case_node);
18316 }
18317
18318 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18319 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18320 pm_token_t else_keyword = parser->previous;
18321 pm_else_node_t *else_node;
18322
18323 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18324 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18325 } else {
18326 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18327 }
18328
18329 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18330 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18331 } else {
18332 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18333 }
18334 }
18335
18336 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18337 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
18338
18339 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18340 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18341 } else {
18342 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18343 }
18344
18345 pop_block_exits(parser, previous_block_exits);
18346 pm_node_list_free(&current_block_exits);
18347
18348 return node;
18349 }
18350 case PM_TOKEN_KEYWORD_BEGIN: {
18351 size_t opening_newline_index = token_newline_index(parser);
18352 parser_lex(parser);
18353
18354 pm_token_t begin_keyword = parser->previous;
18355 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18356
18357 pm_node_list_t current_block_exits = { 0 };
18358 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18359 pm_statements_node_t *begin_statements = NULL;
18360
18361 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18362 pm_accepts_block_stack_push(parser, true);
18363 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18364 pm_accepts_block_stack_pop(parser);
18365 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18366 }
18367
18368 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18369 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18370 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
18371
18372 begin_node->base.location.end = parser->previous.end;
18373 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
18374
18375 pop_block_exits(parser, previous_block_exits);
18376 pm_node_list_free(&current_block_exits);
18377
18378 return UP(begin_node);
18379 }
18380 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
18381 pm_node_list_t current_block_exits = { 0 };
18382 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18383
18384 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18385 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18386 }
18387
18388 parser_lex(parser);
18389 pm_token_t keyword = parser->previous;
18390
18391 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18392 pm_token_t opening = parser->previous;
18393 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18394
18395 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
18396 pm_context_t context = parser->current_context->context;
18397 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
18398 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
18399 }
18400
18401 flush_block_exits(parser, previous_block_exits);
18402 pm_node_list_free(&current_block_exits);
18403
18404 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
18405 }
18406 case PM_TOKEN_KEYWORD_BREAK:
18407 case PM_TOKEN_KEYWORD_NEXT:
18408 case PM_TOKEN_KEYWORD_RETURN: {
18409 parser_lex(parser);
18410
18411 pm_token_t keyword = parser->previous;
18412 pm_arguments_t arguments = { 0 };
18413
18414 if (
18415 token_begins_expression_p(parser->current.type) ||
18416 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
18417 ) {
18418 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
18419
18420 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
18421 pm_token_t next = parser->current;
18422 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
18423
18424 // Reject `foo && return bar`.
18425 if (!accepts_command_call && arguments.arguments != NULL) {
18426 PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
18427 }
18428 }
18429 }
18430
18431 switch (keyword.type) {
18432 case PM_TOKEN_KEYWORD_BREAK: {
18433 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
18434 if (!parser->partial_script) parse_block_exit(parser, node);
18435 return node;
18436 }
18437 case PM_TOKEN_KEYWORD_NEXT: {
18438 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
18439 if (!parser->partial_script) parse_block_exit(parser, node);
18440 return node;
18441 }
18442 case PM_TOKEN_KEYWORD_RETURN: {
18443 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
18444 parse_return(parser, node);
18445 return node;
18446 }
18447 default:
18448 assert(false && "unreachable");
18449 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
18450 }
18451 }
18452 case PM_TOKEN_KEYWORD_SUPER: {
18453 parser_lex(parser);
18454
18455 pm_token_t keyword = parser->previous;
18456 pm_arguments_t arguments = { 0 };
18457 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18458
18459 if (
18460 arguments.opening_loc.start == NULL &&
18461 arguments.arguments == NULL &&
18462 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
18463 ) {
18464 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
18465 }
18466
18467 return UP(pm_super_node_create(parser, &keyword, &arguments));
18468 }
18469 case PM_TOKEN_KEYWORD_YIELD: {
18470 parser_lex(parser);
18471
18472 pm_token_t keyword = parser->previous;
18473 pm_arguments_t arguments = { 0 };
18474 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
18475
18476 // It's possible that we've parsed a block argument through our
18477 // call to parse_arguments_list. If we found one, we should mark it
18478 // as invalid and destroy it, as we don't have a place for it on the
18479 // yield node.
18480 if (arguments.block != NULL) {
18481 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
18482 pm_node_destroy(parser, arguments.block);
18483 arguments.block = NULL;
18484 }
18485
18486 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
18487 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
18488
18489 return node;
18490 }
18491 case PM_TOKEN_KEYWORD_CLASS: {
18492 size_t opening_newline_index = token_newline_index(parser);
18493 parser_lex(parser);
18494
18495 pm_token_t class_keyword = parser->previous;
18496 pm_do_loop_stack_push(parser, false);
18497
18498 pm_node_list_t current_block_exits = { 0 };
18499 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18500
18501 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18502 pm_token_t operator = parser->previous;
18503 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18504
18505 pm_parser_scope_push(parser, true);
18506 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18507 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
18508 }
18509
18510 pm_node_t *statements = NULL;
18511 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18512 pm_accepts_block_stack_push(parser, true);
18513 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18514 pm_accepts_block_stack_pop(parser);
18515 }
18516
18517 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18518 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18519 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18520 } else {
18521 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18522 }
18523
18524 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
18525
18526 pm_constant_id_list_t locals;
18527 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18528
18529 pm_parser_scope_pop(parser);
18530 pm_do_loop_stack_pop(parser);
18531
18532 flush_block_exits(parser, previous_block_exits);
18533 pm_node_list_free(&current_block_exits);
18534
18535 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18536 }
18537
18538 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18539 pm_token_t name = parser->previous;
18540 if (name.type != PM_TOKEN_CONSTANT) {
18541 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18542 }
18543
18544 pm_token_t inheritance_operator;
18545 pm_node_t *superclass;
18546
18547 if (match1(parser, PM_TOKEN_LESS)) {
18548 inheritance_operator = parser->current;
18549 lex_state_set(parser, PM_LEX_STATE_BEG);
18550
18551 parser->command_start = true;
18552 parser_lex(parser);
18553
18554 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18555 } else {
18556 inheritance_operator = not_provided(parser);
18557 superclass = NULL;
18558 }
18559
18560 pm_parser_scope_push(parser, true);
18561
18562 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
18563 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18564 } else {
18565 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18566 }
18567 pm_node_t *statements = NULL;
18568
18569 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18570 pm_accepts_block_stack_push(parser, true);
18571 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18572 pm_accepts_block_stack_pop(parser);
18573 }
18574
18575 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18576 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18577 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18578 } else {
18579 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18580 }
18581
18582 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
18583
18584 if (context_def_p(parser)) {
18585 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18586 }
18587
18588 pm_constant_id_list_t locals;
18589 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18590
18591 pm_parser_scope_pop(parser);
18592 pm_do_loop_stack_pop(parser);
18593
18594 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18595 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18596 }
18597
18598 pop_block_exits(parser, previous_block_exits);
18599 pm_node_list_free(&current_block_exits);
18600
18601 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous));
18602 }
18603 case PM_TOKEN_KEYWORD_DEF: {
18604 pm_node_list_t current_block_exits = { 0 };
18605 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18606
18607 pm_token_t def_keyword = parser->current;
18608 size_t opening_newline_index = token_newline_index(parser);
18609
18610 pm_node_t *receiver = NULL;
18611 pm_token_t operator = not_provided(parser);
18612 pm_token_t name;
18613
18614 // This context is necessary for lexing `...` in a bare params
18615 // correctly. It must be pushed before lexing the first param, so it
18616 // is here.
18617 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18618 parser_lex(parser);
18619
18620 // This will be false if the method name is not a valid identifier
18621 // but could be followed by an operator.
18622 bool valid_name = true;
18623
18624 switch (parser->current.type) {
18625 case PM_CASE_OPERATOR:
18626 pm_parser_scope_push(parser, true);
18627 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18628 parser_lex(parser);
18629
18630 name = parser->previous;
18631 break;
18632 case PM_TOKEN_IDENTIFIER: {
18633 parser_lex(parser);
18634
18635 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18636 receiver = parse_variable_call(parser);
18637
18638 pm_parser_scope_push(parser, true);
18639 lex_state_set(parser, PM_LEX_STATE_FNAME);
18640 parser_lex(parser);
18641
18642 operator = parser->previous;
18643 name = parse_method_definition_name(parser);
18644 } else {
18645 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
18646 pm_parser_scope_push(parser, true);
18647
18648 name = parser->previous;
18649 }
18650
18651 break;
18652 }
18653 case PM_TOKEN_INSTANCE_VARIABLE:
18654 case PM_TOKEN_CLASS_VARIABLE:
18655 case PM_TOKEN_GLOBAL_VARIABLE:
18656 valid_name = false;
18658 case PM_TOKEN_CONSTANT:
18659 case PM_TOKEN_KEYWORD_NIL:
18660 case PM_TOKEN_KEYWORD_SELF:
18661 case PM_TOKEN_KEYWORD_TRUE:
18662 case PM_TOKEN_KEYWORD_FALSE:
18663 case PM_TOKEN_KEYWORD___FILE__:
18664 case PM_TOKEN_KEYWORD___LINE__:
18665 case PM_TOKEN_KEYWORD___ENCODING__: {
18666 pm_parser_scope_push(parser, true);
18667 parser_lex(parser);
18668
18669 pm_token_t identifier = parser->previous;
18670
18671 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18672 lex_state_set(parser, PM_LEX_STATE_FNAME);
18673 parser_lex(parser);
18674 operator = parser->previous;
18675
18676 switch (identifier.type) {
18677 case PM_TOKEN_CONSTANT:
18678 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18679 break;
18680 case PM_TOKEN_INSTANCE_VARIABLE:
18681 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18682 break;
18683 case PM_TOKEN_CLASS_VARIABLE:
18684 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18685 break;
18686 case PM_TOKEN_GLOBAL_VARIABLE:
18687 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18688 break;
18689 case PM_TOKEN_KEYWORD_NIL:
18690 receiver = UP(pm_nil_node_create(parser, &identifier));
18691 break;
18692 case PM_TOKEN_KEYWORD_SELF:
18693 receiver = UP(pm_self_node_create(parser, &identifier));
18694 break;
18695 case PM_TOKEN_KEYWORD_TRUE:
18696 receiver = UP(pm_true_node_create(parser, &identifier));
18697 break;
18698 case PM_TOKEN_KEYWORD_FALSE:
18699 receiver = UP(pm_false_node_create(parser, &identifier));
18700 break;
18701 case PM_TOKEN_KEYWORD___FILE__:
18702 receiver = UP(pm_source_file_node_create(parser, &identifier));
18703 break;
18704 case PM_TOKEN_KEYWORD___LINE__:
18705 receiver = UP(pm_source_line_node_create(parser, &identifier));
18706 break;
18707 case PM_TOKEN_KEYWORD___ENCODING__:
18708 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18709 break;
18710 default:
18711 break;
18712 }
18713
18714 name = parse_method_definition_name(parser);
18715 } else {
18716 if (!valid_name) {
18717 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
18718 }
18719
18720 name = identifier;
18721 }
18722 break;
18723 }
18724 case PM_TOKEN_PARENTHESIS_LEFT: {
18725 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
18726 // the inner expression of this parenthesis should not be
18727 // processed under this context. Thus, the context is popped
18728 // here.
18729 context_pop(parser);
18730 parser_lex(parser);
18731
18732 pm_token_t lparen = parser->previous;
18733 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18734
18735 accept1(parser, PM_TOKEN_NEWLINE);
18736 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18737 pm_token_t rparen = parser->previous;
18738
18739 lex_state_set(parser, PM_LEX_STATE_FNAME);
18740 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18741
18742 operator = parser->previous;
18743 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18744
18745 // To push `PM_CONTEXT_DEF_PARAMS` again is for the same
18746 // reason as described the above.
18747 pm_parser_scope_push(parser, true);
18748 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18749 name = parse_method_definition_name(parser);
18750 break;
18751 }
18752 default:
18753 pm_parser_scope_push(parser, true);
18754 name = parse_method_definition_name(parser);
18755 break;
18756 }
18757
18758 pm_token_t lparen;
18759 pm_token_t rparen;
18760 pm_parameters_node_t *params;
18761
18762 bool accept_endless_def = true;
18763 switch (parser->current.type) {
18764 case PM_TOKEN_PARENTHESIS_LEFT: {
18765 parser_lex(parser);
18766 lparen = parser->previous;
18767
18768 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18769 params = NULL;
18770 } else {
18771 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
18772 }
18773
18774 lex_state_set(parser, PM_LEX_STATE_BEG);
18775 parser->command_start = true;
18776
18777 context_pop(parser);
18778 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18779 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18780 parser->previous.start = parser->previous.end;
18781 parser->previous.type = PM_TOKEN_MISSING;
18782 }
18783
18784 rparen = parser->previous;
18785 break;
18786 }
18787 case PM_CASE_PARAMETER: {
18788 // If we're about to lex a label, we need to add the label
18789 // state to make sure the next newline is ignored.
18790 if (parser->current.type == PM_TOKEN_LABEL) {
18791 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18792 }
18793
18794 lparen = not_provided(parser);
18795 rparen = not_provided(parser);
18796 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
18797
18798 // Reject `def * = 1` and similar. We have to specifically check
18799 // for them because they create ambiguity with optional arguments.
18800 accept_endless_def = false;
18801
18802 context_pop(parser);
18803 break;
18804 }
18805 default: {
18806 lparen = not_provided(parser);
18807 rparen = not_provided(parser);
18808 params = NULL;
18809
18810 context_pop(parser);
18811 break;
18812 }
18813 }
18814
18815 pm_node_t *statements = NULL;
18816 pm_token_t equal;
18817 pm_token_t end_keyword;
18818
18819 if (accept1(parser, PM_TOKEN_EQUAL)) {
18820 if (token_is_setter_name(&name)) {
18821 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18822 }
18823 if (!accept_endless_def) {
18824 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18825 }
18826 if (
18829 ) {
18830 PM_PARSER_ERR_FORMAT(parser, def_keyword.start, parser->previous.end, PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18831 }
18832 equal = parser->previous;
18833
18834 context_push(parser, PM_CONTEXT_DEF);
18835 pm_do_loop_stack_push(parser, false);
18836 statements = UP(pm_statements_node_create(parser));
18837
18838 bool allow_command_call;
18839 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18840 allow_command_call = accepts_command_call;
18841 } else {
18842 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
18843 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
18844 }
18845
18846 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18847
18848 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18849 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18850
18851 pm_token_t rescue_keyword = parser->previous;
18852 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18853 context_pop(parser);
18854
18855 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18856 }
18857
18858 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18859 pm_do_loop_stack_pop(parser);
18860 context_pop(parser);
18861 end_keyword = not_provided(parser);
18862 } else {
18863 equal = not_provided(parser);
18864
18865 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
18866 lex_state_set(parser, PM_LEX_STATE_BEG);
18867 parser->command_start = true;
18868 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18869 } else {
18870 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18871 }
18872
18873 pm_accepts_block_stack_push(parser, true);
18874 pm_do_loop_stack_push(parser, false);
18875
18876 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18877 pm_accepts_block_stack_push(parser, true);
18878 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18879 pm_accepts_block_stack_pop(parser);
18880 }
18881
18882 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18883 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18884 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18885 } else {
18886 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18887 }
18888
18889 pm_accepts_block_stack_pop(parser);
18890 pm_do_loop_stack_pop(parser);
18891
18892 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
18893 end_keyword = parser->previous;
18894 }
18895
18896 pm_constant_id_list_t locals;
18897 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18898 pm_parser_scope_pop(parser);
18899
18905 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
18906
18907 flush_block_exits(parser, previous_block_exits);
18908 pm_node_list_free(&current_block_exits);
18909
18910 return UP(pm_def_node_create(
18911 parser,
18912 name_id,
18913 &name,
18914 receiver,
18915 params,
18916 statements,
18917 &locals,
18918 &def_keyword,
18919 &operator,
18920 &lparen,
18921 &rparen,
18922 &equal,
18923 &end_keyword
18924 ));
18925 }
18926 case PM_TOKEN_KEYWORD_DEFINED: {
18927 parser_lex(parser);
18928 pm_token_t keyword = parser->previous;
18929
18930 pm_token_t lparen;
18931 pm_token_t rparen;
18932 pm_node_t *expression;
18933
18934 context_push(parser, PM_CONTEXT_DEFINED);
18935 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
18936
18937 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
18938 lparen = parser->previous;
18939
18940 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18941 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
18942 lparen = not_provided(parser);
18943 rparen = not_provided(parser);
18944 } else {
18945 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
18946
18947 if (parser->recovering) {
18948 rparen = not_provided(parser);
18949 } else {
18950 accept1(parser, PM_TOKEN_NEWLINE);
18951 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18952 rparen = parser->previous;
18953 }
18954 }
18955 } else {
18956 lparen = not_provided(parser);
18957 rparen = not_provided(parser);
18958 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
18959 }
18960
18961 context_pop(parser);
18962 return UP(pm_defined_node_create(
18963 parser,
18964 &lparen,
18965 expression,
18966 &rparen,
18967 &keyword
18968 ));
18969 }
18970 case PM_TOKEN_KEYWORD_END_UPCASE: {
18971 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18972 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
18973 }
18974
18975 parser_lex(parser);
18976 pm_token_t keyword = parser->previous;
18977
18978 if (context_def_p(parser)) {
18979 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
18980 }
18981
18982 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
18983 pm_token_t opening = parser->previous;
18984 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
18985
18986 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
18987 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
18988 }
18989 case PM_TOKEN_KEYWORD_FALSE:
18990 parser_lex(parser);
18991 return UP(pm_false_node_create(parser, &parser->previous));
18992 case PM_TOKEN_KEYWORD_FOR: {
18993 size_t opening_newline_index = token_newline_index(parser);
18994 parser_lex(parser);
18995
18996 pm_token_t for_keyword = parser->previous;
18997 pm_node_t *index;
18998
18999 context_push(parser, PM_CONTEXT_FOR_INDEX);
19000
19001 // First, parse out the first index expression.
19002 if (accept1(parser, PM_TOKEN_USTAR)) {
19003 pm_token_t star_operator = parser->previous;
19004 pm_node_t *name = NULL;
19005
19006 if (token_begins_expression_p(parser->current.type)) {
19007 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19008 }
19009
19010 index = UP(pm_splat_node_create(parser, &star_operator, name));
19011 } else if (token_begins_expression_p(parser->current.type)) {
19012 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19013 } else {
19014 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19015 index = UP(pm_missing_node_create(parser, for_keyword.start, for_keyword.end));
19016 }
19017
19018 // Now, if there are multiple index expressions, parse them out.
19019 if (match1(parser, PM_TOKEN_COMMA)) {
19020 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19021 } else {
19022 index = parse_target(parser, index, false, false);
19023 }
19024
19025 context_pop(parser);
19026 pm_do_loop_stack_push(parser, true);
19027
19028 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19029 pm_token_t in_keyword = parser->previous;
19030
19031 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19032 pm_do_loop_stack_pop(parser);
19033
19034 pm_token_t do_keyword;
19035 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19036 do_keyword = parser->previous;
19037 } else {
19038 do_keyword = not_provided(parser);
19039 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19040 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19041 }
19042 }
19043
19044 pm_statements_node_t *statements = NULL;
19045 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19046 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19047 }
19048
19049 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19050 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19051
19052 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous));
19053 }
19054 case PM_TOKEN_KEYWORD_IF:
19055 if (parser_end_of_line_p(parser)) {
19056 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19057 }
19058
19059 size_t opening_newline_index = token_newline_index(parser);
19060 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19061 parser_lex(parser);
19062
19063 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19064 case PM_TOKEN_KEYWORD_UNDEF: {
19065 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19066 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19067 }
19068
19069 parser_lex(parser);
19070 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19071 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19072
19073 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19074 pm_node_destroy(parser, name);
19075 } else {
19076 pm_undef_node_append(undef, name);
19077
19078 while (match1(parser, PM_TOKEN_COMMA)) {
19079 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19080 parser_lex(parser);
19081 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19082
19083 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19084 pm_node_destroy(parser, name);
19085 break;
19086 }
19087
19088 pm_undef_node_append(undef, name);
19089 }
19090 }
19091
19092 return UP(undef);
19093 }
19094 case PM_TOKEN_KEYWORD_NOT: {
19095 parser_lex(parser);
19096
19097 pm_token_t message = parser->previous;
19098 pm_arguments_t arguments = { 0 };
19099 pm_node_t *receiver = NULL;
19100
19101 // If we do not accept a command call, then we also do not accept a
19102 // not without parentheses. In this case we need to reject this
19103 // syntax.
19104 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19105 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19106 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19107 } else {
19108 accept1(parser, PM_TOKEN_NEWLINE);
19109 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19110 }
19111
19112 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
19113 }
19114
19115 accept1(parser, PM_TOKEN_NEWLINE);
19116
19117 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19118 pm_token_t lparen = parser->previous;
19119
19120 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19121 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19122 } else {
19123 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19124 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19125
19126 if (!parser->recovering) {
19127 accept1(parser, PM_TOKEN_NEWLINE);
19128 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19129 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19130 }
19131 }
19132 } else {
19133 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19134 }
19135
19136 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19137 }
19138 case PM_TOKEN_KEYWORD_UNLESS: {
19139 size_t opening_newline_index = token_newline_index(parser);
19140 parser_lex(parser);
19141
19142 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19143 }
19144 case PM_TOKEN_KEYWORD_MODULE: {
19145 pm_node_list_t current_block_exits = { 0 };
19146 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19147
19148 size_t opening_newline_index = token_newline_index(parser);
19149 parser_lex(parser);
19150 pm_token_t module_keyword = parser->previous;
19151
19152 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19153 pm_token_t name;
19154
19155 // If we can recover from a syntax error that occurred while parsing
19156 // the name of the module, then we'll handle that here.
19157 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19158 pop_block_exits(parser, previous_block_exits);
19159 pm_node_list_free(&current_block_exits);
19160
19161 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19162 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
19163 }
19164
19165 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19166 pm_token_t double_colon = parser->previous;
19167
19168 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19169 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
19170 }
19171
19172 // Here we retrieve the name of the module. If it wasn't a constant,
19173 // then it's possible that `module foo` was passed, which is a
19174 // syntax error. We handle that here as well.
19175 name = parser->previous;
19176 if (name.type != PM_TOKEN_CONSTANT) {
19177 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19178 }
19179
19180 pm_parser_scope_push(parser, true);
19181 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19182 pm_node_t *statements = NULL;
19183
19184 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19185 pm_accepts_block_stack_push(parser, true);
19186 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
19187 pm_accepts_block_stack_pop(parser);
19188 }
19189
19190 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19191 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19192 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
19193 } else {
19194 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19195 }
19196
19197 pm_constant_id_list_t locals;
19198 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19199
19200 pm_parser_scope_pop(parser);
19201 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19202
19203 if (context_def_p(parser)) {
19204 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19205 }
19206
19207 pop_block_exits(parser, previous_block_exits);
19208 pm_node_list_free(&current_block_exits);
19209
19210 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
19211 }
19212 case PM_TOKEN_KEYWORD_NIL:
19213 parser_lex(parser);
19214 return UP(pm_nil_node_create(parser, &parser->previous));
19215 case PM_TOKEN_KEYWORD_REDO: {
19216 parser_lex(parser);
19217
19218 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19219 if (!parser->partial_script) parse_block_exit(parser, node);
19220
19221 return node;
19222 }
19223 case PM_TOKEN_KEYWORD_RETRY: {
19224 parser_lex(parser);
19225
19226 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
19227 parse_retry(parser, node);
19228
19229 return node;
19230 }
19231 case PM_TOKEN_KEYWORD_SELF:
19232 parser_lex(parser);
19233 return UP(pm_self_node_create(parser, &parser->previous));
19234 case PM_TOKEN_KEYWORD_TRUE:
19235 parser_lex(parser);
19236 return UP(pm_true_node_create(parser, &parser->previous));
19237 case PM_TOKEN_KEYWORD_UNTIL: {
19238 size_t opening_newline_index = token_newline_index(parser);
19239
19240 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19241 pm_do_loop_stack_push(parser, true);
19242
19243 parser_lex(parser);
19244 pm_token_t keyword = parser->previous;
19245 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19246
19247 pm_do_loop_stack_pop(parser);
19248 context_pop(parser);
19249
19250 pm_token_t do_keyword;
19251 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19252 do_keyword = parser->previous;
19253 } else {
19254 do_keyword = not_provided(parser);
19255 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19256 }
19257
19258 pm_statements_node_t *statements = NULL;
19259 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19260 pm_accepts_block_stack_push(parser, true);
19261 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19262 pm_accepts_block_stack_pop(parser);
19263 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19264 }
19265
19266 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19267 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19268
19269 return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
19270 }
19271 case PM_TOKEN_KEYWORD_WHILE: {
19272 size_t opening_newline_index = token_newline_index(parser);
19273
19274 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19275 pm_do_loop_stack_push(parser, true);
19276
19277 parser_lex(parser);
19278 pm_token_t keyword = parser->previous;
19279 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19280
19281 pm_do_loop_stack_pop(parser);
19282 context_pop(parser);
19283
19284 pm_token_t do_keyword;
19285 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19286 do_keyword = parser->previous;
19287 } else {
19288 do_keyword = not_provided(parser);
19289 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19290 }
19291
19292 pm_statements_node_t *statements = NULL;
19293 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19294 pm_accepts_block_stack_push(parser, true);
19295 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19296 pm_accepts_block_stack_pop(parser);
19297 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19298 }
19299
19300 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19301 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19302
19303 return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
19304 }
19305 case PM_TOKEN_PERCENT_LOWER_I: {
19306 parser_lex(parser);
19307 pm_token_t opening = parser->previous;
19308 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19309 pm_node_t *current = NULL;
19310
19311 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19312 accept1(parser, PM_TOKEN_WORDS_SEP);
19313 if (match1(parser, PM_TOKEN_STRING_END)) break;
19314
19315 // Interpolation is not possible but nested heredocs can still lead to
19316 // consecutive (disjoint) string tokens when the final newline is escaped.
19317 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19318 pm_token_t opening = not_provided(parser);
19319 pm_token_t closing = not_provided(parser);
19320
19321 // Record the string node, moving to interpolation if needed.
19322 if (current == NULL) {
19323 current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19324 parser_lex(parser);
19325 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19326 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19327 parser_lex(parser);
19328 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19329 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19330 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19331 pm_token_t bounds = not_provided(parser);
19332
19333 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19334 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19335 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19336 parser_lex(parser);
19337
19338 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19339 pm_interpolated_symbol_node_append(interpolated, first_string);
19340 pm_interpolated_symbol_node_append(interpolated, second_string);
19341
19342 xfree(current);
19343 current = UP(interpolated);
19344 } else {
19345 assert(false && "unreachable");
19346 }
19347 }
19348
19349 if (current) {
19350 pm_array_node_elements_append(array, current);
19351 current = NULL;
19352 } else {
19353 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19354 }
19355 }
19356
19357 pm_token_t closing = parser->current;
19358 if (match1(parser, PM_TOKEN_EOF)) {
19359 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19360 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19361 } else {
19362 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19363 }
19364 pm_array_node_close_set(array, &closing);
19365
19366 return UP(array);
19367 }
19368 case PM_TOKEN_PERCENT_UPPER_I: {
19369 parser_lex(parser);
19370 pm_token_t opening = parser->previous;
19371 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19372
19373 // This is the current node that we are parsing that will be added to the
19374 // list of elements.
19375 pm_node_t *current = NULL;
19376
19377 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19378 switch (parser->current.type) {
19379 case PM_TOKEN_WORDS_SEP: {
19380 if (current == NULL) {
19381 // If we hit a separator before we have any content, then we don't
19382 // need to do anything.
19383 } else {
19384 // If we hit a separator after we've hit content, then we need to
19385 // append that content to the list and reset the current node.
19386 pm_array_node_elements_append(array, current);
19387 current = NULL;
19388 }
19389
19390 parser_lex(parser);
19391 break;
19392 }
19393 case PM_TOKEN_STRING_CONTENT: {
19394 pm_token_t opening = not_provided(parser);
19395 pm_token_t closing = not_provided(parser);
19396
19397 if (current == NULL) {
19398 // If we hit content and the current node is NULL, then this is
19399 // the first string content we've seen. In that case we're going
19400 // to create a new symbol node and set that to the current.
19401 current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19402 parser_lex(parser);
19403 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19404 // If we hit string content and the current node is an
19405 // interpolated symbol, then we need to append the string content
19406 // to the list of child nodes.
19407 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19408 parser_lex(parser);
19409
19410 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19411 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19412 // If we hit string content and the current node is a symbol node,
19413 // then we need to convert the current node into an interpolated
19414 // symbol and add the string content to the list of child nodes.
19415 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19416 pm_token_t bounds = not_provided(parser);
19417
19418 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19419 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19420 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19421 parser_lex(parser);
19422
19423 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19424 pm_interpolated_symbol_node_append(interpolated, first_string);
19425 pm_interpolated_symbol_node_append(interpolated, second_string);
19426
19427 xfree(current);
19428 current = UP(interpolated);
19429 } else {
19430 assert(false && "unreachable");
19431 }
19432
19433 break;
19434 }
19435 case PM_TOKEN_EMBVAR: {
19436 bool start_location_set = false;
19437 if (current == NULL) {
19438 // If we hit an embedded variable and the current node is NULL,
19439 // then this is the start of a new symbol. We'll set the current
19440 // node to a new interpolated symbol.
19441 pm_token_t opening = not_provided(parser);
19442 pm_token_t closing = not_provided(parser);
19443 current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
19444 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19445 // If we hit an embedded variable and the current node is a symbol
19446 // node, then we'll convert it into a string node and make that the
19447 // first part of a new interpolated symbol.
19448 pm_token_t opening = not_provided(parser);
19449 pm_token_t closing = not_provided(parser);
19450 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19451
19452 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19453 pm_interpolated_symbol_node_append(interpolated, current);
19454 interpolated->base.location.start = current->location.start;
19455 start_location_set = true;
19456 current = UP(interpolated);
19457 } else {
19458 // If we hit an embedded variable and the current node is an
19459 // interpolated symbol, then we'll just add the embedded variable.
19460 }
19461
19462 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19463 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19464 if (!start_location_set) {
19465 current->location.start = part->location.start;
19466 }
19467 break;
19468 }
19469 case PM_TOKEN_EMBEXPR_BEGIN: {
19470 bool start_location_set = false;
19471 if (current == NULL) {
19472 // If we hit an embedded expression and the current node is NULL,
19473 // then this is the start of a new symbol. We'll set the current
19474 // node to a new interpolated symbol.
19475 pm_token_t opening = not_provided(parser);
19476 pm_token_t closing = not_provided(parser);
19477 current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
19478 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19479 // If we hit an embedded expression and the current node is a
19480 // symbol node, then we'll convert it into a string node and
19481 // make that string node the first part of a new interpolated
19482 // symbol.
19483 pm_token_t opening = not_provided(parser);
19484 pm_token_t closing = not_provided(parser);
19485 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19486
19487 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19488 pm_interpolated_symbol_node_append(interpolated, current);
19489 interpolated->base.location.start = current->location.start;
19490 start_location_set = true;
19491 current = UP(interpolated);
19492 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19493 // If we hit an embedded expression and the current node is an
19494 // interpolated symbol, then we'll just continue on.
19495 } else {
19496 assert(false && "unreachable");
19497 }
19498
19499 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19500 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19501 if (!start_location_set) {
19502 current->location.start = part->location.start;
19503 }
19504 break;
19505 }
19506 default:
19507 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
19508 parser_lex(parser);
19509 break;
19510 }
19511 }
19512
19513 // If we have a current node, then we need to append it to the list.
19514 if (current) {
19515 pm_array_node_elements_append(array, current);
19516 }
19517
19518 pm_token_t closing = parser->current;
19519 if (match1(parser, PM_TOKEN_EOF)) {
19520 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
19521 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19522 } else {
19523 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
19524 }
19525 pm_array_node_close_set(array, &closing);
19526
19527 return UP(array);
19528 }
19529 case PM_TOKEN_PERCENT_LOWER_W: {
19530 parser_lex(parser);
19531 pm_token_t opening = parser->previous;
19532 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19533 pm_node_t *current = NULL;
19534
19535 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19536 accept1(parser, PM_TOKEN_WORDS_SEP);
19537 if (match1(parser, PM_TOKEN_STRING_END)) break;
19538
19539 // Interpolation is not possible but nested heredocs can still lead to
19540 // consecutive (disjoint) string tokens when the final newline is escaped.
19541 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19542 pm_token_t opening = not_provided(parser);
19543 pm_token_t closing = not_provided(parser);
19544
19545 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19546
19547 // Record the string node, moving to interpolation if needed.
19548 if (current == NULL) {
19549 current = string;
19550 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19551 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19552 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19553 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19554 pm_interpolated_string_node_append(interpolated, current);
19555 pm_interpolated_string_node_append(interpolated, string);
19556 current = UP(interpolated);
19557 } else {
19558 assert(false && "unreachable");
19559 }
19560 parser_lex(parser);
19561 }
19562
19563 if (current) {
19564 pm_array_node_elements_append(array, current);
19565 current = NULL;
19566 } else {
19567 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19568 }
19569 }
19570
19571 pm_token_t closing = parser->current;
19572 if (match1(parser, PM_TOKEN_EOF)) {
19573 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
19574 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19575 } else {
19576 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
19577 }
19578
19579 pm_array_node_close_set(array, &closing);
19580 return UP(array);
19581 }
19582 case PM_TOKEN_PERCENT_UPPER_W: {
19583 parser_lex(parser);
19584 pm_token_t opening = parser->previous;
19585 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19586
19587 // This is the current node that we are parsing that will be added
19588 // to the list of elements.
19589 pm_node_t *current = NULL;
19590
19591 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19592 switch (parser->current.type) {
19593 case PM_TOKEN_WORDS_SEP: {
19594 // Reset the explicit encoding if we hit a separator
19595 // since each element can have its own encoding.
19596 parser->explicit_encoding = NULL;
19597
19598 if (current == NULL) {
19599 // If we hit a separator before we have any content,
19600 // then we don't need to do anything.
19601 } else {
19602 // If we hit a separator after we've hit content,
19603 // then we need to append that content to the list
19604 // and reset the current node.
19605 pm_array_node_elements_append(array, current);
19606 current = NULL;
19607 }
19608
19609 parser_lex(parser);
19610 break;
19611 }
19612 case PM_TOKEN_STRING_CONTENT: {
19613 pm_token_t opening = not_provided(parser);
19614 pm_token_t closing = not_provided(parser);
19615
19616 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19617 pm_node_flag_set(string, parse_unescaped_encoding(parser));
19618 parser_lex(parser);
19619
19620 if (current == NULL) {
19621 // If we hit content and the current node is NULL,
19622 // then this is the first string content we've seen.
19623 // In that case we're going to create a new string
19624 // node and set that to the current.
19625 current = string;
19626 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19627 // If we hit string content and the current node is
19628 // an interpolated string, then we need to append
19629 // the string content to the list of child nodes.
19630 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19631 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19632 // If we hit string content and the current node is
19633 // a string node, then we need to convert the
19634 // current node into an interpolated string and add
19635 // the string content to the list of child nodes.
19636 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19637 pm_interpolated_string_node_append(interpolated, current);
19638 pm_interpolated_string_node_append(interpolated, string);
19639 current = UP(interpolated);
19640 } else {
19641 assert(false && "unreachable");
19642 }
19643
19644 break;
19645 }
19646 case PM_TOKEN_EMBVAR: {
19647 if (current == NULL) {
19648 // If we hit an embedded variable and the current
19649 // node is NULL, then this is the start of a new
19650 // string. We'll set the current node to a new
19651 // interpolated string.
19652 pm_token_t opening = not_provided(parser);
19653 pm_token_t closing = not_provided(parser);
19654 current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
19655 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19656 // If we hit an embedded variable and the current
19657 // node is a string node, then we'll convert the
19658 // current into an interpolated string and add the
19659 // string node to the list of parts.
19660 pm_token_t opening = not_provided(parser);
19661 pm_token_t closing = not_provided(parser);
19662 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19663 pm_interpolated_string_node_append(interpolated, current);
19664 current = UP(interpolated);
19665 } else {
19666 // If we hit an embedded variable and the current
19667 // node is an interpolated string, then we'll just
19668 // add the embedded variable.
19669 }
19670
19671 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19672 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19673 break;
19674 }
19675 case PM_TOKEN_EMBEXPR_BEGIN: {
19676 if (current == NULL) {
19677 // If we hit an embedded expression and the current
19678 // node is NULL, then this is the start of a new
19679 // string. We'll set the current node to a new
19680 // interpolated string.
19681 pm_token_t opening = not_provided(parser);
19682 pm_token_t closing = not_provided(parser);
19683 current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
19684 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19685 // If we hit an embedded expression and the current
19686 // node is a string node, then we'll convert the
19687 // current into an interpolated string and add the
19688 // string node to the list of parts.
19689 pm_token_t opening = not_provided(parser);
19690 pm_token_t closing = not_provided(parser);
19691 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19692 pm_interpolated_string_node_append(interpolated, current);
19693 current = UP(interpolated);
19694 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19695 // If we hit an embedded expression and the current
19696 // node is an interpolated string, then we'll just
19697 // continue on.
19698 } else {
19699 assert(false && "unreachable");
19700 }
19701
19702 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19703 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19704 break;
19705 }
19706 default:
19707 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
19708 parser_lex(parser);
19709 break;
19710 }
19711 }
19712
19713 // If we have a current node, then we need to append it to the list.
19714 if (current) {
19715 pm_array_node_elements_append(array, current);
19716 }
19717
19718 pm_token_t closing = parser->current;
19719 if (match1(parser, PM_TOKEN_EOF)) {
19720 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
19721 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19722 } else {
19723 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
19724 }
19725
19726 pm_array_node_close_set(array, &closing);
19727 return UP(array);
19728 }
19729 case PM_TOKEN_REGEXP_BEGIN: {
19730 pm_token_t opening = parser->current;
19731 parser_lex(parser);
19732
19733 if (match1(parser, PM_TOKEN_REGEXP_END)) {
19734 // If we get here, then we have an end immediately after a start. In
19735 // that case we'll create an empty content token and return an
19736 // uninterpolated regular expression.
19737 pm_token_t content = (pm_token_t) {
19738 .type = PM_TOKEN_STRING_CONTENT,
19739 .start = parser->previous.end,
19740 .end = parser->previous.end
19741 };
19742
19743 parser_lex(parser);
19744
19745 pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->previous));
19746 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
19747
19748 return node;
19749 }
19750
19752
19753 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19754 // In this case we've hit string content so we know the regular
19755 // expression at least has something in it. We'll need to check if the
19756 // following token is the end (in which case we can return a plain
19757 // regular expression) or if it's not then it has interpolation.
19758 pm_string_t unescaped = parser->current_string;
19759 pm_token_t content = parser->current;
19760 bool ascii_only = parser->current_regular_expression_ascii_only;
19761 parser_lex(parser);
19762
19763 // If we hit an end, then we can create a regular expression
19764 // node without interpolation, which can be represented more
19765 // succinctly and more easily compiled.
19766 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19767 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19768
19769 // If we're not immediately followed by a =~, then we want
19770 // to parse all of the errors at this point. If it is
19771 // followed by a =~, then it will get parsed higher up while
19772 // parsing the named captures as well.
19773 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19774 parse_regular_expression_errors(parser, node);
19775 }
19776
19777 pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
19778 return UP(node);
19779 }
19780
19781 // If we get here, then we have interpolation so we'll need to create
19782 // a regular expression node with interpolation.
19783 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19784
19785 pm_token_t opening = not_provided(parser);
19786 pm_token_t closing = not_provided(parser);
19787 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
19788
19789 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19790 // This is extremely strange, but the first string part of a
19791 // regular expression will always be tagged as binary if we
19792 // are in a US-ASCII file, no matter its contents.
19793 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19794 }
19795
19796 pm_interpolated_regular_expression_node_append(interpolated, part);
19797 } else {
19798 // If the first part of the body of the regular expression is not a
19799 // string content, then we have interpolation and we need to create an
19800 // interpolated regular expression node.
19801 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19802 }
19803
19804 // Now that we're here and we have interpolation, we'll parse all of the
19805 // parts into the list.
19806 pm_node_t *part;
19807 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
19808 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19809 pm_interpolated_regular_expression_node_append(interpolated, part);
19810 }
19811 }
19812
19813 pm_token_t closing = parser->current;
19814 if (match1(parser, PM_TOKEN_EOF)) {
19815 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
19816 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19817 } else {
19818 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
19819 }
19820
19821 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
19822 return UP(interpolated);
19823 }
19824 case PM_TOKEN_BACKTICK:
19825 case PM_TOKEN_PERCENT_LOWER_X: {
19826 parser_lex(parser);
19827 pm_token_t opening = parser->previous;
19828
19829 // When we get here, we don't know if this string is going to have
19830 // interpolation or not, even though it is allowed. Still, we want to be
19831 // able to return a string node without interpolation if we can since
19832 // it'll be faster.
19833 if (match1(parser, PM_TOKEN_STRING_END)) {
19834 // If we get here, then we have an end immediately after a start. In
19835 // that case we'll create an empty content token and return an
19836 // uninterpolated string.
19837 pm_token_t content = (pm_token_t) {
19838 .type = PM_TOKEN_STRING_CONTENT,
19839 .start = parser->previous.end,
19840 .end = parser->previous.end
19841 };
19842
19843 parser_lex(parser);
19844 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
19845 }
19846
19848
19849 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19850 // In this case we've hit string content so we know the string
19851 // at least has something in it. We'll need to check if the
19852 // following token is the end (in which case we can return a
19853 // plain string) or if it's not then it has interpolation.
19854 pm_string_t unescaped = parser->current_string;
19855 pm_token_t content = parser->current;
19856 parser_lex(parser);
19857
19858 if (match1(parser, PM_TOKEN_STRING_END)) {
19859 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
19860 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19861 parser_lex(parser);
19862 return node;
19863 }
19864
19865 // If we get here, then we have interpolation so we'll need to
19866 // create a string node with interpolation.
19867 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19868
19869 pm_token_t opening = not_provided(parser);
19870 pm_token_t closing = not_provided(parser);
19871
19872 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
19873 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19874
19875 pm_interpolated_xstring_node_append(node, part);
19876 } else {
19877 // If the first part of the body of the string is not a string
19878 // content, then we have interpolation and we need to create an
19879 // interpolated string node.
19880 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19881 }
19882
19883 pm_node_t *part;
19884 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19885 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19886 pm_interpolated_xstring_node_append(node, part);
19887 }
19888 }
19889
19890 pm_token_t closing = parser->current;
19891 if (match1(parser, PM_TOKEN_EOF)) {
19892 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
19893 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19894 } else {
19895 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
19896 }
19897 pm_interpolated_xstring_node_closing_set(node, &closing);
19898
19899 return UP(node);
19900 }
19901 case PM_TOKEN_USTAR: {
19902 parser_lex(parser);
19903
19904 // * operators at the beginning of expressions are only valid in the
19905 // context of a multiple assignment. We enforce that here. We'll
19906 // still lex past it though and create a missing node in its place.
19907 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19908 pm_parser_err_prefix(parser, diag_id);
19909 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
19910 }
19911
19912 pm_token_t operator = parser->previous;
19913 pm_node_t *name = NULL;
19914
19915 if (token_begins_expression_p(parser->current.type)) {
19916 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19917 }
19918
19919 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
19920
19921 if (match1(parser, PM_TOKEN_COMMA)) {
19922 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19923 } else {
19924 return parse_target_validate(parser, splat, true);
19925 }
19926 }
19927 case PM_TOKEN_BANG: {
19928 if (binding_power > PM_BINDING_POWER_UNARY) {
19929 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19930 }
19931
19932 parser_lex(parser);
19933
19934 pm_token_t operator = parser->previous;
19935 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19936 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
19937
19938 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
19939 return UP(node);
19940 }
19941 case PM_TOKEN_TILDE: {
19942 if (binding_power > PM_BINDING_POWER_UNARY) {
19943 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19944 }
19945 parser_lex(parser);
19946
19947 pm_token_t operator = parser->previous;
19948 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19949 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
19950
19951 return UP(node);
19952 }
19953 case PM_TOKEN_UMINUS: {
19954 if (binding_power > PM_BINDING_POWER_UNARY) {
19955 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19956 }
19957 parser_lex(parser);
19958
19959 pm_token_t operator = parser->previous;
19960 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19961 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
19962
19963 return UP(node);
19964 }
19965 case PM_TOKEN_UMINUS_NUM: {
19966 parser_lex(parser);
19967
19968 pm_token_t operator = parser->previous;
19969 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19970
19971 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
19972 pm_token_t exponent_operator = parser->previous;
19973 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
19974 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
19975 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
19976 } else {
19977 switch (PM_NODE_TYPE(node)) {
19978 case PM_INTEGER_NODE:
19979 case PM_FLOAT_NODE:
19980 case PM_RATIONAL_NODE:
19981 case PM_IMAGINARY_NODE:
19982 parse_negative_numeric(node);
19983 break;
19984 default:
19985 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
19986 break;
19987 }
19988 }
19989
19990 return node;
19991 }
19992 case PM_TOKEN_MINUS_GREATER: {
19993 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
19995
19996 size_t opening_newline_index = token_newline_index(parser);
19997 pm_accepts_block_stack_push(parser, true);
19998 parser_lex(parser);
19999
20000 pm_token_t operator = parser->previous;
20001 pm_parser_scope_push(parser, false);
20002
20003 pm_block_parameters_node_t *block_parameters;
20004
20005 switch (parser->current.type) {
20006 case PM_TOKEN_PARENTHESIS_LEFT: {
20007 pm_token_t opening = parser->current;
20008 parser_lex(parser);
20009
20010 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20011 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20012 } else {
20013 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20014 }
20015
20016 accept1(parser, PM_TOKEN_NEWLINE);
20017 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20018
20019 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20020 break;
20021 }
20022 case PM_CASE_PARAMETER: {
20023 pm_accepts_block_stack_push(parser, false);
20024 pm_token_t opening = not_provided(parser);
20025 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20026 pm_accepts_block_stack_pop(parser);
20027 break;
20028 }
20029 default: {
20030 block_parameters = NULL;
20031 break;
20032 }
20033 }
20034
20035 pm_token_t opening;
20036 pm_node_t *body = NULL;
20037 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20038
20039 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20040 opening = parser->previous;
20041
20042 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20043 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20044 }
20045
20046 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20047 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20048 } else {
20049 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20050 opening = parser->previous;
20051
20052 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20053 pm_accepts_block_stack_push(parser, true);
20054 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20055 pm_accepts_block_stack_pop(parser);
20056 }
20057
20058 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20059 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20060 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20061 } else {
20062 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20063 }
20064
20065 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20066 }
20067
20068 pm_constant_id_list_t locals;
20069 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20070 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20071
20072 pm_parser_scope_pop(parser);
20073 pm_accepts_block_stack_pop(parser);
20074
20075 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20076 }
20077 case PM_TOKEN_UPLUS: {
20078 if (binding_power > PM_BINDING_POWER_UNARY) {
20079 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20080 }
20081 parser_lex(parser);
20082
20083 pm_token_t operator = parser->previous;
20084 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20085 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20086
20087 return UP(node);
20088 }
20089 case PM_TOKEN_STRING_BEGIN:
20090 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20091 case PM_TOKEN_SYMBOL_BEGIN: {
20092 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20093 parser_lex(parser);
20094
20095 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20096 }
20097 default: {
20098 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20099
20100 if (recoverable != PM_CONTEXT_NONE) {
20101 parser->recovering = true;
20102
20103 // If the given error is not the generic one, then we'll add it
20104 // here because it will provide more context in addition to the
20105 // recoverable error that we will also add.
20106 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20107 pm_parser_err_prefix(parser, diag_id);
20108 }
20109
20110 // If we get here, then we are assuming this token is closing a
20111 // parent context, so we'll indicate that to the user so that
20112 // they know how we behaved.
20113 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20114 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20115 // We're going to make a special case here, because "cannot
20116 // parse expression" is pretty generic, and we know here that we
20117 // have an unexpected token.
20118 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20119 } else {
20120 pm_parser_err_prefix(parser, diag_id);
20121 }
20122
20123 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
20124 }
20125 }
20126}
20127
20137static pm_node_t *
20138parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20139 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20140
20141 // Contradicting binding powers, the right-hand-side value of the assignment
20142 // allows the `rescue` modifier.
20143 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20144 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20145
20146 pm_token_t rescue = parser->current;
20147 parser_lex(parser);
20148
20149 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20150 context_pop(parser);
20151
20152 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20153 }
20154
20155 return value;
20156}
20157
20162static void
20163parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20164 switch (PM_NODE_TYPE(node)) {
20165 case PM_BEGIN_NODE: {
20166 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20167 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20168 break;
20169 }
20170 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20172 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20173 break;
20174 }
20175 case PM_PARENTHESES_NODE: {
20176 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20177 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20178 break;
20179 }
20180 case PM_STATEMENTS_NODE: {
20181 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20182 const pm_node_t *statement;
20183
20184 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20185 parse_assignment_value_local(parser, statement);
20186 }
20187 break;
20188 }
20189 default:
20190 break;
20191 }
20192}
20193
20206static pm_node_t *
20207parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20208 bool permitted = true;
20209 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20210
20211 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20212 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20213
20214 parse_assignment_value_local(parser, value);
20215 bool single_value = true;
20216
20217 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20218 single_value = false;
20219
20220 pm_token_t opening = not_provided(parser);
20221 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20222
20223 pm_array_node_elements_append(array, value);
20224 value = UP(array);
20225
20226 while (accept1(parser, PM_TOKEN_COMMA)) {
20227 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20228
20229 pm_array_node_elements_append(array, element);
20230 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20231
20232 parse_assignment_value_local(parser, element);
20233 }
20234 }
20235
20236 // Contradicting binding powers, the right-hand-side value of the assignment
20237 // allows the `rescue` modifier.
20238 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20239 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20240
20241 pm_token_t rescue = parser->current;
20242 parser_lex(parser);
20243
20244 bool accepts_command_call_inner = false;
20245
20246 // RHS can accept command call iff the value is a call with arguments
20247 // but without parenthesis.
20248 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20249 pm_call_node_t *call_node = (pm_call_node_t *) value;
20250 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20251 accepts_command_call_inner = true;
20252 }
20253 }
20254
20255 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20256 context_pop(parser);
20257
20258 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20259 }
20260
20261 return value;
20262}
20263
20271static void
20272parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20273 if (call_node->arguments != NULL) {
20274 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20275 pm_node_unreference(parser, UP(call_node->arguments));
20276 pm_node_destroy(parser, UP(call_node->arguments));
20277 call_node->arguments = NULL;
20278 }
20279
20280 if (call_node->block != NULL) {
20281 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20282 pm_node_unreference(parser, UP(call_node->block));
20283 pm_node_destroy(parser, UP(call_node->block));
20284 call_node->block = NULL;
20285 }
20286}
20287
20312
20313static inline const uint8_t *
20314pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20315 cursor++;
20316
20317 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20318 uint8_t value = escape_hexadecimal_digit(*cursor);
20319 cursor++;
20320
20321 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20322 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20323 cursor++;
20324 }
20325
20326 pm_buffer_append_byte(unescaped, value);
20327 } else {
20328 pm_buffer_append_string(unescaped, "\\x", 2);
20329 }
20330
20331 return cursor;
20332}
20333
20334static inline const uint8_t *
20335pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20336 uint8_t value = (uint8_t) (*cursor - '0');
20337 cursor++;
20338
20339 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20340 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20341 cursor++;
20342
20343 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20344 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20345 cursor++;
20346 }
20347 }
20348
20349 pm_buffer_append_byte(unescaped, value);
20350 return cursor;
20351}
20352
20353static inline const uint8_t *
20354pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20355 const uint8_t *start = cursor - 1;
20356 cursor++;
20357
20358 if (cursor >= end) {
20359 pm_buffer_append_string(unescaped, "\\u", 2);
20360 return cursor;
20361 }
20362
20363 if (*cursor != '{') {
20364 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20365 uint32_t value = escape_unicode(parser, cursor, length);
20366
20367 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20368 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20369 }
20370
20371 return cursor + length;
20372 }
20373
20374 cursor++;
20375 for (;;) {
20376 while (cursor < end && *cursor == ' ') cursor++;
20377
20378 if (cursor >= end) break;
20379 if (*cursor == '}') {
20380 cursor++;
20381 break;
20382 }
20383
20384 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20385 uint32_t value = escape_unicode(parser, cursor, length);
20386
20387 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20388 cursor += length;
20389 }
20390
20391 return cursor;
20392}
20393
20394static void
20395pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
20396 const uint8_t *end = source + length;
20397 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20398
20399 for (;;) {
20400 if (++cursor >= end) {
20401 pm_buffer_append_byte(unescaped, '\\');
20402 return;
20403 }
20404
20405 switch (*cursor) {
20406 case 'x':
20407 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20408 break;
20409 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20410 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20411 break;
20412 case 'u':
20413 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
20414 break;
20415 default:
20416 pm_buffer_append_byte(unescaped, '\\');
20417 break;
20418 }
20419
20420 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20421 if (next_cursor == NULL) break;
20422
20423 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20424 cursor = next_cursor;
20425 }
20426
20427 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20428}
20429
20434static void
20435parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20437
20438 pm_parser_t *parser = callback_data->parser;
20439 pm_call_node_t *call = callback_data->call;
20440 pm_constant_id_list_t *names = &callback_data->names;
20441
20442 const uint8_t *source = pm_string_source(capture);
20443 size_t length = pm_string_length(capture);
20444 pm_buffer_t unescaped = { 0 };
20445
20446 // First, we need to handle escapes within the name of the capture group.
20447 // This is because regular expressions have three different representations
20448 // in prism. The first is the plain source code. The second is the
20449 // representation that will be sent to the regular expression engine, which
20450 // is the value of the "unescaped" field. This is poorly named, because it
20451 // actually still contains escapes, just a subset of them that the regular
20452 // expression engine knows how to handle. The third representation is fully
20453 // unescaped, which is what we need.
20454 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20455 if (PRISM_UNLIKELY(cursor != NULL)) {
20456 pm_named_capture_escape(parser, &unescaped, source, length, cursor);
20457 source = (const uint8_t *) pm_buffer_value(&unescaped);
20458 length = pm_buffer_length(&unescaped);
20459 }
20460
20461 pm_location_t location;
20462 pm_constant_id_t name;
20463
20464 // If the name of the capture group isn't a valid identifier, we do
20465 // not add it to the local table.
20466 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20467 pm_buffer_free(&unescaped);
20468 return;
20469 }
20470
20471 if (callback_data->shared) {
20472 // If the unescaped string is a slice of the source, then we can
20473 // copy the names directly. The pointers will line up.
20474 location = (pm_location_t) { .start = source, .end = source + length };
20475 name = pm_parser_constant_id_location(parser, location.start, location.end);
20476 } else {
20477 // Otherwise, the name is a slice of the malloc-ed owned string,
20478 // in which case we need to copy it out into a new string.
20479 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20480
20481 void *memory = xmalloc(length);
20482 if (memory == NULL) abort();
20483
20484 memcpy(memory, source, length);
20485 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20486 }
20487
20488 // Add this name to the list of constants if it is valid, not duplicated,
20489 // and not a keyword.
20490 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20491 pm_constant_id_list_append(names, name);
20492
20493 int depth;
20494 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20495 // If the local is not already a local but it is a keyword, then we
20496 // do not want to add a capture for this.
20497 if (pm_local_is_keyword((const char *) source, length)) {
20498 pm_buffer_free(&unescaped);
20499 return;
20500 }
20501
20502 // If the identifier is not already a local, then we will add it to
20503 // the local table.
20504 pm_parser_local_add(parser, name, location.start, location.end, 0);
20505 }
20506
20507 // Here we lazily create the MatchWriteNode since we know we're
20508 // about to add a target.
20509 if (callback_data->match == NULL) {
20510 callback_data->match = pm_match_write_node_create(parser, call);
20511 }
20512
20513 // Next, create the local variable target and add it to the list of
20514 // targets for the match.
20515 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth));
20516 pm_node_list_append(&callback_data->match->targets, target);
20517 }
20518
20519 pm_buffer_free(&unescaped);
20520}
20521
20526static pm_node_t *
20527parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20529 .parser = parser,
20530 .call = call,
20531 .names = { 0 },
20532 .shared = content->type == PM_STRING_SHARED
20533 };
20534
20536 .parser = parser,
20537 .start = call->receiver->location.start,
20538 .end = call->receiver->location.end,
20539 .shared = content->type == PM_STRING_SHARED
20540 };
20541
20542 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20543 pm_constant_id_list_free(&callback_data.names);
20544
20545 if (callback_data.match != NULL) {
20546 return UP(callback_data.match);
20547 } else {
20548 return UP(call);
20549 }
20550}
20551
20552static inline pm_node_t *
20553parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20554 pm_token_t token = parser->current;
20555
20556 switch (token.type) {
20557 case PM_TOKEN_EQUAL: {
20558 switch (PM_NODE_TYPE(node)) {
20559 case PM_CALL_NODE: {
20560 // If we have no arguments to the call node and we need this
20561 // to be a target then this is either a method call or a
20562 // local variable write. This _must_ happen before the value
20563 // is parsed because it could be referenced in the value.
20564 pm_call_node_t *call_node = (pm_call_node_t *) node;
20565 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20566 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20567 }
20568 }
20570 case PM_CASE_WRITABLE: {
20571 // When we have `it = value`, we need to add `it` as a local
20572 // variable before parsing the value, in case the value
20573 // references the variable.
20574 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
20575 pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
20576 }
20577
20578 parser_lex(parser);
20579 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20580
20581 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20582 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20583 }
20584
20585 return parse_write(parser, node, &token, value);
20586 }
20587 case PM_SPLAT_NODE: {
20588 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20589 pm_multi_target_node_targets_append(parser, multi_target, node);
20590
20591 parser_lex(parser);
20592 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20593 return parse_write(parser, UP(multi_target), &token, value);
20594 }
20595 case PM_SOURCE_ENCODING_NODE:
20596 case PM_FALSE_NODE:
20597 case PM_SOURCE_FILE_NODE:
20598 case PM_SOURCE_LINE_NODE:
20599 case PM_NIL_NODE:
20600 case PM_SELF_NODE:
20601 case PM_TRUE_NODE: {
20602 // In these special cases, we have specific error messages
20603 // and we will replace them with local variable writes.
20604 parser_lex(parser);
20605 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20606 return parse_unwriteable_write(parser, node, &token, value);
20607 }
20608 default:
20609 // In this case we have an = sign, but we don't know what
20610 // it's for. We need to treat it as an error. We'll mark it
20611 // as an error and skip past it.
20612 parser_lex(parser);
20613 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20614 return node;
20615 }
20616 }
20617 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
20618 switch (PM_NODE_TYPE(node)) {
20619 case PM_BACK_REFERENCE_READ_NODE:
20620 case PM_NUMBERED_REFERENCE_READ_NODE:
20621 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20623 case PM_GLOBAL_VARIABLE_READ_NODE: {
20624 parser_lex(parser);
20625
20626 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20627 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
20628
20629 pm_node_destroy(parser, node);
20630 return result;
20631 }
20632 case PM_CLASS_VARIABLE_READ_NODE: {
20633 parser_lex(parser);
20634
20635 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20636 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20637
20638 pm_node_destroy(parser, node);
20639 return result;
20640 }
20641 case PM_CONSTANT_PATH_NODE: {
20642 parser_lex(parser);
20643
20644 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20645 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20646
20647 return parse_shareable_constant_write(parser, write);
20648 }
20649 case PM_CONSTANT_READ_NODE: {
20650 parser_lex(parser);
20651
20652 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20653 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20654
20655 pm_node_destroy(parser, node);
20656 return parse_shareable_constant_write(parser, write);
20657 }
20658 case PM_INSTANCE_VARIABLE_READ_NODE: {
20659 parser_lex(parser);
20660
20661 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20662 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20663
20664 pm_node_destroy(parser, node);
20665 return result;
20666 }
20667 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20668 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20669 parser_lex(parser);
20670
20671 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20672 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
20673
20674 pm_node_unreference(parser, node);
20675 pm_node_destroy(parser, node);
20676 return result;
20677 }
20678 case PM_LOCAL_VARIABLE_READ_NODE: {
20679 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20680 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20681 pm_node_unreference(parser, node);
20682 }
20683
20685 parser_lex(parser);
20686
20687 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20688 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20689
20690 pm_node_destroy(parser, node);
20691 return result;
20692 }
20693 case PM_CALL_NODE: {
20694 pm_call_node_t *cast = (pm_call_node_t *) node;
20695
20696 // If we have a vcall (a method with no arguments and no
20697 // receiver that could have been a local variable) then we
20698 // will transform it into a local variable write.
20699 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20700 pm_location_t *message_loc = &cast->message_loc;
20701 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20702
20703 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20704 parser_lex(parser);
20705
20706 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20707 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20708
20709 pm_node_destroy(parser, UP(cast));
20710 return result;
20711 }
20712
20713 // Move past the token here so that we have already added
20714 // the local variable by this point.
20715 parser_lex(parser);
20716
20717 // If there is no call operator and the message is "[]" then
20718 // this is an aref expression, and we can transform it into
20719 // an aset expression.
20720 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20721 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20722 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
20723 }
20724
20725 // If this node cannot be writable, then we have an error.
20726 if (pm_call_node_writable_p(parser, cast)) {
20727 parse_write_name(parser, &cast->name);
20728 } else {
20729 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20730 }
20731
20732 parse_call_operator_write(parser, cast, &token);
20733 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20734 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
20735 }
20736 case PM_MULTI_WRITE_NODE: {
20737 parser_lex(parser);
20738 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
20739 return node;
20740 }
20741 default:
20742 parser_lex(parser);
20743
20744 // In this case we have an &&= sign, but we don't know what it's for.
20745 // We need to treat it as an error. For now, we'll mark it as an error
20746 // and just skip right past it.
20747 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20748 return node;
20749 }
20750 }
20751 case PM_TOKEN_PIPE_PIPE_EQUAL: {
20752 switch (PM_NODE_TYPE(node)) {
20753 case PM_BACK_REFERENCE_READ_NODE:
20754 case PM_NUMBERED_REFERENCE_READ_NODE:
20755 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20757 case PM_GLOBAL_VARIABLE_READ_NODE: {
20758 parser_lex(parser);
20759
20760 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20761 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
20762
20763 pm_node_destroy(parser, node);
20764 return result;
20765 }
20766 case PM_CLASS_VARIABLE_READ_NODE: {
20767 parser_lex(parser);
20768
20769 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20770 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20771
20772 pm_node_destroy(parser, node);
20773 return result;
20774 }
20775 case PM_CONSTANT_PATH_NODE: {
20776 parser_lex(parser);
20777
20778 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20779 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20780
20781 return parse_shareable_constant_write(parser, write);
20782 }
20783 case PM_CONSTANT_READ_NODE: {
20784 parser_lex(parser);
20785
20786 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20787 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20788
20789 pm_node_destroy(parser, node);
20790 return parse_shareable_constant_write(parser, write);
20791 }
20792 case PM_INSTANCE_VARIABLE_READ_NODE: {
20793 parser_lex(parser);
20794
20795 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20796 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20797
20798 pm_node_destroy(parser, node);
20799 return result;
20800 }
20801 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20802 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20803 parser_lex(parser);
20804
20805 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20806 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
20807
20808 pm_node_unreference(parser, node);
20809 pm_node_destroy(parser, node);
20810 return result;
20811 }
20812 case PM_LOCAL_VARIABLE_READ_NODE: {
20813 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20814 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20815 pm_node_unreference(parser, node);
20816 }
20817
20819 parser_lex(parser);
20820
20821 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20822 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20823
20824 pm_node_destroy(parser, node);
20825 return result;
20826 }
20827 case PM_CALL_NODE: {
20828 pm_call_node_t *cast = (pm_call_node_t *) node;
20829
20830 // If we have a vcall (a method with no arguments and no
20831 // receiver that could have been a local variable) then we
20832 // will transform it into a local variable write.
20833 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20834 pm_location_t *message_loc = &cast->message_loc;
20835 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20836
20837 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20838 parser_lex(parser);
20839
20840 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20841 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20842
20843 pm_node_destroy(parser, UP(cast));
20844 return result;
20845 }
20846
20847 // Move past the token here so that we have already added
20848 // the local variable by this point.
20849 parser_lex(parser);
20850
20851 // If there is no call operator and the message is "[]" then
20852 // this is an aref expression, and we can transform it into
20853 // an aset expression.
20854 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20855 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20856 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
20857 }
20858
20859 // If this node cannot be writable, then we have an error.
20860 if (pm_call_node_writable_p(parser, cast)) {
20861 parse_write_name(parser, &cast->name);
20862 } else {
20863 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20864 }
20865
20866 parse_call_operator_write(parser, cast, &token);
20867 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20868 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
20869 }
20870 case PM_MULTI_WRITE_NODE: {
20871 parser_lex(parser);
20872 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
20873 return node;
20874 }
20875 default:
20876 parser_lex(parser);
20877
20878 // In this case we have an ||= sign, but we don't know what it's for.
20879 // We need to treat it as an error. For now, we'll mark it as an error
20880 // and just skip right past it.
20881 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20882 return node;
20883 }
20884 }
20885 case PM_TOKEN_AMPERSAND_EQUAL:
20886 case PM_TOKEN_CARET_EQUAL:
20887 case PM_TOKEN_GREATER_GREATER_EQUAL:
20888 case PM_TOKEN_LESS_LESS_EQUAL:
20889 case PM_TOKEN_MINUS_EQUAL:
20890 case PM_TOKEN_PERCENT_EQUAL:
20891 case PM_TOKEN_PIPE_EQUAL:
20892 case PM_TOKEN_PLUS_EQUAL:
20893 case PM_TOKEN_SLASH_EQUAL:
20894 case PM_TOKEN_STAR_EQUAL:
20895 case PM_TOKEN_STAR_STAR_EQUAL: {
20896 switch (PM_NODE_TYPE(node)) {
20897 case PM_BACK_REFERENCE_READ_NODE:
20898 case PM_NUMBERED_REFERENCE_READ_NODE:
20899 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20901 case PM_GLOBAL_VARIABLE_READ_NODE: {
20902 parser_lex(parser);
20903
20904 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20905 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
20906
20907 pm_node_destroy(parser, node);
20908 return result;
20909 }
20910 case PM_CLASS_VARIABLE_READ_NODE: {
20911 parser_lex(parser);
20912
20913 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20914 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20915
20916 pm_node_destroy(parser, node);
20917 return result;
20918 }
20919 case PM_CONSTANT_PATH_NODE: {
20920 parser_lex(parser);
20921
20922 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20923 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20924
20925 return parse_shareable_constant_write(parser, write);
20926 }
20927 case PM_CONSTANT_READ_NODE: {
20928 parser_lex(parser);
20929
20930 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20931 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20932
20933 pm_node_destroy(parser, node);
20934 return parse_shareable_constant_write(parser, write);
20935 }
20936 case PM_INSTANCE_VARIABLE_READ_NODE: {
20937 parser_lex(parser);
20938
20939 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20940 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20941
20942 pm_node_destroy(parser, node);
20943 return result;
20944 }
20945 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20946 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20947 parser_lex(parser);
20948
20949 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20950 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
20951
20952 pm_node_unreference(parser, node);
20953 pm_node_destroy(parser, node);
20954 return result;
20955 }
20956 case PM_LOCAL_VARIABLE_READ_NODE: {
20957 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20958 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20959 pm_node_unreference(parser, node);
20960 }
20961
20963 parser_lex(parser);
20964
20965 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20966 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20967
20968 pm_node_destroy(parser, node);
20969 return result;
20970 }
20971 case PM_CALL_NODE: {
20972 parser_lex(parser);
20973 pm_call_node_t *cast = (pm_call_node_t *) node;
20974
20975 // If we have a vcall (a method with no arguments and no
20976 // receiver that could have been a local variable) then we
20977 // will transform it into a local variable write.
20978 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20979 pm_location_t *message_loc = &cast->message_loc;
20980 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20981
20982 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20983 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20984 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20985
20986 pm_node_destroy(parser, UP(cast));
20987 return result;
20988 }
20989
20990 // If there is no call operator and the message is "[]" then
20991 // this is an aref expression, and we can transform it into
20992 // an aset expression.
20993 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20994 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20995 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
20996 }
20997
20998 // If this node cannot be writable, then we have an error.
20999 if (pm_call_node_writable_p(parser, cast)) {
21000 parse_write_name(parser, &cast->name);
21001 } else {
21002 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21003 }
21004
21005 parse_call_operator_write(parser, cast, &token);
21006 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21007 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
21008 }
21009 case PM_MULTI_WRITE_NODE: {
21010 parser_lex(parser);
21011 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21012 return node;
21013 }
21014 default:
21015 parser_lex(parser);
21016
21017 // In this case we have an operator but we don't know what it's for.
21018 // We need to treat it as an error. For now, we'll mark it as an error
21019 // and just skip right past it.
21020 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21021 return node;
21022 }
21023 }
21024 case PM_TOKEN_AMPERSAND_AMPERSAND:
21025 case PM_TOKEN_KEYWORD_AND: {
21026 parser_lex(parser);
21027
21028 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21029 return UP(pm_and_node_create(parser, node, &token, right));
21030 }
21031 case PM_TOKEN_KEYWORD_OR:
21032 case PM_TOKEN_PIPE_PIPE: {
21033 parser_lex(parser);
21034
21035 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21036 return UP(pm_or_node_create(parser, node, &token, right));
21037 }
21038 case PM_TOKEN_EQUAL_TILDE: {
21039 // Note that we _must_ parse the value before adding the local
21040 // variables in order to properly mirror the behavior of Ruby. For
21041 // example,
21042 //
21043 // /(?<foo>bar)/ =~ foo
21044 //
21045 // In this case, `foo` should be a method call and not a local yet.
21046 parser_lex(parser);
21047 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21048
21049 // By default, we're going to create a call node and then return it.
21050 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21051 pm_node_t *result = UP(call);
21052
21053 // If the receiver of this =~ is a regular expression node, then we
21054 // need to introduce local variables for it based on its named
21055 // capture groups.
21056 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21057 // It's possible to have an interpolated regular expression node
21058 // that only contains strings. This is because it can be split
21059 // up by a heredoc. In this case we need to concat the unescaped
21060 // strings together and then parse them as a regular expression.
21062
21063 bool interpolated = false;
21064 size_t total_length = 0;
21065
21066 pm_node_t *part;
21067 PM_NODE_LIST_FOREACH(parts, index, part) {
21068 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21069 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21070 } else {
21071 interpolated = true;
21072 break;
21073 }
21074 }
21075
21076 if (!interpolated && total_length > 0) {
21077 void *memory = xmalloc(total_length);
21078 if (!memory) abort();
21079
21080 uint8_t *cursor = memory;
21081 PM_NODE_LIST_FOREACH(parts, index, part) {
21082 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21083 size_t length = pm_string_length(unescaped);
21084
21085 memcpy(cursor, pm_string_source(unescaped), length);
21086 cursor += length;
21087 }
21088
21089 pm_string_t owned;
21090 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21091
21092 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21093 pm_string_free(&owned);
21094 }
21095 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21096 // If we have a regular expression node, then we can just parse
21097 // the named captures directly off the unescaped string.
21098 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21099 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21100 }
21101
21102 return result;
21103 }
21104 case PM_TOKEN_UAMPERSAND:
21105 case PM_TOKEN_USTAR:
21106 case PM_TOKEN_USTAR_STAR:
21107 // The only times this will occur are when we are in an error state,
21108 // but we'll put them in here so that errors can propagate.
21109 case PM_TOKEN_BANG_EQUAL:
21110 case PM_TOKEN_BANG_TILDE:
21111 case PM_TOKEN_EQUAL_EQUAL:
21112 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21113 case PM_TOKEN_LESS_EQUAL_GREATER:
21114 case PM_TOKEN_CARET:
21115 case PM_TOKEN_PIPE:
21116 case PM_TOKEN_AMPERSAND:
21117 case PM_TOKEN_GREATER_GREATER:
21118 case PM_TOKEN_LESS_LESS:
21119 case PM_TOKEN_MINUS:
21120 case PM_TOKEN_PLUS:
21121 case PM_TOKEN_PERCENT:
21122 case PM_TOKEN_SLASH:
21123 case PM_TOKEN_STAR:
21124 case PM_TOKEN_STAR_STAR: {
21125 parser_lex(parser);
21126 pm_token_t operator = parser->previous;
21127 switch (PM_NODE_TYPE(node)) {
21128 case PM_RESCUE_MODIFIER_NODE: {
21130 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21131 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21132 }
21133 break;
21134 }
21135 case PM_AND_NODE: {
21136 pm_and_node_t *cast = (pm_and_node_t *) node;
21137 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21138 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21139 }
21140 break;
21141 }
21142 case PM_OR_NODE: {
21143 pm_or_node_t *cast = (pm_or_node_t *) node;
21144 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21145 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21146 }
21147 break;
21148 }
21149 default:
21150 break;
21151 }
21152
21153 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21154 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21155 }
21156 case PM_TOKEN_GREATER:
21157 case PM_TOKEN_GREATER_EQUAL:
21158 case PM_TOKEN_LESS:
21159 case PM_TOKEN_LESS_EQUAL: {
21160 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21161 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21162 }
21163
21164 parser_lex(parser);
21165 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21166 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21167 }
21168 case PM_TOKEN_AMPERSAND_DOT:
21169 case PM_TOKEN_DOT: {
21170 parser_lex(parser);
21171 pm_token_t operator = parser->previous;
21172 pm_arguments_t arguments = { 0 };
21173
21174 // This if statement handles the foo.() syntax.
21175 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21176 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21177 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21178 }
21179
21180 switch (PM_NODE_TYPE(node)) {
21181 case PM_RESCUE_MODIFIER_NODE: {
21183 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21184 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21185 }
21186 break;
21187 }
21188 case PM_AND_NODE: {
21189 pm_and_node_t *cast = (pm_and_node_t *) node;
21190 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21191 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21192 }
21193 break;
21194 }
21195 case PM_OR_NODE: {
21196 pm_or_node_t *cast = (pm_or_node_t *) node;
21197 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21198 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21199 }
21200 break;
21201 }
21202 default:
21203 break;
21204 }
21205
21206 pm_token_t message;
21207
21208 switch (parser->current.type) {
21209 case PM_CASE_OPERATOR:
21210 case PM_CASE_KEYWORD:
21211 case PM_TOKEN_CONSTANT:
21212 case PM_TOKEN_IDENTIFIER:
21213 case PM_TOKEN_METHOD_NAME: {
21214 parser_lex(parser);
21215 message = parser->previous;
21216 break;
21217 }
21218 default: {
21219 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21220 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21221 }
21222 }
21223
21224 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21225 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21226
21227 if (
21228 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21229 arguments.arguments == NULL &&
21230 arguments.opening_loc.start == NULL &&
21231 match1(parser, PM_TOKEN_COMMA)
21232 ) {
21233 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21234 } else {
21235 return UP(call);
21236 }
21237 }
21238 case PM_TOKEN_DOT_DOT:
21239 case PM_TOKEN_DOT_DOT_DOT: {
21240 parser_lex(parser);
21241
21242 pm_node_t *right = NULL;
21243 if (token_begins_expression_p(parser->current.type)) {
21244 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21245 }
21246
21247 return UP(pm_range_node_create(parser, node, &token, right));
21248 }
21249 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21250 pm_token_t keyword = parser->current;
21251 parser_lex(parser);
21252
21253 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21254 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21255 }
21256 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21257 pm_token_t keyword = parser->current;
21258 parser_lex(parser);
21259
21260 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21261 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21262 }
21263 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21264 parser_lex(parser);
21265 pm_statements_node_t *statements = pm_statements_node_create(parser);
21266 pm_statements_node_body_append(parser, statements, node, true);
21267
21268 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21269 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21270 }
21271 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21272 parser_lex(parser);
21273 pm_statements_node_t *statements = pm_statements_node_create(parser);
21274 pm_statements_node_body_append(parser, statements, node, true);
21275
21276 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21277 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21278 }
21279 case PM_TOKEN_QUESTION_MARK: {
21280 context_push(parser, PM_CONTEXT_TERNARY);
21281 pm_node_list_t current_block_exits = { 0 };
21282 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21283
21284 pm_token_t qmark = parser->current;
21285 parser_lex(parser);
21286
21287 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21288
21289 if (parser->recovering) {
21290 // If parsing the true expression of this ternary resulted in a syntax
21291 // error that we can recover from, then we're going to put missing nodes
21292 // and tokens into the remaining places. We want to be sure to do this
21293 // before the `expect` function call to make sure it doesn't
21294 // accidentally move past a ':' token that occurs after the syntax
21295 // error.
21296 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21297 pm_node_t *false_expression = UP(pm_missing_node_create(parser, colon.start, colon.end));
21298
21299 context_pop(parser);
21300 pop_block_exits(parser, previous_block_exits);
21301 pm_node_list_free(&current_block_exits);
21302
21303 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21304 }
21305
21306 accept1(parser, PM_TOKEN_NEWLINE);
21307 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21308
21309 pm_token_t colon = parser->previous;
21310 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21311
21312 context_pop(parser);
21313 pop_block_exits(parser, previous_block_exits);
21314 pm_node_list_free(&current_block_exits);
21315
21316 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21317 }
21318 case PM_TOKEN_COLON_COLON: {
21319 parser_lex(parser);
21320 pm_token_t delimiter = parser->previous;
21321
21322 switch (parser->current.type) {
21323 case PM_TOKEN_CONSTANT: {
21324 parser_lex(parser);
21325 pm_node_t *path;
21326
21327 if (
21328 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21329 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21330 ) {
21331 // If we have a constant immediately following a '::' operator, then
21332 // this can either be a constant path or a method call, depending on
21333 // what follows the constant.
21334 //
21335 // If we have parentheses, then this is a method call. That would
21336 // look like Foo::Bar().
21337 pm_token_t message = parser->previous;
21338 pm_arguments_t arguments = { 0 };
21339
21340 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21341 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21342 } else {
21343 // Otherwise, this is a constant path. That would look like Foo::Bar.
21344 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21345 }
21346
21347 // If this is followed by a comma then it is a multiple assignment.
21348 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21349 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21350 }
21351
21352 return path;
21353 }
21354 case PM_CASE_OPERATOR:
21355 case PM_CASE_KEYWORD:
21356 case PM_TOKEN_IDENTIFIER:
21357 case PM_TOKEN_METHOD_NAME: {
21358 parser_lex(parser);
21359 pm_token_t message = parser->previous;
21360
21361 // If we have an identifier following a '::' operator, then it is for
21362 // sure a method call.
21363 pm_arguments_t arguments = { 0 };
21364 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21365 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21366
21367 // If this is followed by a comma then it is a multiple assignment.
21368 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21369 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21370 }
21371
21372 return UP(call);
21373 }
21374 case PM_TOKEN_PARENTHESIS_LEFT: {
21375 // If we have a parenthesis following a '::' operator, then it is the
21376 // method call shorthand. That would look like Foo::(bar).
21377 pm_arguments_t arguments = { 0 };
21378 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21379
21380 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21381 }
21382 default: {
21383 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21384 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21385 }
21386 }
21387 }
21388 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21389 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21390 parser_lex(parser);
21391 accept1(parser, PM_TOKEN_NEWLINE);
21392
21393 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21394 context_pop(parser);
21395
21396 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21397 }
21398 case PM_TOKEN_BRACKET_LEFT: {
21399 parser_lex(parser);
21400
21401 pm_arguments_t arguments = { 0 };
21402 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21403
21404 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21405 pm_accepts_block_stack_push(parser, true);
21406 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21407 pm_accepts_block_stack_pop(parser);
21408 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21409 }
21410
21411 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21412
21413 // If we have a comma after the closing bracket then this is a multiple
21414 // assignment and we should parse the targets.
21415 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21416 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21417 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21418 }
21419
21420 // If we're at the end of the arguments, we can now check if there is a
21421 // block node that starts with a {. If there is, then we can parse it and
21422 // add it to the arguments.
21423 pm_block_node_t *block = NULL;
21424 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21425 block = parse_block(parser, (uint16_t) (depth + 1));
21426 pm_arguments_validate_block(parser, &arguments, block);
21427 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21428 block = parse_block(parser, (uint16_t) (depth + 1));
21429 }
21430
21431 if (block != NULL) {
21432 if (arguments.block != NULL) {
21433 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21434 if (arguments.arguments == NULL) {
21435 arguments.arguments = pm_arguments_node_create(parser);
21436 }
21437 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21438 }
21439
21440 arguments.block = UP(block);
21441 }
21442
21443 return UP(pm_call_node_aref_create(parser, node, &arguments));
21444 }
21445 case PM_TOKEN_KEYWORD_IN: {
21446 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21447 parser->pattern_matching_newlines = true;
21448
21449 pm_token_t operator = parser->current;
21450 parser->command_start = false;
21451 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21452 parser_lex(parser);
21453
21454 pm_constant_id_list_t captures = { 0 };
21455 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21456
21457 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21458 pm_constant_id_list_free(&captures);
21459
21460 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21461 }
21462 case PM_TOKEN_EQUAL_GREATER: {
21463 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21464 parser->pattern_matching_newlines = true;
21465
21466 pm_token_t operator = parser->current;
21467 parser->command_start = false;
21468 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21469 parser_lex(parser);
21470
21471 pm_constant_id_list_t captures = { 0 };
21472 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21473
21474 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21475 pm_constant_id_list_free(&captures);
21476
21477 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21478 }
21479 default:
21480 assert(false && "unreachable");
21481 return NULL;
21482 }
21483}
21484
21485#undef PM_PARSE_PATTERN_SINGLE
21486#undef PM_PARSE_PATTERN_TOP
21487#undef PM_PARSE_PATTERN_MULTI
21488
21493static inline bool
21494pm_call_node_command_p(const pm_call_node_t *node) {
21495 return (
21496 (node->opening_loc.start == NULL) &&
21497 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21498 (node->arguments != NULL || node->block != NULL)
21499 );
21500}
21501
/**
 * Parse an expression: one prefix expression (via parse_expression_prefix)
 * followed by as many infix operators (via parse_expression_infix) as the
 * given binding power allows.
 *
 * parser               - the parser whose current token is examined and consumed
 * binding_power        - an operator is only folded in while this value is <= the
 *                        operator's left binding power
 * accepts_command_call - forwarded to the prefix and infix parsers; cleared inside
 *                        the loop once the parsed node is no longer one of the
 *                        method-chain shapes enumerated below
 * accepts_label        - forwarded to parse_expression_prefix
 * diag_id              - forwarded to parse_expression_prefix
 * depth                - recursion depth; when it reaches PRISM_DEPTH_MAXIMUM an
 *                        error is recorded and a missing node is returned
 *
 * Returns the parsed node (a missing node when the depth limit is hit).
 */
21510static pm_node_t *
21511parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
// Depth guard: report the nesting error on the current token and return a
// missing node that spans it.
21512 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21513 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21514 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
21515 }
21516
21517 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21518
// Some prefix results end the expression immediately, depending on the left
// binding power of the token that follows.
21519 switch (PM_NODE_TYPE(node)) {
21520 case PM_MISSING_NODE:
21521 // If we found a syntax error, then the type of node returned by
21522 // parse_expression_prefix is going to be a missing node.
21523 return node;
21524 case PM_PRE_EXECUTION_NODE:
21525 case PM_POST_EXECUTION_NODE:
21526 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
21527 case PM_ALIAS_METHOD_NODE:
21528 case PM_MULTI_WRITE_NODE:
21529 case PM_UNDEF_NODE:
21530 // These expressions are statements, and cannot be followed by
21531 // operators (except modifiers).
21532 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21533 return node;
21534 }
21535 break;
21536 case PM_CALL_NODE:
21537 // If we have a call node, then we need to check if it looks like a
21538 // method call without parentheses that contains arguments. If it
21539 // does, then it has different rules for parsing infix operators,
21540 // namely that it only accepts composition (and/or) and modifiers
21541 // (if/unless/etc.).
21542 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21543 return node;
21544 }
21545 break;
21546 case PM_SYMBOL_NODE:
21547 // If we have a symbol node that is being parsed as a label, then we
21548 // need to immediately return, because there should never be an
21549 // infix operator following this node.
21550 if (pm_symbol_node_label_p(node)) {
21551 return node;
21552 }
21553 break;
21554 default:
21555 break;
21556 }
21557
21558 // Otherwise we'll look and see if the next token can be parsed as an infix
21559 // operator. If it can, then we'll parse it using parse_expression_infix.
21560 pm_binding_powers_t current_binding_powers;
21561 pm_token_type_t current_token_type;
21562
// The loop condition is a comma expression: the first two operands re-read the
// current token type and its binding powers on every evaluation, and only the
// final operand decides whether the loop continues.
21563 while (
21564 current_token_type = parser->current.type,
21565 current_binding_powers = pm_binding_powers[current_token_type],
21566 binding_power <= current_binding_powers.left &&
21567 current_binding_powers.binary
21568 ) {
21569 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21570
21571 if (context_terminator(parser->current_context->context, &parser->current)) {
21572 // If this token terminates the current context, then we need to
21573 // stop parsing the expression, as it has become a statement.
21574 return node;
21575 }
21576
21577 switch (PM_NODE_TYPE(node)) {
21578 case PM_MULTI_WRITE_NODE:
21579 // Multi-write nodes are statements, and cannot be followed by
21580 // operators except modifiers.
21581 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21582 return node;
21583 }
21584 break;
21585 case PM_CLASS_VARIABLE_WRITE_NODE:
21586 case PM_CONSTANT_PATH_WRITE_NODE:
21587 case PM_CONSTANT_WRITE_NODE:
21588 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21589 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21590 case PM_LOCAL_VARIABLE_WRITE_NODE:
21591 // These expressions are statements, by virtue of the right-hand
21592 // side of their write being an implicit array.
21593 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21594 return node;
21595 }
21596 break;
21597 case PM_CALL_NODE:
21598 // These expressions are also statements, by virtue of the
21599 // right-hand side of the expression (i.e., the last argument to
21600 // the call node) being an implicit array.
21601 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21602 return node;
21603 }
21604 break;
21605 default:
21606 break;
21607 }
21608
21609 // If the operator is nonassoc and we should not be able to parse the
21610 // upcoming infix operator, break.
21611 if (current_binding_powers.nonassoc) {
21612 // If this is a non-assoc operator and we are about to parse the
21613 // exact same operator, then we need to add an error.
21614 if (match1(parser, current_token_type)) {
21615 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21616 break;
21617 }
21618
21619 // If this is an endless range, then we need to reject a couple of
21620 // additional operators because it violates the normal operator
21621 // precedence rules. Those patterns are:
21622 //
21623 // 1.. & 2
21624 // 1.. * 2
21625 //
21626 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21627 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21628 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21629 break;
21630 }
21631
// For an endless range, stop as soon as the next token binds at least as
// tightly as PM_BINDING_POWER_TERM.
21632 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21633 break;
21634 }
21635 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21636 break;
21637 }
21638 }
21639
21640 if (accepts_command_call) {
21641 // A command-style method call is only accepted on method chains.
21642 // Thus, we check whether the parsed node can continue method chains.
21643 // The method chain can continue if the parsed node is one of the following five kinds:
21644 // (1) index access: foo[1]
21645 // (2) attribute access: foo.bar
21646 // (3) method call with parenthesis: foo.bar(1)
21647 // (4) method call with a block: foo.bar do end
21648 // (5) constant path: foo::Bar
21649 switch (node->type) {
21650 case PM_CALL_NODE: {
21651 pm_call_node_t *cast = (pm_call_node_t *)node;
// The condition below is true only when the call matches none of shapes
// (1)-(4); in that case command calls are no longer accepted.
21652 if (
21653 // (1) foo[1]
21654 !(
21655 cast->call_operator_loc.start == NULL &&
21656 cast->message_loc.start != NULL &&
21657 cast->message_loc.start[0] == '[' &&
21658 cast->message_loc.end[-1] == ']'
21659 ) &&
21660 // (2) foo.bar
21661 !(
21662 cast->call_operator_loc.start != NULL &&
21663 cast->arguments == NULL &&
21664 cast->block == NULL &&
21665 cast->opening_loc.start == NULL
21666 ) &&
21667 // (3) foo.bar(1)
21668 !(
21669 cast->call_operator_loc.start != NULL &&
21670 cast->opening_loc.start != NULL
21671 ) &&
21672 // (4) foo.bar do end
21673 !(
21674 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21675 )
21676 ) {
21677 accepts_command_call = false;
21678 }
21679 break;
21680 }
21681 // (5) foo::Bar
21682 case PM_CONSTANT_PATH_NODE:
21683 break;
21684 default:
21685 accepts_command_call = false;
21686 break;
21687 }
21688 }
21689 }
21690
21691 return node;
21692}
21693
21698static pm_statements_node_t *
21699wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21700 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21701 if (statements == NULL) {
21702 statements = pm_statements_node_create(parser);
21703 }
21704
21705 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21706 pm_arguments_node_arguments_append(
21707 arguments,
21708 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
21709 );
21710
21711 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
21712 parser,
21713 arguments,
21714 pm_parser_constant_id_constant(parser, "print", 5)
21715 )), true);
21716 }
21717
21718 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21719 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21720 if (statements == NULL) {
21721 statements = pm_statements_node_create(parser);
21722 }
21723
21724 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21725 pm_arguments_node_arguments_append(
21726 arguments,
21727 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
21728 );
21729
21730 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21731 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
21732
21733 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21734 parser,
21735 pm_parser_constant_id_constant(parser, "$F", 2),
21736 UP(call)
21737 );
21738
21739 pm_statements_node_body_prepend(statements, UP(write));
21740 }
21741
21742 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21743 pm_arguments_node_arguments_append(
21744 arguments,
21745 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
21746 );
21747
21748 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21749 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21750 pm_keyword_hash_node_elements_append(keywords, UP(pm_assoc_node_create(
21751 parser,
21752 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
21753 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
21754 UP(pm_true_node_synthesized_create(parser))
21755 )));
21756
21757 pm_arguments_node_arguments_append(arguments, UP(keywords));
21758 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21759 }
21760
21761 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21762 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
21763 parser,
21764 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
21765 statements
21766 )), true);
21767
21768 statements = wrapped_statements;
21769 }
21770
21771 return statements;
21772}
21773
/**
 * Parse the whole input as a program and return it as a program node.
 *
 * Pushes a top-level scope when the parser has none, lexes the first token,
 * parses the statements in the PM_CONTEXT_MAIN context, orders the locals of
 * the current scope, pops that scope, and builds the program node from the
 * ordered locals and the statements. When no statements were parsed, an empty
 * statements node located at the start of the source is substituted.
 */
21777static pm_node_t *
21778parse_program(pm_parser_t *parser) {
21779 // If the current scope is NULL, then we want to push a new top level scope.
21780 // The current scope could exist in the event that we are parsing an eval
21781 // and the user has passed into scopes that already exist.
21782 if (parser->current_scope == NULL) {
21783 pm_parser_scope_push(parser, true);
21784 }
21785
// Install a fresh block-exits list for the duration of the parse; the previous
// list is kept so it can be handed to flush_block_exits below.
21786 pm_node_list_t current_block_exits = { 0 };
21787 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21788
21789 parser_lex(parser);
21790 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
21791
21792 if (statements != NULL && !parser->parsing_eval) {
21793 // If we have statements, then the top-level statement should be
21794 // explicitly checked as well. We have to do this here because
21795 // everywhere else we check all but the last statement.
21796 assert(statements->body.size > 0);
21797 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
21798 }
21799
// The locals are filled in by pm_locals_order before the scope is popped.
21800 pm_constant_id_list_t locals;
21801 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
21802 pm_parser_scope_pop(parser);
21803
21804 // At the top level, see if we need to wrap the statements in a program
21805 // node with a while loop based on the options.
// NOTE(review): the `if (...) {` line that opens this branch (and pairs with
// the `} else {` below) is not present in this listing; presumably it tests
// the command line options that wrap_statements handles. Confirm against the
// original file.
21807 statements = wrap_statements(parser, statements);
21808 } else {
21809 flush_block_exits(parser, previous_block_exits);
21810 }
21811
21812 pm_node_list_free(&current_block_exits);
21813
21814 // If this is an empty file, then we're still going to parse all of the
21815 // statements in order to gather up all of the comments and such. Here we'll
21816 // correct the location information.
21817 if (statements == NULL) {
21818 statements = pm_statements_node_create(parser);
21819 pm_statements_node_location_set(statements, parser->start, parser->start);
21820 }
21821
21822 return UP(pm_program_node_create(parser, &locals, statements));
21823}
21824
21825/******************************************************************************/
21826/* External functions */
21827/******************************************************************************/
21828
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. Only `big_length` bytes of `big` are ever
 * examined, so `big` does not need to be NUL-terminated.
 *
 * @param big The start of the buffer to search.
 * @param little The NUL-terminated string to look for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the first match inside `big`, `big` itself when
 *     `little` is empty, or NULL when there is no match (including when
 *     `little` is longer than `big_length`).
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the start; returning here also avoids reading
    // a byte from a zero-length haystack.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Checking this first
    // keeps the pointer arithmetic below inside the buffer.
    if (little_length > big_length) return NULL;

    // The last position at which a match could still fit.
    const char *max = big + (big_length - little_length);

    for (; big <= max; big++) {
        // Cheap first-byte test before the full comparison.
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
21848
21849#ifdef _WIN32
21850#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
21851#else
21857static void
21858pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
21859 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
21860 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
21861 }
21862}
21863#endif
21864
21869static void
21870pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
21871 const char *switches = pm_strnstr(engine, " -", length);
21872 if (switches == NULL) return;
21873
21874 pm_options_t next_options = *options;
21875 options->shebang_callback(
21876 &next_options,
21877 (const uint8_t *) (switches + 1),
21878 length - ((size_t) (switches - engine)) - 1,
21879 options->shebang_callback_data
21880 );
21881
21882 size_t encoding_length;
21883 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
21884 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
21885 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21886 }
21887
21888 parser->command_line = next_options.command_line;
21889 parser->frozen_string_literal = next_options.frozen_string_literal;
21890}
21891
/**
 * Initialize a parser over the given source bytes.
 *
 * The parser struct is overwritten wholesale with its starting values, the
 * constant pool and newline list are sized from the input size, the options
 * (when non-NULL) are applied, a leading UTF-8 BOM is skipped, and a shebang
 * on the first line is processed. When the -x option is set, or the first
 * shebang does not mention "ruby" in a main script, later lines are searched
 * for a shebang that does.
 *
 * parser  - the parser to initialize
 * source  - the start of the source; must not be NULL (asserted)
 * size    - the number of bytes in the source
 * options - the options to apply; may be NULL
 *
 * NOTE(review): the line carrying this definition's return type is not
 * present in this listing.
 */
21896pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
21897 assert(source != NULL);
21898
21899 *parser = (pm_parser_t) {
21900 .node_id = 0,
21901 .lex_state = PM_LEX_STATE_BEG,
21902 .enclosure_nesting = 0,
21903 .lambda_enclosure_nesting = -1,
21904 .brace_nesting = 0,
21905 .do_loop_stack = 0,
21906 .accepts_block_stack = 0,
21907 .lex_modes = {
21908 .index = 0,
21909 .stack = {{ .mode = PM_LEX_DEFAULT }},
21910 .current = &parser->lex_modes.stack[0],
21911 },
21912 .start = source,
21913 .end = source + size,
21914 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21915 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21916 .next_start = NULL,
21917 .heredoc_end = NULL,
21918 .data_loc = { .start = NULL, .end = NULL },
21919 .comment_list = { 0 },
21920 .magic_comment_list = { 0 },
21921 .warning_list = { 0 },
21922 .error_list = { 0 },
21923 .current_scope = NULL,
21924 .current_context = NULL,
21925 .encoding = PM_ENCODING_UTF_8_ENTRY,
21926 .encoding_changed_callback = NULL,
21927 .encoding_comment_start = source,
21928 .lex_callback = NULL,
21929 .filepath = { 0 },
21930 .constant_pool = { 0 },
21931 .newline_list = { 0 },
21932 .integer_base = 0,
21933 .current_string = PM_STRING_EMPTY,
21934 .start_line = 1,
21935 .explicit_encoding = NULL,
21936 .command_line = 0,
21937 .parsing_eval = false,
21938 .partial_script = false,
21939 .command_start = true,
21940 .recovering = false,
21941 .encoding_locked = false,
21942 .encoding_changed = false,
21943 .pattern_matching_newlines = false,
21944 .in_keyword_arg = false,
21945 .current_block_exits = NULL,
21946 .semantic_token_seen = false,
21947 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
21948 .current_regular_expression_ascii_only = false,
21949 .warn_mismatched_indentation = true
21950 };
21951
21952 // Initialize the constant pool. We're going to completely guess as to the
21953 // number of constants that we'll need based on the size of the input. The
21954 // ratio we chose here is actually less arbitrary than you might think.
21955 //
21956 // We took ~50K Ruby files and measured the size of the file versus the
21957 // number of constants that were found in those files. Then we found the
21958 // average and standard deviation of the ratios of constants/bytesize. Then
21959 // we added 1.34 standard deviations to the average to get a ratio that
21960 // would fit 75% of the files (for a two-tailed distribution). This works
21961 // because there was about a 0.77 correlation and the distribution was
21962 // roughly normal.
21963 //
21964 // This ratio will need to change if we add more constants to the constant
21965 // pool for another node type.
21966 uint32_t constant_size = ((uint32_t) size) / 95;
21967 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
21968
21969 // Initialize the newline list. Similar to the constant pool, we're going to
21970 // guess at the number of newlines that we'll need based on the size of the
21971 // input.
21972 size_t newline_size = size / 22;
21973 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
21974
21975 // If options were provided to this parse, establish them here.
21976 if (options != NULL) {
21977 // filepath option
21978 parser->filepath = options->filepath;
21979
21980 // line option
21981 parser->start_line = options->line;
21982
21983 // encoding option
21984 size_t encoding_length = pm_string_length(&options->encoding);
21985 if (encoding_length > 0) {
21986 const uint8_t *encoding_source = pm_string_source(&options->encoding);
21987 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21988 }
21989
21990 // encoding_locked option
21991 parser->encoding_locked = options->encoding_locked;
21992
21993 // frozen_string_literal option
// NOTE(review): the statement that applies this option is not present in this
// listing. Confirm against the original file.
21995
21996 // command_line option
21997 parser->command_line = options->command_line;
21998
21999 // version option
22000 parser->version = options->version;
22001
22002 // partial_script
22003 parser->partial_script = options->partial_script;
22004
22005 // scopes option
22006 parser->parsing_eval = options->scopes_count > 0;
22007 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22008
// Push one parser scope per scope given in the options; only the first is
// pushed with `true` as the second argument.
22009 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22010 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22011 pm_parser_scope_push(parser, scope_index == 0);
22012
22013 // Scopes given from the outside are not allowed to have numbered
22014 // parameters.
22015 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22016
22017 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22018 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22019
22020 const uint8_t *source = pm_string_source(local);
22021 size_t length = pm_string_length(local);
22022
// The name is copied into a fresh allocation before being handed to
// pm_parser_local_add_owned; a local whose allocation fails is skipped.
22023 void *allocated = xmalloc(length);
22024 if (allocated == NULL) continue;
22025
22026 memcpy(allocated, source, length);
22027 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22028 }
22029 }
22030 }
22031
22032 // Now that we have established the user-provided options, check if
22033 // a version was given and parse as the latest version otherwise.
22034 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
// NOTE(review): the body of this branch is not present in this listing; per
// the comment above it presumably selects the latest version. Confirm.
22036 }
22037
22038 pm_accepts_block_stack_push(parser, true);
22039
22040 // Skip past the UTF-8 BOM if it exists.
22041 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22042 parser->current.end += 3;
22043 parser->encoding_comment_start += 3;
22044
22045 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
// NOTE(review): a statement appears to be missing from this listing here;
// only the callback invocation is visible. Confirm against the original file.
22047 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22048 }
22049 }
22050
22051 // If the -x command line flag is set, or the first shebang of the file does
22052 // not include "ruby", then we'll search for a shebang that does include
22053 // "ruby" and start parsing from there.
22054 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22055
22056 // If the first two bytes of the source are a shebang, then we will do a bit
22057 // of extra processing.
22058 //
22059 // First, we'll indicate that the encoding comment is at the end of the
22060 // shebang. This means that when a shebang is present the encoding comment
22061 // can begin on the second line.
22062 //
22063 // Second, we will check if the shebang includes "ruby". If it does, then
22064 // we will start parsing from there. We will also potentially warn the
22065 // user if there is a carriage return at the end of the shebang. We will
22066 // also potentially call the shebang callback if this is the main script to
22067 // allow the caller to parse the shebang and find any command-line options.
22068 // If the shebang does not include "ruby" and this is the main script being
22069 // parsed, then we will start searching the file for a shebang that does
22070 // contain "ruby" as if -x were passed on the command line.
22071 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22072 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22073
22074 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22075 const char *engine;
22076
// NOTE(review): the "#!" test above reads from parser->current.end (which is
// past any BOM), while this search starts at parser->start using a length
// measured from parser->current.end. With a BOM present, the last three bytes
// of the first line would therefore not be searched. Confirm whether that is
// intended.
22077 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22078 if (newline != NULL) {
22079 parser->encoding_comment_start = newline + 1;
22080
22081 if (options == NULL || options->main_script) {
22082 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22083 }
22084 }
22085
22086 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22087 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22088 }
22089
22090 search_shebang = false;
22091 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22092 search_shebang = true;
22093 }
22094 }
22095
22096 // Here we're going to find the first shebang that includes "ruby" and start
22097 // parsing from there.
22098 if (search_shebang) {
22099 // If a shebang that includes "ruby" is not found, then we're going to add
22100 // a load error to the list of errors on the parser.
22101 bool found_shebang = false;
22102
22103 // This is going to point to the start of each line as we check it.
22104 // We'll maintain a moving window looking at each line as they come.
22105 const uint8_t *cursor = parser->start;
22106
22107 // The newline pointer points to the end of the current line that we're
22108 // considering. If it is NULL, then we're at the end of the file.
22109 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22110
// Each iteration records the newline that ends the previous line and then
// inspects the line that follows it, so the first line of the source is never
// inspected by this loop.
22111 while (newline != NULL) {
22112 pm_newline_list_append(&parser->newline_list, newline);
22113
22114 cursor = newline + 1;
22115 newline = next_newline(cursor, parser->end - cursor);
22116
22117 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22118 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22119 const char *engine;
22120 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22121 found_shebang = true;
22122
22123 if (newline != NULL) {
22124 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22125 parser->encoding_comment_start = newline + 1;
22126 }
22127
22128 if (options != NULL && options->shebang_callback != NULL) {
22129 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22130 }
22131
22132 break;
22133 }
22134 }
22135 }
22136
// On success both tokens are reset to empty EOF tokens at the shebang line so
// that lexing resumes from there; on failure an error is recorded and the
// newlines collected during the search are discarded.
22137 if (found_shebang) {
22138 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22139 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22140 } else {
22141 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22142 pm_newline_list_clear(&parser->newline_list);
22143 }
22144 }
22145
22146 // The encoding comment can start after any amount of inline whitespace, so
22147 // here we'll advance it to the first non-inline-whitespace character so
22148 // that it is ready for future comparisons.
22149 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22150}
22151
22160
22164static inline void
22165pm_comment_list_free(pm_list_t *list) {
22166 pm_list_node_t *node, *next;
22167
22168 for (node = list->head; node != NULL; node = next) {
22169 next = node->next;
22170
22171 pm_comment_t *comment = (pm_comment_t *) node;
22172 xfree(comment);
22173 }
22174}
22175
/**
 * Walk the given list of magic comments from head to tail.
 *
 * NOTE(review): going by the name and the sibling pm_comment_list_free, each
 * node is presumably freed during the walk, but the statements doing so are
 * not present in this listing. Confirm against the original file.
 */
22179static inline void
22180pm_magic_comment_list_free(pm_list_t *list) {
22181 pm_list_node_t *node, *next;
22182
22183 for (node = list->head; node != NULL; node = next) {
// The successor is read first so that the loop's step does not need to touch
// the current node again.
22184 next = node->next;
22185
// NOTE(review): the remainder of the loop body is missing from this listing.
22188 }
22189}
22190
// NOTE(review): the signature line(s) for this body are not present in this
// listing; it is presumably pm_parser_free(pm_parser_t *parser), which other
// functions in this file call. Confirm against the original file.
//
// The visible body releases the filepath string, both diagnostic lists, the
// comment and magic comment lists, the constant pool and the newline list,
// then pops any remaining scopes and lex modes.
22196 pm_string_free(&parser->filepath);
22197 pm_diagnostic_list_free(&parser->error_list);
22198 pm_diagnostic_list_free(&parser->warning_list);
22199 pm_comment_list_free(&parser->comment_list);
22200 pm_magic_comment_list_free(&parser->magic_comment_list);
22201 pm_constant_pool_free(&parser->constant_pool);
22202 pm_newline_list_free(&parser->newline_list);
22203
22204 while (parser->current_scope != NULL) {
22205 // Normally, popping the scope doesn't free the locals since it is
22206 // assumed that ownership has transferred to the AST. However if we have
22207 // scopes while we're freeing the parser, it's likely they came from
22208 // eval scopes and we need to free them explicitly here.
22209 pm_parser_scope_pop(parser);
22210 }
22211
// Pop lex modes for as long as the stack index is at or beyond
// PM_LEX_STACK_SIZE.
// NOTE(review): presumably only those entries live outside the parser's
// inline stack and need releasing. Confirm against lex_mode_push/lex_mode_pop.
22212 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22213 lex_mode_pop(parser);
22214 }
22215}
22216
// NOTE(review): only the tail of this definition is present in this listing
// (its signature line is missing); it is presumably pm_parse. The visible
// statement hands the parser to parse_program and returns its result.
22222 return parse_program(parser);
22223}
22224
22230static bool
22231pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22232#define LINE_SIZE 4096
22233 char line[LINE_SIZE];
22234
22235 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22236 size_t length = LINE_SIZE;
22237 while (length > 0 && line[length - 1] == '\n') length--;
22238
22239 if (length == LINE_SIZE) {
22240 // If we read a line that is the maximum size and it doesn't end
22241 // with a newline, then we'll just append it to the buffer and
22242 // continue reading.
22243 length--;
22244 pm_buffer_append_string(buffer, line, length);
22245 continue;
22246 }
22247
22248 // Append the line to the buffer.
22249 length--;
22250 pm_buffer_append_string(buffer, line, length);
22251
22252 // Check if the line matches the __END__ marker. If it does, then stop
22253 // reading and return false. In most circumstances, this means we should
22254 // stop reading from the stream so that the DATA constant can pick it
22255 // up.
22256 switch (length) {
22257 case 7:
22258 if (strncmp(line, "__END__", 7) == 0) return false;
22259 break;
22260 case 8:
22261 if (strncmp(line, "__END__\n", 8) == 0) return false;
22262 break;
22263 case 9:
22264 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22265 break;
22266 }
22267
22268 // All data should be read via gets. If the string returned by gets
22269 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22270 if (stream_feof(stream)) {
22271 break;
22272 }
22273 }
22274
22275 return true;
22276#undef LINE_SIZE
22277}
22278
22288static bool
22289pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22290 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22291
22292 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22293 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22294 return true;
22295 }
22296 }
22297
22298 return false;
22299}
22300
/**
 * Parse source that is read incrementally from a stream.
 *
 * The buffer is initialized here, filled through pm_parse_stream_read, and
 * used as the parser's source. While the last read did not return true, the
 * parse produced errors, and either a lex mode is still open or a
 * PM_ERR_HEREDOC_TERM error is present, more input is appended to the buffer
 * and the parse is redone from scratch on the enlarged buffer.
 *
 * Returns the node produced by the final parse. The parser and the buffer are
 * left initialized on return.
 *
 * NOTE(review): the line carrying this definition's return type is not
 * present in this listing.
 */
22308pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22309 pm_buffer_init(buffer);
22310
22311 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22312
22313 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22314 pm_node_t *node = pm_parse(parser);
22315
22316 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
// The previous tree is destroyed before the parser is freed and
// re-initialized over the buffer's current contents.
22317 pm_node_destroy(parser, node);
22318 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22319
22320 pm_parser_free(parser);
22321 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22322 node = pm_parse(parser);
22323 }
22324
22325 return node;
22326}
22327
/**
 * Parse the given source with options read from `data` and report whether the
 * parse produced no errors.
 *
 * source - the start of the source bytes
 * size   - the number of bytes in the source
 * data   - passed to pm_options_read to populate the options
 *
 * Returns true when the parser's error list is empty after parsing. The tree,
 * the parser and the options are all released before returning.
 *
 * NOTE(review): the line carrying this definition's return type is not
 * present in this listing.
 */
22332pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22333 pm_options_t options = { 0 };
22334 pm_options_read(&options, data);
22335
22336 pm_parser_t parser;
22337 pm_parser_init(&parser, source, size, &options);
22338
22339 pm_node_t *node = pm_parse(&parser);
22340 pm_node_destroy(&parser, node);
22341
// The error count is read before the parser is freed.
22342 bool result = parser.error_list.size == 0;
22343 pm_parser_free(&parser);
22344 pm_options_free(&options);
22345
22346 return result;
22347}
22348
22349#undef PM_CASE_KEYWORD
22350#undef PM_CASE_OPERATOR
22351#undef PM_CASE_WRITABLE
22352#undef PM_STRING_EMPTY
22353
22354// We optionally support serializing to a binary string. For systems that don't
22355// want or need this functionality, it can be turned off with the
22356// PRISM_EXCLUDE_SERIALIZATION define.
22357#ifndef PRISM_EXCLUDE_SERIALIZATION
22358
22359static inline void
22360pm_serialize_header(pm_buffer_t *buffer) {
22361 pm_buffer_append_string(buffer, "PRISM", 5);
22362 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22363 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22364 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22365 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22366}
22367
22372pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22373 pm_serialize_header(buffer);
22374 pm_serialize_content(parser, node, buffer);
22375 pm_buffer_append_byte(buffer, '\0');
22376}
22377
22383pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22384 pm_options_t options = { 0 };
22385 pm_options_read(&options, data);
22386
22387 pm_parser_t parser;
22388 pm_parser_init(&parser, source, size, &options);
22389
22390 pm_node_t *node = pm_parse(&parser);
22391
22392 pm_serialize_header(buffer);
22393 pm_serialize_content(&parser, node, buffer);
22394 pm_buffer_append_byte(buffer, '\0');
22395
22396 pm_node_destroy(&parser, node);
22397 pm_parser_free(&parser);
22398 pm_options_free(&options);
22399}
22400
22406pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
22407 pm_parser_t parser;
22408 pm_options_t options = { 0 };
22409 pm_options_read(&options, data);
22410
22411 pm_buffer_t parser_buffer;
22412 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
22413 pm_serialize_header(buffer);
22414 pm_serialize_content(&parser, node, buffer);
22415 pm_buffer_append_byte(buffer, '\0');
22416
22417 pm_node_destroy(&parser, node);
22418 pm_buffer_free(&parser_buffer);
22419 pm_parser_free(&parser);
22420 pm_options_free(&options);
22421}
22422
22427pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22428 pm_options_t options = { 0 };
22429 pm_options_read(&options, data);
22430
22431 pm_parser_t parser;
22432 pm_parser_init(&parser, source, size, &options);
22433
22434 pm_node_t *node = pm_parse(&parser);
22435 pm_serialize_header(buffer);
22436 pm_serialize_encoding(parser.encoding, buffer);
22437 pm_buffer_append_varsint(buffer, parser.start_line);
22438 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22439
22440 pm_node_destroy(&parser, node);
22441 pm_parser_free(&parser);
22442 pm_options_free(&options);
22443}
22444
22445#endif
22446
22447/******************************************************************************/
22448/* Slice queries for the Ruby API */
22449/******************************************************************************/
22450
/** The category of identifier that pm_slice_type found in a slice. */
typedef enum {
    /** Returned when the requested encoding could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice is empty or is not a valid identifier. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier whose first character is not uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier whose first character is uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22468
22472pm_slice_type_t
22473pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22474 // first, get the right encoding object
22475 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22476 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22477
22478 // check that there is at least one character
22479 if (length == 0) return PM_SLICE_TYPE_NONE;
22480
22481 size_t width;
22482 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22483 // valid because alphabetical
22484 } else if (*source == '_') {
22485 // valid because underscore
22486 width = 1;
22487 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22488 // valid because multibyte
22489 } else {
22490 // invalid because no match
22491 return PM_SLICE_TYPE_NONE;
22492 }
22493
22494 // determine the type of the slice based on the first character
22495 const uint8_t *end = source + length;
22496 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22497
22498 // next, iterate through all of the bytes of the string to ensure that they
22499 // are all valid identifier characters
22500 source += width;
22501
22502 while (source < end) {
22503 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22504 // valid because alphanumeric
22505 source += width;
22506 } else if (*source == '_') {
22507 // valid because underscore
22508 source++;
22509 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22510 // valid because multibyte
22511 source += width;
22512 } else {
22513 // invalid because no match
22514 break;
22515 }
22516 }
22517
22518 // accept a ! or ? at the end of the slice as a method name
22519 if (*source == '!' || *source == '?' || *source == '=') {
22520 source++;
22521 result = PM_SLICE_TYPE_METHOD_NAME;
22522 }
22523
22524 // valid if we are at the end of the slice
22525 return source == end ? result : PM_SLICE_TYPE_NONE;
22526}
22527
22532pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22533 switch (pm_slice_type(source, length, encoding_name)) {
22534 case PM_SLICE_TYPE_ERROR:
22535 return PM_STRING_QUERY_ERROR;
22536 case PM_SLICE_TYPE_NONE:
22537 case PM_SLICE_TYPE_CONSTANT:
22538 case PM_SLICE_TYPE_METHOD_NAME:
22539 return PM_STRING_QUERY_FALSE;
22540 case PM_SLICE_TYPE_LOCAL:
22541 return PM_STRING_QUERY_TRUE;
22542 }
22543
22544 assert(false && "unreachable");
22545 return PM_STRING_QUERY_FALSE;
22546}
22547
22552pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22553 switch (pm_slice_type(source, length, encoding_name)) {
22554 case PM_SLICE_TYPE_ERROR:
22555 return PM_STRING_QUERY_ERROR;
22556 case PM_SLICE_TYPE_NONE:
22557 case PM_SLICE_TYPE_LOCAL:
22558 case PM_SLICE_TYPE_METHOD_NAME:
22559 return PM_STRING_QUERY_FALSE;
22560 case PM_SLICE_TYPE_CONSTANT:
22561 return PM_STRING_QUERY_TRUE;
22562 }
22563
22564 assert(false && "unreachable");
22565 return PM_STRING_QUERY_FALSE;
22566}
22567
22572pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22573#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22574#define C1(c) (*source == c)
22575#define C2(s) (memcmp(source, s, 2) == 0)
22576#define C3(s) (memcmp(source, s, 3) == 0)
22577
22578 switch (pm_slice_type(source, length, encoding_name)) {
22579 case PM_SLICE_TYPE_ERROR:
22580 return PM_STRING_QUERY_ERROR;
22581 case PM_SLICE_TYPE_NONE:
22582 break;
22583 case PM_SLICE_TYPE_LOCAL:
22584 // numbered parameters are not valid method names
22585 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22586 case PM_SLICE_TYPE_CONSTANT:
22587 // all constants are valid method names
22588 case PM_SLICE_TYPE_METHOD_NAME:
22589 // all method names are valid method names
22590 return PM_STRING_QUERY_TRUE;
22591 }
22592
22593 switch (length) {
22594 case 1:
22595 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22596 case 2:
22597 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22598 case 3:
22599 return B(C3("===") || C3("<=>") || C3("[]="));
22600 default:
22601 return PM_STRING_QUERY_FALSE;
22602 }
22603
22604#undef B
22605#undef C1
22606#undef C2
22607#undef C3
22608}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:208
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:192
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:172
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:222
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:228
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:101
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:86
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_CRUBY_4_0
The vendored version of prism in CRuby 4.0.x.
Definition options.h:98
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:499
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:324
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:351
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:336
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:348
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:360
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:306
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:387
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:339
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:312
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:381
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:417
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:432
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:405
@ PM_CONTEXT_IF
an if statement
Definition parser.h:363
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:399
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:378
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:366
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:402
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_BLOCK_PARAMETERS
expressions in block parameters foo do |...| end
Definition parser.h:303
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:426
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:411
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:342
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:435
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:357
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:408
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:345
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:315
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:384
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:369
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:414
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:354
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:429
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:396
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:438
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:569
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:525
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:451
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:355
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:351
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:359
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:367
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22221
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22157
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22195
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:22308
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:21896
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:265
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:273
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:267
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:270
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2147
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:102
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2124
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:109
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2054
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17302
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17304
const uint8_t * start
The start of the regular expression.
Definition prism.c:17307
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:17318
const uint8_t * end
The end of the regular expression.
Definition prism.c:17310
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20292
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20303
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20294
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20300
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20297
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20310
AndNode.
Definition ast.h:1272
struct pm_node * left
AndNode::left.
Definition ast.h:1288
struct pm_node * right
AndNode::right.
Definition ast.h:1301
ArgumentsNode.
Definition ast.h:1333
pm_node_t base
The embedded base node.
Definition ast.h:1335
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1346
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1586
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1597
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1600
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1588
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1591
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1594
ArrayNode.
Definition ast.h:1364
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1374
ArrayPatternNode.
Definition ast.h:1425
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1444
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1484
pm_node_t base
The embedded base node.
Definition ast.h:1427
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1494
AssocNode.
Definition ast.h:1509
struct pm_node * value
AssocNode::value.
Definition ast.h:1541
struct pm_node * key
AssocNode::key.
Definition ast.h:1528
BeginNode.
Definition ast.h:1635
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1688
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1668
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1658
pm_node_t base
The embedded base node.
Definition ast.h:1637
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1678
This struct represents a set of binding powers used for a given token.
Definition prism.c:12146
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12154
pm_binding_power_t left
The left binding power.
Definition prism.c:12148
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12160
pm_binding_power_t right
The right binding power.
Definition prism.c:12151
BlockLocalVariableNode.
Definition ast.h:1754
BlockNode.
Definition ast.h:1782
BlockParameterNode.
Definition ast.h:1858
BlockParametersNode.
Definition ast.h:1912
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2139
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2200
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2220
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2158
pm_constant_id_t name
CallNode::name.
Definition ast.h:2181
pm_node_t base
The embedded base node.
Definition ast.h:2141
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2233
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2171
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2191
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2210
struct pm_node * block
CallNode::block.
Definition ast.h:2243
CaseMatchNode.
Definition ast.h:2578
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2601
CaseNode.
Definition ast.h:2648
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2671
ClassVariableReadNode.
Definition ast.h:2943
ClassVariableTargetNode.
Definition ast.h:2972
ClassVariableWriteNode.
Definition ast.h:2995
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:461
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:469
pm_location_t location
The location of the comment in the source.
Definition parser.h:466
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3209
ConstantPathTargetNode.
Definition ast.h:3347
ConstantReadNode.
Definition ast.h:3442
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3471
ConstantWriteNode.
Definition ast.h:3494
This is a node in a linked list of contexts.
Definition parser.h:442
pm_context_t context
The context that this node represents.
Definition parser.h:444
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:447
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:366
ElseNode.
Definition ast.h:3673
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3686
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3771
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3784
FindPatternNode.
Definition ast.h:3831
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3844
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3896
pm_node_t base
The embedded base node.
Definition ast.h:3833
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3909
FlipFlopNode.
Definition ast.h:3927
FloatNode.
Definition ast.h:3960
double value
FloatNode::value.
Definition ast.h:3970
pm_node_t base
The embedded base node.
Definition ast.h:3962
ForwardingParameterNode.
Definition ast.h:4096
GlobalVariableReadNode.
Definition ast.h:4263
GlobalVariableTargetNode.
Definition ast.h:4292
GlobalVariableWriteNode.
Definition ast.h:4315
HashNode.
Definition ast.h:4377
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4403
HashPatternNode.
Definition ast.h:4437
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4492
pm_node_t base
The embedded base node.
Definition ast.h:4439
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4505
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4453
All of the information necessary to store to lexing a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4526
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4586
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4605
ImaginaryNode.
Definition ast.h:4632
InstanceVariableReadNode.
Definition ast.h:5122
InstanceVariableTargetNode.
Definition ast.h:5151
InstanceVariableWriteNode.
Definition ast.h:5174
IntegerNode.
Definition ast.h:5242
pm_integer_t value
IntegerNode::value.
Definition ast.h:5252
pm_node_t base
The embedded base node.
Definition ast.h:5244
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5280
InterpolatedRegularExpressionNode.
Definition ast.h:5326
InterpolatedStringNode.
Definition ast.h:5363
pm_node_t base
The embedded base node.
Definition ast.h:5365
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5371
InterpolatedSymbolNode.
Definition ast.h:5396
pm_node_t base
The embedded base node.
Definition ast.h:5398
InterpolatedXStringNode.
Definition ast.h:5429
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5437
pm_node_t base
The embedded base node.
Definition ast.h:5431
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5442
KeywordHashNode.
Definition ast.h:5501
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:521
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:515
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
enum pm_lex_mode::@98 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
union pm_lex_mode::@99 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:535
pm_constant_id_t name
The name of the local variable.
Definition parser.h:537
pm_location_t location
The location of the local variable in the source.
Definition parser.h:540
uint32_t hash
The hash of the local variable.
Definition parser.h:549
uint32_t index
The index of the local variable in the local table.
Definition parser.h:543
uint32_t reads
The number of times the local variable is read.
Definition parser.h:546
LocalVariableReadNode.
Definition ast.h:5743
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5774
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5761
LocalVariableTargetNode.
Definition ast.h:5792
LocalVariableWriteNode.
Definition ast.h:5820
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5847
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5834
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:557
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:565
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:562
uint32_t size
The number of local variables in the set.
Definition parser.h:559
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:478
MatchLastLineNode.
Definition ast.h:5912
MatchWriteNode.
Definition ast.h:6070
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6083
MissingNode.
Definition ast.h:6095
MultiTargetNode.
Definition ast.h:6166
pm_node_t base
The embedded base node.
Definition ast.h:6168
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6224
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6184
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6234
MultiWriteNode.
Definition ast.h:6249
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:557
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1052
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1057
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1075
OptionalParameterNode.
Definition ast.h:6522
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:107
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:156
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:118
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:172
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:179
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:133
int32_t line
The line within the file that the parse starts on.
Definition options.h:127
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:112
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:165
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:189
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:138
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:121
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:153
OrNode.
Definition ast.h:6560
struct pm_node * left
OrNode::left.
Definition ast.h:6576
struct pm_node * right
OrNode::right.
Definition ast.h:6589
ParametersNode.
Definition ast.h:6615
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6633
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6653
pm_node_t base
The embedded base node.
Definition ast.h:6617
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6648
ParenthesesNode.
Definition ast.h:6671
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6679
This struct represents the overall parser.
Definition parser.h:643
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:843
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:652
uint8_t command_line
The command line flags given from the options.
Definition parser.h:862
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:758
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:885
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:912
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:697
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:891
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:800
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:933
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:789
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:915
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:710
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:752
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:724
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:661
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:777
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:859
pm_token_t previous
The previous token we were considering.
Definition parser.h:700
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:806
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:878
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:927
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:906
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:731
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:743
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:694
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:655
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:737
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:872
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:856
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:771
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:687
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:734
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:718
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:667
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:765
struct pm_parser::@104 lex_modes
A stack of lex modes.
int32_t start_line
The line number at the start of the parse.
Definition parser.h:812
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:899
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:684
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:721
size_t index
The current index into the lexer mode stack.
Definition parser.h:690
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:783
pm_scope_t * current_scope
The current local scope.
Definition parser.h:740
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:888
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:792
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:921
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:649
RangeNode.
Definition ast.h:6907
struct pm_node * right
RangeNode::right.
Definition ast.h:6937
struct pm_node * left
RangeNode::left.
Definition ast.h:6923
RationalNode.
Definition ast.h:6965
pm_node_t base
The embedded base node.
Definition ast.h:6967
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6977
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9473
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9478
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9475
RegularExpressionNode.
Definition ast.h:7032
pm_node_t base
The embedded base node.
Definition ast.h:7034
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:7055
RequiredParameterNode.
Definition ast.h:7106
RescueModifierNode.
Definition ast.h:7129
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7147
RescueNode.
Definition ast.h:7167
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7205
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7195
pm_node_t base
The embedded base node.
Definition ast.h:7169
This struct represents a node in a linked list of scopes.
Definition parser.h:583
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:585
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:596
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:623
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:588
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:617
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:629
SplatNode.
Definition ast.h:7467
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7480
StatementsNode.
Definition ast.h:7495
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7503
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7530
pm_node_t base
The embedded base node.
Definition ast.h:7532
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7553
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7548
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7538
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@105 type
The type of the string.
SymbolNode.
Definition ast.h:7626
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7639
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7649
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:9447
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9452
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9458
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
UndefNode.
Definition ast.h:7682
UnlessNode.
Definition ast.h:7713
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7763
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7773
WhenNode.
Definition ast.h:7849
XStringNode.
Definition ast.h:7940